1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988-2014 Free Software Foundation, Inc.
3
4 This file is part of GCC.
5
6 GCC is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
9 any later version.
10
11 GCC is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
19
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "tm.h"
24 #include "rtl.h"
25 #include "tree.h"
26 #include "stringpool.h"
27 #include "attribs.h"
28 #include "calls.h"
29 #include "stor-layout.h"
30 #include "varasm.h"
31 #include "tm_p.h"
32 #include "regs.h"
33 #include "hard-reg-set.h"
34 #include "insn-config.h"
35 #include "conditions.h"
36 #include "output.h"
37 #include "insn-codes.h"
38 #include "insn-attr.h"
39 #include "flags.h"
40 #include "except.h"
41 #include "function.h"
42 #include "recog.h"
43 #include "expr.h"
44 #include "optabs.h"
45 #include "diagnostic-core.h"
46 #include "toplev.h"
47 #include "basic-block.h"
48 #include "ggc.h"
49 #include "target.h"
50 #include "target-def.h"
51 #include "common/common-target.h"
52 #include "langhooks.h"
53 #include "reload.h"
54 #include "cgraph.h"
55 #include "pointer-set.h"
56 #include "hash-table.h"
57 #include "vec.h"
58 #include "basic-block.h"
59 #include "tree-ssa-alias.h"
60 #include "internal-fn.h"
61 #include "gimple-fold.h"
62 #include "tree-eh.h"
63 #include "gimple-expr.h"
64 #include "is-a.h"
65 #include "gimple.h"
66 #include "gimplify.h"
67 #include "cfgloop.h"
68 #include "dwarf2.h"
69 #include "df.h"
70 #include "tm-constrs.h"
71 #include "params.h"
72 #include "cselib.h"
73 #include "debug.h"
74 #include "sched-int.h"
75 #include "sbitmap.h"
76 #include "fibheap.h"
77 #include "opts.h"
78 #include "diagnostic.h"
79 #include "dumpfile.h"
80 #include "tree-pass.h"
81 #include "wide-int.h"
82 #include "context.h"
83 #include "pass_manager.h"
84 #include "target-globals.h"
85 #include "tree-vectorizer.h"
86 #include "shrink-wrap.h"
87 #include "builtins.h"
88
89 static rtx legitimize_dllimport_symbol (rtx, bool);
90 static rtx legitimize_pe_coff_extern_decl (rtx, bool);
91 static rtx legitimize_pe_coff_symbol (rtx, bool);
92
93 #ifndef CHECK_STACK_LIMIT
94 #define CHECK_STACK_LIMIT (-1)
95 #endif
96
97 /* Return index of given mode in mult and division cost tables. */
98 #define MODE_INDEX(mode) \
99 ((mode) == QImode ? 0 \
100 : (mode) == HImode ? 1 \
101 : (mode) == SImode ? 2 \
102 : (mode) == DImode ? 3 \
103 : 4)
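/* Illustrative sketch (added note, not part of the original file): the
   per-mode cost tables below are meant to be indexed with MODE_INDEX, e.g.

     int mul_cost = ix86_cost->mult_init[MODE_INDEX (SImode)];
     int div_cost = ix86_cost->divide[MODE_INDEX (DImode)];

   so QImode/HImode/SImode/DImode each get a dedicated entry and every other
   mode falls through to index 4 ("other").  The accessors mult_init and
   divide follow the processor_costs initializers below and are shown here
   as an assumption about the surrounding port, not a guarantee.  */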
104
105 /* Processor costs (relative to an add) */
106 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
107 #define COSTS_N_BYTES(N) ((N) * 2)
108
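/* Quick sanity check on the scale (added note): with COSTS_N_INSNS (N)
   expanding to (N) * 4 and an addition taking 2 bytes,

     COSTS_N_BYTES (2) == 4 == COSTS_N_INSNS (1),

   i.e. in the size-tuned table one "instruction" of cost corresponds to
   roughly two bytes of encoding, which keeps these -Os numbers comparable
   with the speed-tuned tables that use COSTS_N_INSNS.  */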
109 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall, false}}}
110
111 static stringop_algs ix86_size_memcpy[2] = {
112 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
113 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}};
114 static stringop_algs ix86_size_memset[2] = {
115 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
116 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}};
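/* How to read the stringop_algs initializers (explanatory sketch, added):
   each {max, alg, noalign} triple means "for blocks of at most MAX bytes
   use ALG", with max == -1 acting as the catch-all default, and the leading
   member of each initializer is the algorithm used when the block size is
   not a compile-time constant.  The two-element arrays are assumed to be
   indexed by whether 64-bit code is being generated, roughly

     const struct stringop_algs *algs = &ix86_cost->memcpy[TARGET_64BIT];

   and DUMMY_STRINGOP_ALGS simply fills the 64-bit slot for CPUs that never
   run in 64-bit mode.  The exact field names and the TARGET_64BIT indexing
   are stated as assumptions about the rest of this port, not guarantees.  */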
117
118 const
119 struct processor_costs ix86_size_cost = {/* costs for tuning for size */
120 COSTS_N_BYTES (2), /* cost of an add instruction */
121 COSTS_N_BYTES (3), /* cost of a lea instruction */
122 COSTS_N_BYTES (2), /* variable shift costs */
123 COSTS_N_BYTES (3), /* constant shift costs */
124 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
125 COSTS_N_BYTES (3), /* HI */
126 COSTS_N_BYTES (3), /* SI */
127 COSTS_N_BYTES (3), /* DI */
128 COSTS_N_BYTES (5)}, /* other */
129 0, /* cost of multiply per each bit set */
130 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
131 COSTS_N_BYTES (3), /* HI */
132 COSTS_N_BYTES (3), /* SI */
133 COSTS_N_BYTES (3), /* DI */
134 COSTS_N_BYTES (5)}, /* other */
135 COSTS_N_BYTES (3), /* cost of movsx */
136 COSTS_N_BYTES (3), /* cost of movzx */
137 0, /* "large" insn */
138 2, /* MOVE_RATIO */
139 2, /* cost for loading QImode using movzbl */
140 {2, 2, 2}, /* cost of loading integer registers
141 in QImode, HImode and SImode.
142 Relative to reg-reg move (2). */
143 {2, 2, 2}, /* cost of storing integer registers */
144 2, /* cost of reg,reg fld/fst */
145 {2, 2, 2}, /* cost of loading fp registers
146 in SFmode, DFmode and XFmode */
147 {2, 2, 2}, /* cost of storing fp registers
148 in SFmode, DFmode and XFmode */
149 3, /* cost of moving MMX register */
150 {3, 3}, /* cost of loading MMX registers
151 in SImode and DImode */
152 {3, 3}, /* cost of storing MMX registers
153 in SImode and DImode */
154 3, /* cost of moving SSE register */
155 {3, 3, 3}, /* cost of loading SSE registers
156 in SImode, DImode and TImode */
157 {3, 3, 3}, /* cost of storing SSE registers
158 in SImode, DImode and TImode */
159 3, /* MMX or SSE register to integer */
160 0, /* size of l1 cache */
161 0, /* size of l2 cache */
162 0, /* size of prefetch block */
163 0, /* number of parallel prefetches */
164 2, /* Branch cost */
165 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
166 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
167 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
168 COSTS_N_BYTES (2), /* cost of FABS instruction. */
169 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
170 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
171 ix86_size_memcpy,
172 ix86_size_memset,
173 1, /* scalar_stmt_cost. */
174 1, /* scalar load_cost. */
175 1, /* scalar_store_cost. */
176 1, /* vec_stmt_cost. */
177 1, /* vec_to_scalar_cost. */
178 1, /* scalar_to_vec_cost. */
179 1, /* vec_align_load_cost. */
180 1, /* vec_unalign_load_cost. */
181 1, /* vec_store_cost. */
182 1, /* cond_taken_branch_cost. */
183 1, /* cond_not_taken_branch_cost. */
184 };
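/* Added note: ix86_size_cost is the table the port is expected to switch to
   when code is being optimized for size (e.g. -Os or cold functions), while
   the per-CPU tables that follow are used when optimizing for speed.  The
   exact selection logic lives later in this file; this description is an
   assumption for orientation only.  */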
185
186 /* Processor costs (relative to an add) */
187 static stringop_algs i386_memcpy[2] = {
188 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
189 DUMMY_STRINGOP_ALGS};
190 static stringop_algs i386_memset[2] = {
191 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
192 DUMMY_STRINGOP_ALGS};
193
194 static const
195 struct processor_costs i386_cost = { /* 386 specific costs */
196 COSTS_N_INSNS (1), /* cost of an add instruction */
197 COSTS_N_INSNS (1), /* cost of a lea instruction */
198 COSTS_N_INSNS (3), /* variable shift costs */
199 COSTS_N_INSNS (2), /* constant shift costs */
200 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
201 COSTS_N_INSNS (6), /* HI */
202 COSTS_N_INSNS (6), /* SI */
203 COSTS_N_INSNS (6), /* DI */
204 COSTS_N_INSNS (6)}, /* other */
205 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
206 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
207 COSTS_N_INSNS (23), /* HI */
208 COSTS_N_INSNS (23), /* SI */
209 COSTS_N_INSNS (23), /* DI */
210 COSTS_N_INSNS (23)}, /* other */
211 COSTS_N_INSNS (3), /* cost of movsx */
212 COSTS_N_INSNS (2), /* cost of movzx */
213 15, /* "large" insn */
214 3, /* MOVE_RATIO */
215 4, /* cost for loading QImode using movzbl */
216 {2, 4, 2}, /* cost of loading integer registers
217 in QImode, HImode and SImode.
218 Relative to reg-reg move (2). */
219 {2, 4, 2}, /* cost of storing integer registers */
220 2, /* cost of reg,reg fld/fst */
221 {8, 8, 8}, /* cost of loading fp registers
222 in SFmode, DFmode and XFmode */
223 {8, 8, 8}, /* cost of storing fp registers
224 in SFmode, DFmode and XFmode */
225 2, /* cost of moving MMX register */
226 {4, 8}, /* cost of loading MMX registers
227 in SImode and DImode */
228 {4, 8}, /* cost of storing MMX registers
229 in SImode and DImode */
230 2, /* cost of moving SSE register */
231 {4, 8, 16}, /* cost of loading SSE registers
232 in SImode, DImode and TImode */
233 {4, 8, 16}, /* cost of storing SSE registers
234 in SImode, DImode and TImode */
235 3, /* MMX or SSE register to integer */
236 0, /* size of l1 cache */
237 0, /* size of l2 cache */
238 0, /* size of prefetch block */
239 0, /* number of parallel prefetches */
240 1, /* Branch cost */
241 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
242 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
243 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
244 COSTS_N_INSNS (22), /* cost of FABS instruction. */
245 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
246 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
247 i386_memcpy,
248 i386_memset,
249 1, /* scalar_stmt_cost. */
250 1, /* scalar load_cost. */
251 1, /* scalar_store_cost. */
252 1, /* vec_stmt_cost. */
253 1, /* vec_to_scalar_cost. */
254 1, /* scalar_to_vec_cost. */
255 1, /* vec_align_load_cost. */
256 2, /* vec_unalign_load_cost. */
257 1, /* vec_store_cost. */
258 3, /* cond_taken_branch_cost. */
259 1, /* cond_not_taken_branch_cost. */
260 };
261
262 static stringop_algs i486_memcpy[2] = {
263 {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}},
264 DUMMY_STRINGOP_ALGS};
265 static stringop_algs i486_memset[2] = {
266 {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}},
267 DUMMY_STRINGOP_ALGS};
268
269 static const
270 struct processor_costs i486_cost = { /* 486 specific costs */
271 COSTS_N_INSNS (1), /* cost of an add instruction */
272 COSTS_N_INSNS (1), /* cost of a lea instruction */
273 COSTS_N_INSNS (3), /* variable shift costs */
274 COSTS_N_INSNS (2), /* constant shift costs */
275 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
276 COSTS_N_INSNS (12), /* HI */
277 COSTS_N_INSNS (12), /* SI */
278 COSTS_N_INSNS (12), /* DI */
279 COSTS_N_INSNS (12)}, /* other */
280 1, /* cost of multiply per each bit set */
281 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
282 COSTS_N_INSNS (40), /* HI */
283 COSTS_N_INSNS (40), /* SI */
284 COSTS_N_INSNS (40), /* DI */
285 COSTS_N_INSNS (40)}, /* other */
286 COSTS_N_INSNS (3), /* cost of movsx */
287 COSTS_N_INSNS (2), /* cost of movzx */
288 15, /* "large" insn */
289 3, /* MOVE_RATIO */
290 4, /* cost for loading QImode using movzbl */
291 {2, 4, 2}, /* cost of loading integer registers
292 in QImode, HImode and SImode.
293 Relative to reg-reg move (2). */
294 {2, 4, 2}, /* cost of storing integer registers */
295 2, /* cost of reg,reg fld/fst */
296 {8, 8, 8}, /* cost of loading fp registers
297 in SFmode, DFmode and XFmode */
298 {8, 8, 8}, /* cost of storing fp registers
299 in SFmode, DFmode and XFmode */
300 2, /* cost of moving MMX register */
301 {4, 8}, /* cost of loading MMX registers
302 in SImode and DImode */
303 {4, 8}, /* cost of storing MMX registers
304 in SImode and DImode */
305 2, /* cost of moving SSE register */
306 {4, 8, 16}, /* cost of loading SSE registers
307 in SImode, DImode and TImode */
308 {4, 8, 16}, /* cost of storing SSE registers
309 in SImode, DImode and TImode */
310 3, /* MMX or SSE register to integer */
311 4, /* size of l1 cache. 486 has 8kB cache
312 shared for code and data, so 4kB is
313 not really precise. */
314 4, /* size of l2 cache */
315 0, /* size of prefetch block */
316 0, /* number of parallel prefetches */
317 1, /* Branch cost */
318 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
319 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
320 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
321 COSTS_N_INSNS (3), /* cost of FABS instruction. */
322 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
323 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
324 i486_memcpy,
325 i486_memset,
326 1, /* scalar_stmt_cost. */
327 1, /* scalar load_cost. */
328 1, /* scalar_store_cost. */
329 1, /* vec_stmt_cost. */
330 1, /* vec_to_scalar_cost. */
331 1, /* scalar_to_vec_cost. */
332 1, /* vec_align_load_cost. */
333 2, /* vec_unalign_load_cost. */
334 1, /* vec_store_cost. */
335 3, /* cond_taken_branch_cost. */
336 1, /* cond_not_taken_branch_cost. */
337 };
338
339 static stringop_algs pentium_memcpy[2] = {
340 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
341 DUMMY_STRINGOP_ALGS};
342 static stringop_algs pentium_memset[2] = {
343 {libcall, {{-1, rep_prefix_4_byte, false}}},
344 DUMMY_STRINGOP_ALGS};
345
346 static const
347 struct processor_costs pentium_cost = {
348 COSTS_N_INSNS (1), /* cost of an add instruction */
349 COSTS_N_INSNS (1), /* cost of a lea instruction */
350 COSTS_N_INSNS (4), /* variable shift costs */
351 COSTS_N_INSNS (1), /* constant shift costs */
352 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
353 COSTS_N_INSNS (11), /* HI */
354 COSTS_N_INSNS (11), /* SI */
355 COSTS_N_INSNS (11), /* DI */
356 COSTS_N_INSNS (11)}, /* other */
357 0, /* cost of multiply per each bit set */
358 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
359 COSTS_N_INSNS (25), /* HI */
360 COSTS_N_INSNS (25), /* SI */
361 COSTS_N_INSNS (25), /* DI */
362 COSTS_N_INSNS (25)}, /* other */
363 COSTS_N_INSNS (3), /* cost of movsx */
364 COSTS_N_INSNS (2), /* cost of movzx */
365 8, /* "large" insn */
366 6, /* MOVE_RATIO */
367 6, /* cost for loading QImode using movzbl */
368 {2, 4, 2}, /* cost of loading integer registers
369 in QImode, HImode and SImode.
370 Relative to reg-reg move (2). */
371 {2, 4, 2}, /* cost of storing integer registers */
372 2, /* cost of reg,reg fld/fst */
373 {2, 2, 6}, /* cost of loading fp registers
374 in SFmode, DFmode and XFmode */
375 {4, 4, 6}, /* cost of storing fp registers
376 in SFmode, DFmode and XFmode */
377 8, /* cost of moving MMX register */
378 {8, 8}, /* cost of loading MMX registers
379 in SImode and DImode */
380 {8, 8}, /* cost of storing MMX registers
381 in SImode and DImode */
382 2, /* cost of moving SSE register */
383 {4, 8, 16}, /* cost of loading SSE registers
384 in SImode, DImode and TImode */
385 {4, 8, 16}, /* cost of storing SSE registers
386 in SImode, DImode and TImode */
387 3, /* MMX or SSE register to integer */
388 8, /* size of l1 cache. */
389 8, /* size of l2 cache */
390 0, /* size of prefetch block */
391 0, /* number of parallel prefetches */
392 2, /* Branch cost */
393 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
394 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
395 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
396 COSTS_N_INSNS (1), /* cost of FABS instruction. */
397 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
398 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
399 pentium_memcpy,
400 pentium_memset,
401 1, /* scalar_stmt_cost. */
402 1, /* scalar load_cost. */
403 1, /* scalar_store_cost. */
404 1, /* vec_stmt_cost. */
405 1, /* vec_to_scalar_cost. */
406 1, /* scalar_to_vec_cost. */
407 1, /* vec_align_load_cost. */
408 2, /* vec_unalign_load_cost. */
409 1, /* vec_store_cost. */
410 3, /* cond_taken_branch_cost. */
411 1, /* cond_not_taken_branch_cost. */
412 };
413
414 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
415 (we ensure the alignment). For small blocks an inline loop is still a
416 noticeable win; for bigger blocks either rep movsl or rep movsb is the
417 way to go. Rep movsb apparently has a more expensive startup time in the
418 CPU, but after 4K the difference is down in the noise. */
419 static stringop_algs pentiumpro_memcpy[2] = {
420 {rep_prefix_4_byte, {{128, loop, false}, {1024, unrolled_loop, false},
421 {8192, rep_prefix_4_byte, false},
422 {-1, rep_prefix_1_byte, false}}},
423 DUMMY_STRINGOP_ALGS};
424 static stringop_algs pentiumpro_memset[2] = {
425 {rep_prefix_4_byte, {{1024, unrolled_loop, false},
426 {8192, rep_prefix_4_byte, false},
427 {-1, libcall, false}}},
428 DUMMY_STRINGOP_ALGS};
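/* Worked example (added): with the pentiumpro_memcpy table above, a 32-bit
   memcpy of a known constant 96 bytes picks "loop" (96 <= 128), a 512-byte
   copy picks "unrolled_loop" (512 <= 1024), a 4 kB copy uses
   rep_prefix_4_byte (4096 <= 8192), and anything larger falls through to the
   -1 entry, rep_prefix_1_byte; unknown sizes use the leading
   rep_prefix_4_byte member.  The second array element is
   DUMMY_STRINGOP_ALGS because PentiumPro never executes 64-bit code.  */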
429 static const
430 struct processor_costs pentiumpro_cost = {
431 COSTS_N_INSNS (1), /* cost of an add instruction */
432 COSTS_N_INSNS (1), /* cost of a lea instruction */
433 COSTS_N_INSNS (1), /* variable shift costs */
434 COSTS_N_INSNS (1), /* constant shift costs */
435 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
436 COSTS_N_INSNS (4), /* HI */
437 COSTS_N_INSNS (4), /* SI */
438 COSTS_N_INSNS (4), /* DI */
439 COSTS_N_INSNS (4)}, /* other */
440 0, /* cost of multiply per each bit set */
441 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
442 COSTS_N_INSNS (17), /* HI */
443 COSTS_N_INSNS (17), /* SI */
444 COSTS_N_INSNS (17), /* DI */
445 COSTS_N_INSNS (17)}, /* other */
446 COSTS_N_INSNS (1), /* cost of movsx */
447 COSTS_N_INSNS (1), /* cost of movzx */
448 8, /* "large" insn */
449 6, /* MOVE_RATIO */
450 2, /* cost for loading QImode using movzbl */
451 {4, 4, 4}, /* cost of loading integer registers
452 in QImode, HImode and SImode.
453 Relative to reg-reg move (2). */
454 {2, 2, 2}, /* cost of storing integer registers */
455 2, /* cost of reg,reg fld/fst */
456 {2, 2, 6}, /* cost of loading fp registers
457 in SFmode, DFmode and XFmode */
458 {4, 4, 6}, /* cost of storing fp registers
459 in SFmode, DFmode and XFmode */
460 2, /* cost of moving MMX register */
461 {2, 2}, /* cost of loading MMX registers
462 in SImode and DImode */
463 {2, 2}, /* cost of storing MMX registers
464 in SImode and DImode */
465 2, /* cost of moving SSE register */
466 {2, 2, 8}, /* cost of loading SSE registers
467 in SImode, DImode and TImode */
468 {2, 2, 8}, /* cost of storing SSE registers
469 in SImode, DImode and TImode */
470 3, /* MMX or SSE register to integer */
471 8, /* size of l1 cache. */
472 256, /* size of l2 cache */
473 32, /* size of prefetch block */
474 6, /* number of parallel prefetches */
475 2, /* Branch cost */
476 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
477 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
478 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
479 COSTS_N_INSNS (2), /* cost of FABS instruction. */
480 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
481 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
482 pentiumpro_memcpy,
483 pentiumpro_memset,
484 1, /* scalar_stmt_cost. */
485 1, /* scalar load_cost. */
486 1, /* scalar_store_cost. */
487 1, /* vec_stmt_cost. */
488 1, /* vec_to_scalar_cost. */
489 1, /* scalar_to_vec_cost. */
490 1, /* vec_align_load_cost. */
491 2, /* vec_unalign_load_cost. */
492 1, /* vec_store_cost. */
493 3, /* cond_taken_branch_cost. */
494 1, /* cond_not_taken_branch_cost. */
495 };
496
497 static stringop_algs geode_memcpy[2] = {
498 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
499 DUMMY_STRINGOP_ALGS};
500 static stringop_algs geode_memset[2] = {
501 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
502 DUMMY_STRINGOP_ALGS};
503 static const
504 struct processor_costs geode_cost = {
505 COSTS_N_INSNS (1), /* cost of an add instruction */
506 COSTS_N_INSNS (1), /* cost of a lea instruction */
507 COSTS_N_INSNS (2), /* variable shift costs */
508 COSTS_N_INSNS (1), /* constant shift costs */
509 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
510 COSTS_N_INSNS (4), /* HI */
511 COSTS_N_INSNS (7), /* SI */
512 COSTS_N_INSNS (7), /* DI */
513 COSTS_N_INSNS (7)}, /* other */
514 0, /* cost of multiply per each bit set */
515 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
516 COSTS_N_INSNS (23), /* HI */
517 COSTS_N_INSNS (39), /* SI */
518 COSTS_N_INSNS (39), /* DI */
519 COSTS_N_INSNS (39)}, /* other */
520 COSTS_N_INSNS (1), /* cost of movsx */
521 COSTS_N_INSNS (1), /* cost of movzx */
522 8, /* "large" insn */
523 4, /* MOVE_RATIO */
524 1, /* cost for loading QImode using movzbl */
525 {1, 1, 1}, /* cost of loading integer registers
526 in QImode, HImode and SImode.
527 Relative to reg-reg move (2). */
528 {1, 1, 1}, /* cost of storing integer registers */
529 1, /* cost of reg,reg fld/fst */
530 {1, 1, 1}, /* cost of loading fp registers
531 in SFmode, DFmode and XFmode */
532 {4, 6, 6}, /* cost of storing fp registers
533 in SFmode, DFmode and XFmode */
534
535 1, /* cost of moving MMX register */
536 {1, 1}, /* cost of loading MMX registers
537 in SImode and DImode */
538 {1, 1}, /* cost of storing MMX registers
539 in SImode and DImode */
540 1, /* cost of moving SSE register */
541 {1, 1, 1}, /* cost of loading SSE registers
542 in SImode, DImode and TImode */
543 {1, 1, 1}, /* cost of storing SSE registers
544 in SImode, DImode and TImode */
545 1, /* MMX or SSE register to integer */
546 64, /* size of l1 cache. */
547 128, /* size of l2 cache. */
548 32, /* size of prefetch block */
549 1, /* number of parallel prefetches */
550 1, /* Branch cost */
551 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
552 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
553 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
554 COSTS_N_INSNS (1), /* cost of FABS instruction. */
555 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
556 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
557 geode_memcpy,
558 geode_memset,
559 1, /* scalar_stmt_cost. */
560 1, /* scalar load_cost. */
561 1, /* scalar_store_cost. */
562 1, /* vec_stmt_cost. */
563 1, /* vec_to_scalar_cost. */
564 1, /* scalar_to_vec_cost. */
565 1, /* vec_align_load_cost. */
566 2, /* vec_unalign_load_cost. */
567 1, /* vec_store_cost. */
568 3, /* cond_taken_branch_cost. */
569 1, /* cond_not_taken_branch_cost. */
570 };
571
572 static stringop_algs k6_memcpy[2] = {
573 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
574 DUMMY_STRINGOP_ALGS};
575 static stringop_algs k6_memset[2] = {
576 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
577 DUMMY_STRINGOP_ALGS};
578 static const
579 struct processor_costs k6_cost = {
580 COSTS_N_INSNS (1), /* cost of an add instruction */
581 COSTS_N_INSNS (2), /* cost of a lea instruction */
582 COSTS_N_INSNS (1), /* variable shift costs */
583 COSTS_N_INSNS (1), /* constant shift costs */
584 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
585 COSTS_N_INSNS (3), /* HI */
586 COSTS_N_INSNS (3), /* SI */
587 COSTS_N_INSNS (3), /* DI */
588 COSTS_N_INSNS (3)}, /* other */
589 0, /* cost of multiply per each bit set */
590 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
591 COSTS_N_INSNS (18), /* HI */
592 COSTS_N_INSNS (18), /* SI */
593 COSTS_N_INSNS (18), /* DI */
594 COSTS_N_INSNS (18)}, /* other */
595 COSTS_N_INSNS (2), /* cost of movsx */
596 COSTS_N_INSNS (2), /* cost of movzx */
597 8, /* "large" insn */
598 4, /* MOVE_RATIO */
599 3, /* cost for loading QImode using movzbl */
600 {4, 5, 4}, /* cost of loading integer registers
601 in QImode, HImode and SImode.
602 Relative to reg-reg move (2). */
603 {2, 3, 2}, /* cost of storing integer registers */
604 4, /* cost of reg,reg fld/fst */
605 {6, 6, 6}, /* cost of loading fp registers
606 in SFmode, DFmode and XFmode */
607 {4, 4, 4}, /* cost of storing fp registers
608 in SFmode, DFmode and XFmode */
609 2, /* cost of moving MMX register */
610 {2, 2}, /* cost of loading MMX registers
611 in SImode and DImode */
612 {2, 2}, /* cost of storing MMX registers
613 in SImode and DImode */
614 2, /* cost of moving SSE register */
615 {2, 2, 8}, /* cost of loading SSE registers
616 in SImode, DImode and TImode */
617 {2, 2, 8}, /* cost of storing SSE registers
618 in SImode, DImode and TImode */
619 6, /* MMX or SSE register to integer */
620 32, /* size of l1 cache. */
621 32, /* size of l2 cache. Some models
622 have integrated l2 cache, but
623 optimizing for k6 is not important
624 enough to worry about that. */
625 32, /* size of prefetch block */
626 1, /* number of parallel prefetches */
627 1, /* Branch cost */
628 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
629 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
630 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
631 COSTS_N_INSNS (2), /* cost of FABS instruction. */
632 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
633 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
634 k6_memcpy,
635 k6_memset,
636 1, /* scalar_stmt_cost. */
637 1, /* scalar load_cost. */
638 1, /* scalar_store_cost. */
639 1, /* vec_stmt_cost. */
640 1, /* vec_to_scalar_cost. */
641 1, /* scalar_to_vec_cost. */
642 1, /* vec_align_load_cost. */
643 2, /* vec_unalign_load_cost. */
644 1, /* vec_store_cost. */
645 3, /* cond_taken_branch_cost. */
646 1, /* cond_not_taken_branch_cost. */
647 };
648
649 /* For some reason, Athlon deals better with the REP prefix (relative to
650 loops) than K8 does. Alignment becomes important after 8 bytes for memcpy
651 and 128 bytes for memset. */
652 static stringop_algs athlon_memcpy[2] = {
653 {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
654 DUMMY_STRINGOP_ALGS};
655 static stringop_algs athlon_memset[2] = {
656 {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
657 DUMMY_STRINGOP_ALGS};
658 static const
659 struct processor_costs athlon_cost = {
660 COSTS_N_INSNS (1), /* cost of an add instruction */
661 COSTS_N_INSNS (2), /* cost of a lea instruction */
662 COSTS_N_INSNS (1), /* variable shift costs */
663 COSTS_N_INSNS (1), /* constant shift costs */
664 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
665 COSTS_N_INSNS (5), /* HI */
666 COSTS_N_INSNS (5), /* SI */
667 COSTS_N_INSNS (5), /* DI */
668 COSTS_N_INSNS (5)}, /* other */
669 0, /* cost of multiply per each bit set */
670 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
671 COSTS_N_INSNS (26), /* HI */
672 COSTS_N_INSNS (42), /* SI */
673 COSTS_N_INSNS (74), /* DI */
674 COSTS_N_INSNS (74)}, /* other */
675 COSTS_N_INSNS (1), /* cost of movsx */
676 COSTS_N_INSNS (1), /* cost of movzx */
677 8, /* "large" insn */
678 9, /* MOVE_RATIO */
679 4, /* cost for loading QImode using movzbl */
680 {3, 4, 3}, /* cost of loading integer registers
681 in QImode, HImode and SImode.
682 Relative to reg-reg move (2). */
683 {3, 4, 3}, /* cost of storing integer registers */
684 4, /* cost of reg,reg fld/fst */
685 {4, 4, 12}, /* cost of loading fp registers
686 in SFmode, DFmode and XFmode */
687 {6, 6, 8}, /* cost of storing fp registers
688 in SFmode, DFmode and XFmode */
689 2, /* cost of moving MMX register */
690 {4, 4}, /* cost of loading MMX registers
691 in SImode and DImode */
692 {4, 4}, /* cost of storing MMX registers
693 in SImode and DImode */
694 2, /* cost of moving SSE register */
695 {4, 4, 6}, /* cost of loading SSE registers
696 in SImode, DImode and TImode */
697 {4, 4, 5}, /* cost of storing SSE registers
698 in SImode, DImode and TImode */
699 5, /* MMX or SSE register to integer */
700 64, /* size of l1 cache. */
701 256, /* size of l2 cache. */
702 64, /* size of prefetch block */
703 6, /* number of parallel prefetches */
704 5, /* Branch cost */
705 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
706 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
707 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
708 COSTS_N_INSNS (2), /* cost of FABS instruction. */
709 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
710 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
711 athlon_memcpy,
712 athlon_memset,
713 1, /* scalar_stmt_cost. */
714 1, /* scalar load_cost. */
715 1, /* scalar_store_cost. */
716 1, /* vec_stmt_cost. */
717 1, /* vec_to_scalar_cost. */
718 1, /* scalar_to_vec_cost. */
719 1, /* vec_align_load_cost. */
720 2, /* vec_unalign_load_cost. */
721 1, /* vec_store_cost. */
722 3, /* cond_taken_branch_cost. */
723 1, /* cond_not_taken_branch_cost. */
724 };
725
726 /* K8 has an optimized REP instruction for medium-sized blocks, but for very
727 small blocks it is better to use a loop. For large blocks, a libcall can
728 do nontemporal accesses and beat inlined code considerably. */
729 static stringop_algs k8_memcpy[2] = {
730 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
731 {-1, rep_prefix_4_byte, false}}},
732 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
733 {-1, libcall, false}}}};
734 static stringop_algs k8_memset[2] = {
735 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
736 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
737 {libcall, {{48, unrolled_loop, false},
738 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
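/* Added illustration: unlike the 32-bit-only CPUs above, K8 fills both array
   elements.  Reading the 64-bit memcpy entry, a constant-size copy of
   10 bytes uses the inline "loop" entry (10 <= 16), a 1 kB copy uses
   rep_prefix_8_byte (1024 <= 8192), and anything above 8 kB goes to the
   libcall, matching the comment about nontemporal stores beating inlined
   copies for large blocks.  */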
739 static const
740 struct processor_costs k8_cost = {
741 COSTS_N_INSNS (1), /* cost of an add instruction */
742 COSTS_N_INSNS (2), /* cost of a lea instruction */
743 COSTS_N_INSNS (1), /* variable shift costs */
744 COSTS_N_INSNS (1), /* constant shift costs */
745 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
746 COSTS_N_INSNS (4), /* HI */
747 COSTS_N_INSNS (3), /* SI */
748 COSTS_N_INSNS (4), /* DI */
749 COSTS_N_INSNS (5)}, /* other */
750 0, /* cost of multiply per each bit set */
751 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
752 COSTS_N_INSNS (26), /* HI */
753 COSTS_N_INSNS (42), /* SI */
754 COSTS_N_INSNS (74), /* DI */
755 COSTS_N_INSNS (74)}, /* other */
756 COSTS_N_INSNS (1), /* cost of movsx */
757 COSTS_N_INSNS (1), /* cost of movzx */
758 8, /* "large" insn */
759 9, /* MOVE_RATIO */
760 4, /* cost for loading QImode using movzbl */
761 {3, 4, 3}, /* cost of loading integer registers
762 in QImode, HImode and SImode.
763 Relative to reg-reg move (2). */
764 {3, 4, 3}, /* cost of storing integer registers */
765 4, /* cost of reg,reg fld/fst */
766 {4, 4, 12}, /* cost of loading fp registers
767 in SFmode, DFmode and XFmode */
768 {6, 6, 8}, /* cost of storing fp registers
769 in SFmode, DFmode and XFmode */
770 2, /* cost of moving MMX register */
771 {3, 3}, /* cost of loading MMX registers
772 in SImode and DImode */
773 {4, 4}, /* cost of storing MMX registers
774 in SImode and DImode */
775 2, /* cost of moving SSE register */
776 {4, 3, 6}, /* cost of loading SSE registers
777 in SImode, DImode and TImode */
778 {4, 4, 5}, /* cost of storing SSE registers
779 in SImode, DImode and TImode */
780 5, /* MMX or SSE register to integer */
781 64, /* size of l1 cache. */
782 512, /* size of l2 cache. */
783 64, /* size of prefetch block */
784 /* New AMD processors never drop prefetches; if they cannot be performed
785 immediately, they are queued. We set the number of simultaneous prefetches
786 to a large constant to reflect this (leaving the number of prefetches
787 completely unlimited is probably not a good idea either, as their
788 execution also takes some time). */
789 100, /* number of parallel prefetches */
790 3, /* Branch cost */
791 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
792 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
793 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
794 COSTS_N_INSNS (2), /* cost of FABS instruction. */
795 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
796 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
797
798 k8_memcpy,
799 k8_memset,
800 4, /* scalar_stmt_cost. */
801 2, /* scalar load_cost. */
802 2, /* scalar_store_cost. */
803 5, /* vec_stmt_cost. */
804 0, /* vec_to_scalar_cost. */
805 2, /* scalar_to_vec_cost. */
806 2, /* vec_align_load_cost. */
807 3, /* vec_unalign_load_cost. */
808 3, /* vec_store_cost. */
809 3, /* cond_taken_branch_cost. */
810 2, /* cond_not_taken_branch_cost. */
811 };
812
813 /* AMDFAM10 has an optimized REP instruction for medium-sized blocks, but for
814 very small blocks it is better to use a loop. For large blocks, a libcall
815 can do nontemporal accesses and beat inlined code considerably. */
816 static stringop_algs amdfam10_memcpy[2] = {
817 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
818 {-1, rep_prefix_4_byte, false}}},
819 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
820 {-1, libcall, false}}}};
821 static stringop_algs amdfam10_memset[2] = {
822 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
823 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
824 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
825 {-1, libcall, false}}}};
826 struct processor_costs amdfam10_cost = {
827 COSTS_N_INSNS (1), /* cost of an add instruction */
828 COSTS_N_INSNS (2), /* cost of a lea instruction */
829 COSTS_N_INSNS (1), /* variable shift costs */
830 COSTS_N_INSNS (1), /* constant shift costs */
831 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
832 COSTS_N_INSNS (4), /* HI */
833 COSTS_N_INSNS (3), /* SI */
834 COSTS_N_INSNS (4), /* DI */
835 COSTS_N_INSNS (5)}, /* other */
836 0, /* cost of multiply per each bit set */
837 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
838 COSTS_N_INSNS (35), /* HI */
839 COSTS_N_INSNS (51), /* SI */
840 COSTS_N_INSNS (83), /* DI */
841 COSTS_N_INSNS (83)}, /* other */
842 COSTS_N_INSNS (1), /* cost of movsx */
843 COSTS_N_INSNS (1), /* cost of movzx */
844 8, /* "large" insn */
845 9, /* MOVE_RATIO */
846 4, /* cost for loading QImode using movzbl */
847 {3, 4, 3}, /* cost of loading integer registers
848 in QImode, HImode and SImode.
849 Relative to reg-reg move (2). */
850 {3, 4, 3}, /* cost of storing integer registers */
851 4, /* cost of reg,reg fld/fst */
852 {4, 4, 12}, /* cost of loading fp registers
853 in SFmode, DFmode and XFmode */
854 {6, 6, 8}, /* cost of storing fp registers
855 in SFmode, DFmode and XFmode */
856 2, /* cost of moving MMX register */
857 {3, 3}, /* cost of loading MMX registers
858 in SImode and DImode */
859 {4, 4}, /* cost of storing MMX registers
860 in SImode and DImode */
861 2, /* cost of moving SSE register */
862 {4, 4, 3}, /* cost of loading SSE registers
863 in SImode, DImode and TImode */
864 {4, 4, 5}, /* cost of storing SSE registers
865 in SImode, DImode and TImode */
866 3, /* MMX or SSE register to integer */
867 /* On K8:
868 MOVD reg64, xmmreg Double FSTORE 4
869 MOVD reg32, xmmreg Double FSTORE 4
870 On AMDFAM10:
871 MOVD reg64, xmmreg Double FADD 3
872 1/1 1/1
873 MOVD reg32, xmmreg Double FADD 3
874 1/1 1/1 */
875 64, /* size of l1 cache. */
876 512, /* size of l2 cache. */
877 64, /* size of prefetch block */
878 /* New AMD processors never drop prefetches; if they cannot be performed
879 immediately, they are queued. We set the number of simultaneous prefetches
880 to a large constant to reflect this (leaving the number of prefetches
881 completely unlimited is probably not a good idea either, as their
882 execution also takes some time). */
883 100, /* number of parallel prefetches */
884 2, /* Branch cost */
885 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
886 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
887 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
888 COSTS_N_INSNS (2), /* cost of FABS instruction. */
889 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
890 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
891
892 amdfam10_memcpy,
893 amdfam10_memset,
894 4, /* scalar_stmt_cost. */
895 2, /* scalar load_cost. */
896 2, /* scalar_store_cost. */
897 6, /* vec_stmt_cost. */
898 0, /* vec_to_scalar_cost. */
899 2, /* scalar_to_vec_cost. */
900 2, /* vec_align_load_cost. */
901 2, /* vec_unalign_load_cost. */
902 2, /* vec_store_cost. */
903 2, /* cond_taken_branch_cost. */
904 1, /* cond_not_taken_branch_cost. */
905 };
906
907 /* BDVER1 has an optimized REP instruction for medium-sized blocks, but for
908 very small blocks it is better to use a loop. For large blocks, a libcall
909 can do nontemporal accesses and beat inlined code considerably. */
910 static stringop_algs bdver1_memcpy[2] = {
911 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
912 {-1, rep_prefix_4_byte, false}}},
913 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
914 {-1, libcall, false}}}};
915 static stringop_algs bdver1_memset[2] = {
916 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
917 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
918 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
919 {-1, libcall, false}}}};
920
921 const struct processor_costs bdver1_cost = {
922 COSTS_N_INSNS (1), /* cost of an add instruction */
923 COSTS_N_INSNS (1), /* cost of a lea instruction */
924 COSTS_N_INSNS (1), /* variable shift costs */
925 COSTS_N_INSNS (1), /* constant shift costs */
926 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
927 COSTS_N_INSNS (4), /* HI */
928 COSTS_N_INSNS (4), /* SI */
929 COSTS_N_INSNS (6), /* DI */
930 COSTS_N_INSNS (6)}, /* other */
931 0, /* cost of multiply per each bit set */
932 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
933 COSTS_N_INSNS (35), /* HI */
934 COSTS_N_INSNS (51), /* SI */
935 COSTS_N_INSNS (83), /* DI */
936 COSTS_N_INSNS (83)}, /* other */
937 COSTS_N_INSNS (1), /* cost of movsx */
938 COSTS_N_INSNS (1), /* cost of movzx */
939 8, /* "large" insn */
940 9, /* MOVE_RATIO */
941 4, /* cost for loading QImode using movzbl */
942 {5, 5, 4}, /* cost of loading integer registers
943 in QImode, HImode and SImode.
944 Relative to reg-reg move (2). */
945 {4, 4, 4}, /* cost of storing integer registers */
946 2, /* cost of reg,reg fld/fst */
947 {5, 5, 12}, /* cost of loading fp registers
948 in SFmode, DFmode and XFmode */
949 {4, 4, 8}, /* cost of storing fp registers
950 in SFmode, DFmode and XFmode */
951 2, /* cost of moving MMX register */
952 {4, 4}, /* cost of loading MMX registers
953 in SImode and DImode */
954 {4, 4}, /* cost of storing MMX registers
955 in SImode and DImode */
956 2, /* cost of moving SSE register */
957 {4, 4, 4}, /* cost of loading SSE registers
958 in SImode, DImode and TImode */
959 {4, 4, 4}, /* cost of storing SSE registers
960 in SImode, DImode and TImode */
961 2, /* MMX or SSE register to integer */
962 /* On K8:
963 MOVD reg64, xmmreg Double FSTORE 4
964 MOVD reg32, xmmreg Double FSTORE 4
965 On AMDFAM10:
966 MOVD reg64, xmmreg Double FADD 3
967 1/1 1/1
968 MOVD reg32, xmmreg Double FADD 3
969 1/1 1/1 */
970 16, /* size of l1 cache. */
971 2048, /* size of l2 cache. */
972 64, /* size of prefetch block */
973 /* New AMD processors never drop prefetches; if they cannot be performed
974 immediately, they are queued. We set the number of simultaneous prefetches
975 to a large constant to reflect this (leaving the number of prefetches
976 completely unlimited is probably not a good idea either, as their
977 execution also takes some time). */
978 100, /* number of parallel prefetches */
979 2, /* Branch cost */
980 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
981 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
982 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
983 COSTS_N_INSNS (2), /* cost of FABS instruction. */
984 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
985 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
986
987 bdver1_memcpy,
988 bdver1_memset,
989 6, /* scalar_stmt_cost. */
990 4, /* scalar load_cost. */
991 4, /* scalar_store_cost. */
992 6, /* vec_stmt_cost. */
993 0, /* vec_to_scalar_cost. */
994 2, /* scalar_to_vec_cost. */
995 4, /* vec_align_load_cost. */
996 4, /* vec_unalign_load_cost. */
997 4, /* vec_store_cost. */
998 2, /* cond_taken_branch_cost. */
999 1, /* cond_not_taken_branch_cost. */
1000 };
1001
1002 /* BDVER2 has an optimized REP instruction for medium-sized blocks, but for
1003 very small blocks it is better to use a loop. For large blocks, a libcall
1004 can do nontemporal accesses and beat inlined code considerably. */
1005
1006 static stringop_algs bdver2_memcpy[2] = {
1007 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1008 {-1, rep_prefix_4_byte, false}}},
1009 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1010 {-1, libcall, false}}}};
1011 static stringop_algs bdver2_memset[2] = {
1012 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1013 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1014 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1015 {-1, libcall, false}}}};
1016
1017 const struct processor_costs bdver2_cost = {
1018 COSTS_N_INSNS (1), /* cost of an add instruction */
1019 COSTS_N_INSNS (1), /* cost of a lea instruction */
1020 COSTS_N_INSNS (1), /* variable shift costs */
1021 COSTS_N_INSNS (1), /* constant shift costs */
1022 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1023 COSTS_N_INSNS (4), /* HI */
1024 COSTS_N_INSNS (4), /* SI */
1025 COSTS_N_INSNS (6), /* DI */
1026 COSTS_N_INSNS (6)}, /* other */
1027 0, /* cost of multiply per each bit set */
1028 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1029 COSTS_N_INSNS (35), /* HI */
1030 COSTS_N_INSNS (51), /* SI */
1031 COSTS_N_INSNS (83), /* DI */
1032 COSTS_N_INSNS (83)}, /* other */
1033 COSTS_N_INSNS (1), /* cost of movsx */
1034 COSTS_N_INSNS (1), /* cost of movzx */
1035 8, /* "large" insn */
1036 9, /* MOVE_RATIO */
1037 4, /* cost for loading QImode using movzbl */
1038 {5, 5, 4}, /* cost of loading integer registers
1039 in QImode, HImode and SImode.
1040 Relative to reg-reg move (2). */
1041 {4, 4, 4}, /* cost of storing integer registers */
1042 2, /* cost of reg,reg fld/fst */
1043 {5, 5, 12}, /* cost of loading fp registers
1044 in SFmode, DFmode and XFmode */
1045 {4, 4, 8}, /* cost of storing fp registers
1046 in SFmode, DFmode and XFmode */
1047 2, /* cost of moving MMX register */
1048 {4, 4}, /* cost of loading MMX registers
1049 in SImode and DImode */
1050 {4, 4}, /* cost of storing MMX registers
1051 in SImode and DImode */
1052 2, /* cost of moving SSE register */
1053 {4, 4, 4}, /* cost of loading SSE registers
1054 in SImode, DImode and TImode */
1055 {4, 4, 4}, /* cost of storing SSE registers
1056 in SImode, DImode and TImode */
1057 2, /* MMX or SSE register to integer */
1058 /* On K8:
1059 MOVD reg64, xmmreg Double FSTORE 4
1060 MOVD reg32, xmmreg Double FSTORE 4
1061 On AMDFAM10:
1062 MOVD reg64, xmmreg Double FADD 3
1063 1/1 1/1
1064 MOVD reg32, xmmreg Double FADD 3
1065 1/1 1/1 */
1066 16, /* size of l1 cache. */
1067 2048, /* size of l2 cache. */
1068 64, /* size of prefetch block */
1069 /* New AMD processors never drop prefetches; if they cannot be performed
1070 immediately, they are queued. We set the number of simultaneous prefetches
1071 to a large constant to reflect this (leaving the number of prefetches
1072 completely unlimited is probably not a good idea either, as their
1073 execution also takes some time). */
1074 100, /* number of parallel prefetches */
1075 2, /* Branch cost */
1076 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1077 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1078 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1079 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1080 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1081 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1082
1083 bdver2_memcpy,
1084 bdver2_memset,
1085 6, /* scalar_stmt_cost. */
1086 4, /* scalar load_cost. */
1087 4, /* scalar_store_cost. */
1088 6, /* vec_stmt_cost. */
1089 0, /* vec_to_scalar_cost. */
1090 2, /* scalar_to_vec_cost. */
1091 4, /* vec_align_load_cost. */
1092 4, /* vec_unalign_load_cost. */
1093 4, /* vec_store_cost. */
1094 2, /* cond_taken_branch_cost. */
1095 1, /* cond_not_taken_branch_cost. */
1096 };
1097
1098
1099 /* BDVER3 has an optimized REP instruction for medium-sized blocks, but for
1100 very small blocks it is better to use a loop. For large blocks, a libcall
1101 can do nontemporal accesses and beat inlined code considerably. */
1102 static stringop_algs bdver3_memcpy[2] = {
1103 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1104 {-1, rep_prefix_4_byte, false}}},
1105 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1106 {-1, libcall, false}}}};
1107 static stringop_algs bdver3_memset[2] = {
1108 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1109 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1110 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1111 {-1, libcall, false}}}};
1112 struct processor_costs bdver3_cost = {
1113 COSTS_N_INSNS (1), /* cost of an add instruction */
1114 COSTS_N_INSNS (1), /* cost of a lea instruction */
1115 COSTS_N_INSNS (1), /* variable shift costs */
1116 COSTS_N_INSNS (1), /* constant shift costs */
1117 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1118 COSTS_N_INSNS (4), /* HI */
1119 COSTS_N_INSNS (4), /* SI */
1120 COSTS_N_INSNS (6), /* DI */
1121 COSTS_N_INSNS (6)}, /* other */
1122 0, /* cost of multiply per each bit set */
1123 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1124 COSTS_N_INSNS (35), /* HI */
1125 COSTS_N_INSNS (51), /* SI */
1126 COSTS_N_INSNS (83), /* DI */
1127 COSTS_N_INSNS (83)}, /* other */
1128 COSTS_N_INSNS (1), /* cost of movsx */
1129 COSTS_N_INSNS (1), /* cost of movzx */
1130 8, /* "large" insn */
1131 9, /* MOVE_RATIO */
1132 4, /* cost for loading QImode using movzbl */
1133 {5, 5, 4}, /* cost of loading integer registers
1134 in QImode, HImode and SImode.
1135 Relative to reg-reg move (2). */
1136 {4, 4, 4}, /* cost of storing integer registers */
1137 2, /* cost of reg,reg fld/fst */
1138 {5, 5, 12}, /* cost of loading fp registers
1139 in SFmode, DFmode and XFmode */
1140 {4, 4, 8}, /* cost of storing fp registers
1141 in SFmode, DFmode and XFmode */
1142 2, /* cost of moving MMX register */
1143 {4, 4}, /* cost of loading MMX registers
1144 in SImode and DImode */
1145 {4, 4}, /* cost of storing MMX registers
1146 in SImode and DImode */
1147 2, /* cost of moving SSE register */
1148 {4, 4, 4}, /* cost of loading SSE registers
1149 in SImode, DImode and TImode */
1150 {4, 4, 4}, /* cost of storing SSE registers
1151 in SImode, DImode and TImode */
1152 2, /* MMX or SSE register to integer */
1153 16, /* size of l1 cache. */
1154 2048, /* size of l2 cache. */
1155 64, /* size of prefetch block */
1156 /* New AMD processors never drop prefetches; if they cannot be performed
1157 immediately, they are queued. We set the number of simultaneous prefetches
1158 to a large constant to reflect this (leaving the number of prefetches
1159 completely unlimited is probably not a good idea either, as their
1160 execution also takes some time). */
1161 100, /* number of parallel prefetches */
1162 2, /* Branch cost */
1163 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1164 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1165 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1166 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1167 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1168 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1169
1170 bdver3_memcpy,
1171 bdver3_memset,
1172 6, /* scalar_stmt_cost. */
1173 4, /* scalar load_cost. */
1174 4, /* scalar_store_cost. */
1175 6, /* vec_stmt_cost. */
1176 0, /* vec_to_scalar_cost. */
1177 2, /* scalar_to_vec_cost. */
1178 4, /* vec_align_load_cost. */
1179 4, /* vec_unalign_load_cost. */
1180 4, /* vec_store_cost. */
1181 2, /* cond_taken_branch_cost. */
1182 1, /* cond_not_taken_branch_cost. */
1183 };
1184
1185 /* BDVER4 has an optimized REP instruction for medium-sized blocks, but for
1186 very small blocks it is better to use a loop. For large blocks, a libcall
1187 can do nontemporal accesses and beat inlined code considerably. */
1188 static stringop_algs bdver4_memcpy[2] = {
1189 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1190 {-1, rep_prefix_4_byte, false}}},
1191 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1192 {-1, libcall, false}}}};
1193 static stringop_algs bdver4_memset[2] = {
1194 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1195 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1196 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1197 {-1, libcall, false}}}};
1198 struct processor_costs bdver4_cost = {
1199 COSTS_N_INSNS (1), /* cost of an add instruction */
1200 COSTS_N_INSNS (1), /* cost of a lea instruction */
1201 COSTS_N_INSNS (1), /* variable shift costs */
1202 COSTS_N_INSNS (1), /* constant shift costs */
1203 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1204 COSTS_N_INSNS (4), /* HI */
1205 COSTS_N_INSNS (4), /* SI */
1206 COSTS_N_INSNS (6), /* DI */
1207 COSTS_N_INSNS (6)}, /* other */
1208 0, /* cost of multiply per each bit set */
1209 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1210 COSTS_N_INSNS (35), /* HI */
1211 COSTS_N_INSNS (51), /* SI */
1212 COSTS_N_INSNS (83), /* DI */
1213 COSTS_N_INSNS (83)}, /* other */
1214 COSTS_N_INSNS (1), /* cost of movsx */
1215 COSTS_N_INSNS (1), /* cost of movzx */
1216 8, /* "large" insn */
1217 9, /* MOVE_RATIO */
1218 4, /* cost for loading QImode using movzbl */
1219 {5, 5, 4}, /* cost of loading integer registers
1220 in QImode, HImode and SImode.
1221 Relative to reg-reg move (2). */
1222 {4, 4, 4}, /* cost of storing integer registers */
1223 2, /* cost of reg,reg fld/fst */
1224 {5, 5, 12}, /* cost of loading fp registers
1225 in SFmode, DFmode and XFmode */
1226 {4, 4, 8}, /* cost of storing fp registers
1227 in SFmode, DFmode and XFmode */
1228 2, /* cost of moving MMX register */
1229 {4, 4}, /* cost of loading MMX registers
1230 in SImode and DImode */
1231 {4, 4}, /* cost of storing MMX registers
1232 in SImode and DImode */
1233 2, /* cost of moving SSE register */
1234 {4, 4, 4}, /* cost of loading SSE registers
1235 in SImode, DImode and TImode */
1236 {4, 4, 4}, /* cost of storing SSE registers
1237 in SImode, DImode and TImode */
1238 2, /* MMX or SSE register to integer */
1239 16, /* size of l1 cache. */
1240 2048, /* size of l2 cache. */
1241 64, /* size of prefetch block */
1242 /* New AMD processors never drop prefetches; if they cannot be performed
1243 immediately, they are queued. We set the number of simultaneous prefetches
1244 to a large constant to reflect this (leaving the number of prefetches
1245 completely unlimited is probably not a good idea either, as their
1246 execution also takes some time). */
1247 100, /* number of parallel prefetches */
1248 2, /* Branch cost */
1249 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1250 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1251 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1252 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1253 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1254 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1255
1256 bdver4_memcpy,
1257 bdver4_memset,
1258 6, /* scalar_stmt_cost. */
1259 4, /* scalar load_cost. */
1260 4, /* scalar_store_cost. */
1261 6, /* vec_stmt_cost. */
1262 0, /* vec_to_scalar_cost. */
1263 2, /* scalar_to_vec_cost. */
1264 4, /* vec_align_load_cost. */
1265 4, /* vec_unalign_load_cost. */
1266 4, /* vec_store_cost. */
1267 2, /* cond_taken_branch_cost. */
1268 1, /* cond_not_taken_branch_cost. */
1269 };
1270
1271 /* BTVER1 has an optimized REP instruction for medium-sized blocks, but for
1272 very small blocks it is better to use a loop. For large blocks, a libcall
1273 can do nontemporal accesses and beat inlined code considerably. */
1274 static stringop_algs btver1_memcpy[2] = {
1275 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1276 {-1, rep_prefix_4_byte, false}}},
1277 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1278 {-1, libcall, false}}}};
1279 static stringop_algs btver1_memset[2] = {
1280 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1281 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1282 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1283 {-1, libcall, false}}}};
1284 const struct processor_costs btver1_cost = {
1285 COSTS_N_INSNS (1), /* cost of an add instruction */
1286 COSTS_N_INSNS (2), /* cost of a lea instruction */
1287 COSTS_N_INSNS (1), /* variable shift costs */
1288 COSTS_N_INSNS (1), /* constant shift costs */
1289 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1290 COSTS_N_INSNS (4), /* HI */
1291 COSTS_N_INSNS (3), /* SI */
1292 COSTS_N_INSNS (4), /* DI */
1293 COSTS_N_INSNS (5)}, /* other */
1294 0, /* cost of multiply per each bit set */
1295 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1296 COSTS_N_INSNS (35), /* HI */
1297 COSTS_N_INSNS (51), /* SI */
1298 COSTS_N_INSNS (83), /* DI */
1299 COSTS_N_INSNS (83)}, /* other */
1300 COSTS_N_INSNS (1), /* cost of movsx */
1301 COSTS_N_INSNS (1), /* cost of movzx */
1302 8, /* "large" insn */
1303 9, /* MOVE_RATIO */
1304 4, /* cost for loading QImode using movzbl */
1305 {3, 4, 3}, /* cost of loading integer registers
1306 in QImode, HImode and SImode.
1307 Relative to reg-reg move (2). */
1308 {3, 4, 3}, /* cost of storing integer registers */
1309 4, /* cost of reg,reg fld/fst */
1310 {4, 4, 12}, /* cost of loading fp registers
1311 in SFmode, DFmode and XFmode */
1312 {6, 6, 8}, /* cost of storing fp registers
1313 in SFmode, DFmode and XFmode */
1314 2, /* cost of moving MMX register */
1315 {3, 3}, /* cost of loading MMX registers
1316 in SImode and DImode */
1317 {4, 4}, /* cost of storing MMX registers
1318 in SImode and DImode */
1319 2, /* cost of moving SSE register */
1320 {4, 4, 3}, /* cost of loading SSE registers
1321 in SImode, DImode and TImode */
1322 {4, 4, 5}, /* cost of storing SSE registers
1323 in SImode, DImode and TImode */
1324 3, /* MMX or SSE register to integer */
1325 /* On K8:
1326 MOVD reg64, xmmreg Double FSTORE 4
1327 MOVD reg32, xmmreg Double FSTORE 4
1328 On AMDFAM10:
1329 MOVD reg64, xmmreg Double FADD 3
1330 1/1 1/1
1331 MOVD reg32, xmmreg Double FADD 3
1332 1/1 1/1 */
1333 32, /* size of l1 cache. */
1334 512, /* size of l2 cache. */
1335 64, /* size of prefetch block */
1336 100, /* number of parallel prefetches */
1337 2, /* Branch cost */
1338 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1339 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1340 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1341 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1342 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1343 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1344
1345 btver1_memcpy,
1346 btver1_memset,
1347 4, /* scalar_stmt_cost. */
1348 2, /* scalar load_cost. */
1349 2, /* scalar_store_cost. */
1350 6, /* vec_stmt_cost. */
1351 0, /* vec_to_scalar_cost. */
1352 2, /* scalar_to_vec_cost. */
1353 2, /* vec_align_load_cost. */
1354 2, /* vec_unalign_load_cost. */
1355 2, /* vec_store_cost. */
1356 2, /* cond_taken_branch_cost. */
1357 1, /* cond_not_taken_branch_cost. */
1358 };
1359
1360 static stringop_algs btver2_memcpy[2] = {
1361 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1362 {-1, rep_prefix_4_byte, false}}},
1363 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1364 {-1, libcall, false}}}};
1365 static stringop_algs btver2_memset[2] = {
1366 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1367 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1368 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1369 {-1, libcall, false}}}};
1370 const struct processor_costs btver2_cost = {
1371 COSTS_N_INSNS (1), /* cost of an add instruction */
1372 COSTS_N_INSNS (2), /* cost of a lea instruction */
1373 COSTS_N_INSNS (1), /* variable shift costs */
1374 COSTS_N_INSNS (1), /* constant shift costs */
1375 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1376 COSTS_N_INSNS (4), /* HI */
1377 COSTS_N_INSNS (3), /* SI */
1378 COSTS_N_INSNS (4), /* DI */
1379 COSTS_N_INSNS (5)}, /* other */
1380 0, /* cost of multiply per each bit set */
1381 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1382 COSTS_N_INSNS (35), /* HI */
1383 COSTS_N_INSNS (51), /* SI */
1384 COSTS_N_INSNS (83), /* DI */
1385 COSTS_N_INSNS (83)}, /* other */
1386 COSTS_N_INSNS (1), /* cost of movsx */
1387 COSTS_N_INSNS (1), /* cost of movzx */
1388 8, /* "large" insn */
1389 9, /* MOVE_RATIO */
1390 4, /* cost for loading QImode using movzbl */
1391 {3, 4, 3}, /* cost of loading integer registers
1392 in QImode, HImode and SImode.
1393 Relative to reg-reg move (2). */
1394 {3, 4, 3}, /* cost of storing integer registers */
1395 4, /* cost of reg,reg fld/fst */
1396 {4, 4, 12}, /* cost of loading fp registers
1397 in SFmode, DFmode and XFmode */
1398 {6, 6, 8}, /* cost of storing fp registers
1399 in SFmode, DFmode and XFmode */
1400 2, /* cost of moving MMX register */
1401 {3, 3}, /* cost of loading MMX registers
1402 in SImode and DImode */
1403 {4, 4}, /* cost of storing MMX registers
1404 in SImode and DImode */
1405 2, /* cost of moving SSE register */
1406 {4, 4, 3}, /* cost of loading SSE registers
1407 in SImode, DImode and TImode */
1408 {4, 4, 5}, /* cost of storing SSE registers
1409 in SImode, DImode and TImode */
1410 3, /* MMX or SSE register to integer */
1411 /* On K8:
1412 MOVD reg64, xmmreg Double FSTORE 4
1413 MOVD reg32, xmmreg Double FSTORE 4
1414 On AMDFAM10:
1415 MOVD reg64, xmmreg Double FADD 3
1416 1/1 1/1
1417 MOVD reg32, xmmreg Double FADD 3
1418 1/1 1/1 */
1419 32, /* size of l1 cache. */
1420 2048, /* size of l2 cache. */
1421 64, /* size of prefetch block */
1422 100, /* number of parallel prefetches */
1423 2, /* Branch cost */
1424 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1425 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1426 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1427 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1428 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1429 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1430 btver2_memcpy,
1431 btver2_memset,
1432 4, /* scalar_stmt_cost. */
1433 2, /* scalar load_cost. */
1434 2, /* scalar_store_cost. */
1435 6, /* vec_stmt_cost. */
1436 0, /* vec_to_scalar_cost. */
1437 2, /* scalar_to_vec_cost. */
1438 2, /* vec_align_load_cost. */
1439 2, /* vec_unalign_load_cost. */
1440 2, /* vec_store_cost. */
1441 2, /* cond_taken_branch_cost. */
1442 1, /* cond_not_taken_branch_cost. */
1443 };
1444
1445 static stringop_algs pentium4_memcpy[2] = {
1446 {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}},
1447 DUMMY_STRINGOP_ALGS};
1448 static stringop_algs pentium4_memset[2] = {
1449 {libcall, {{6, loop_1_byte, false}, {48, loop, false},
1450 {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1451 DUMMY_STRINGOP_ALGS};
1452
1453 static const
1454 struct processor_costs pentium4_cost = {
1455 COSTS_N_INSNS (1), /* cost of an add instruction */
1456 COSTS_N_INSNS (3), /* cost of a lea instruction */
1457 COSTS_N_INSNS (4), /* variable shift costs */
1458 COSTS_N_INSNS (4), /* constant shift costs */
1459 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
1460 COSTS_N_INSNS (15), /* HI */
1461 COSTS_N_INSNS (15), /* SI */
1462 COSTS_N_INSNS (15), /* DI */
1463 COSTS_N_INSNS (15)}, /* other */
1464 0, /* cost of multiply per each bit set */
1465 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
1466 COSTS_N_INSNS (56), /* HI */
1467 COSTS_N_INSNS (56), /* SI */
1468 COSTS_N_INSNS (56), /* DI */
1469 COSTS_N_INSNS (56)}, /* other */
1470 COSTS_N_INSNS (1), /* cost of movsx */
1471 COSTS_N_INSNS (1), /* cost of movzx */
1472 16, /* "large" insn */
1473 6, /* MOVE_RATIO */
1474 2, /* cost for loading QImode using movzbl */
1475 {4, 5, 4}, /* cost of loading integer registers
1476 in QImode, HImode and SImode.
1477 Relative to reg-reg move (2). */
1478 {2, 3, 2}, /* cost of storing integer registers */
1479 2, /* cost of reg,reg fld/fst */
1480 {2, 2, 6}, /* cost of loading fp registers
1481 in SFmode, DFmode and XFmode */
1482 {4, 4, 6}, /* cost of storing fp registers
1483 in SFmode, DFmode and XFmode */
1484 2, /* cost of moving MMX register */
1485 {2, 2}, /* cost of loading MMX registers
1486 in SImode and DImode */
1487 {2, 2}, /* cost of storing MMX registers
1488 in SImode and DImode */
1489 12, /* cost of moving SSE register */
1490 {12, 12, 12}, /* cost of loading SSE registers
1491 in SImode, DImode and TImode */
1492 {2, 2, 8}, /* cost of storing SSE registers
1493 in SImode, DImode and TImode */
1494 10, /* MMX or SSE register to integer */
1495 8, /* size of l1 cache. */
1496 256, /* size of l2 cache. */
1497 64, /* size of prefetch block */
1498 6, /* number of parallel prefetches */
1499 2, /* Branch cost */
1500 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
1501 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
1502 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
1503 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1504 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1505 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
1506 pentium4_memcpy,
1507 pentium4_memset,
1508 1, /* scalar_stmt_cost. */
1509 1, /* scalar load_cost. */
1510 1, /* scalar_store_cost. */
1511 1, /* vec_stmt_cost. */
1512 1, /* vec_to_scalar_cost. */
1513 1, /* scalar_to_vec_cost. */
1514 1, /* vec_align_load_cost. */
1515 2, /* vec_unalign_load_cost. */
1516 1, /* vec_store_cost. */
1517 3, /* cond_taken_branch_cost. */
1518 1, /* cond_not_taken_branch_cost. */
1519 };
1520
1521 static stringop_algs nocona_memcpy[2] = {
1522 {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}},
1523 {libcall, {{32, loop, false}, {20000, rep_prefix_8_byte, false},
1524 {100000, unrolled_loop, false}, {-1, libcall, false}}}};
1525
1526 static stringop_algs nocona_memset[2] = {
1527 {libcall, {{6, loop_1_byte, false}, {48, loop, false},
1528 {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1529 {libcall, {{24, loop, false}, {64, unrolled_loop, false},
1530 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1531
1532 static const
1533 struct processor_costs nocona_cost = {
1534 COSTS_N_INSNS (1), /* cost of an add instruction */
1535 COSTS_N_INSNS (1), /* cost of a lea instruction */
1536 COSTS_N_INSNS (1), /* variable shift costs */
1537 COSTS_N_INSNS (1), /* constant shift costs */
1538 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
1539 COSTS_N_INSNS (10), /* HI */
1540 COSTS_N_INSNS (10), /* SI */
1541 COSTS_N_INSNS (10), /* DI */
1542 COSTS_N_INSNS (10)}, /* other */
1543 0, /* cost of multiply per each bit set */
1544 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
1545 COSTS_N_INSNS (66), /* HI */
1546 COSTS_N_INSNS (66), /* SI */
1547 COSTS_N_INSNS (66), /* DI */
1548 COSTS_N_INSNS (66)}, /* other */
1549 COSTS_N_INSNS (1), /* cost of movsx */
1550 COSTS_N_INSNS (1), /* cost of movzx */
1551 16, /* "large" insn */
1552 17, /* MOVE_RATIO */
1553 4, /* cost for loading QImode using movzbl */
1554 {4, 4, 4}, /* cost of loading integer registers
1555 in QImode, HImode and SImode.
1556 Relative to reg-reg move (2). */
1557 {4, 4, 4}, /* cost of storing integer registers */
1558 3, /* cost of reg,reg fld/fst */
1559 {12, 12, 12}, /* cost of loading fp registers
1560 in SFmode, DFmode and XFmode */
1561 {4, 4, 4}, /* cost of storing fp registers
1562 in SFmode, DFmode and XFmode */
1563 6, /* cost of moving MMX register */
1564 {12, 12}, /* cost of loading MMX registers
1565 in SImode and DImode */
1566 {12, 12}, /* cost of storing MMX registers
1567 in SImode and DImode */
1568 6, /* cost of moving SSE register */
1569 {12, 12, 12}, /* cost of loading SSE registers
1570 in SImode, DImode and TImode */
1571 {12, 12, 12}, /* cost of storing SSE registers
1572 in SImode, DImode and TImode */
1573 8, /* MMX or SSE register to integer */
1574 8, /* size of l1 cache. */
1575 1024, /* size of l2 cache. */
1576 64, /* size of prefetch block */
1577 8, /* number of parallel prefetches */
1578 1, /* Branch cost */
1579 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1580 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1581 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
1582 COSTS_N_INSNS (3), /* cost of FABS instruction. */
1583 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
1584 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
1585 nocona_memcpy,
1586 nocona_memset,
1587 1, /* scalar_stmt_cost. */
1588 1, /* scalar load_cost. */
1589 1, /* scalar_store_cost. */
1590 1, /* vec_stmt_cost. */
1591 1, /* vec_to_scalar_cost. */
1592 1, /* scalar_to_vec_cost. */
1593 1, /* vec_align_load_cost. */
1594 2, /* vec_unalign_load_cost. */
1595 1, /* vec_store_cost. */
1596 3, /* cond_taken_branch_cost. */
1597 1, /* cond_not_taken_branch_cost. */
1598 };
1599
1600 static stringop_algs atom_memcpy[2] = {
1601 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1602 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1603 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1604 static stringop_algs atom_memset[2] = {
1605 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1606 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1607 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1608 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1609 static const
1610 struct processor_costs atom_cost = {
1611 COSTS_N_INSNS (1), /* cost of an add instruction */
1612 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1613 COSTS_N_INSNS (1), /* variable shift costs */
1614 COSTS_N_INSNS (1), /* constant shift costs */
1615 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1616 COSTS_N_INSNS (4), /* HI */
1617 COSTS_N_INSNS (3), /* SI */
1618 COSTS_N_INSNS (4), /* DI */
1619 COSTS_N_INSNS (2)}, /* other */
1620 0, /* cost of multiply per each bit set */
1621 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1622 COSTS_N_INSNS (26), /* HI */
1623 COSTS_N_INSNS (42), /* SI */
1624 COSTS_N_INSNS (74), /* DI */
1625 COSTS_N_INSNS (74)}, /* other */
1626 COSTS_N_INSNS (1), /* cost of movsx */
1627 COSTS_N_INSNS (1), /* cost of movzx */
1628 8, /* "large" insn */
1629 17, /* MOVE_RATIO */
1630 4, /* cost for loading QImode using movzbl */
1631 {4, 4, 4}, /* cost of loading integer registers
1632 in QImode, HImode and SImode.
1633 Relative to reg-reg move (2). */
1634 {4, 4, 4}, /* cost of storing integer registers */
1635 4, /* cost of reg,reg fld/fst */
1636 {12, 12, 12}, /* cost of loading fp registers
1637 in SFmode, DFmode and XFmode */
1638 {6, 6, 8}, /* cost of storing fp registers
1639 in SFmode, DFmode and XFmode */
1640 2, /* cost of moving MMX register */
1641 {8, 8}, /* cost of loading MMX registers
1642 in SImode and DImode */
1643 {8, 8}, /* cost of storing MMX registers
1644 in SImode and DImode */
1645 2, /* cost of moving SSE register */
1646 {8, 8, 8}, /* cost of loading SSE registers
1647 in SImode, DImode and TImode */
1648 {8, 8, 8}, /* cost of storing SSE registers
1649 in SImode, DImode and TImode */
1650 5, /* MMX or SSE register to integer */
1651 32, /* size of l1 cache. */
1652 256, /* size of l2 cache. */
1653 64, /* size of prefetch block */
1654 6, /* number of parallel prefetches */
1655 3, /* Branch cost */
1656 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1657 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1658 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1659 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1660 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1661 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1662 atom_memcpy,
1663 atom_memset,
1664 1, /* scalar_stmt_cost. */
1665 1, /* scalar load_cost. */
1666 1, /* scalar_store_cost. */
1667 1, /* vec_stmt_cost. */
1668 1, /* vec_to_scalar_cost. */
1669 1, /* scalar_to_vec_cost. */
1670 1, /* vec_align_load_cost. */
1671 2, /* vec_unalign_load_cost. */
1672 1, /* vec_store_cost. */
1673 3, /* cond_taken_branch_cost. */
1674 1, /* cond_not_taken_branch_cost. */
1675 };
1676
1677 static stringop_algs slm_memcpy[2] = {
1678 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1679 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1680 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1681 static stringop_algs slm_memset[2] = {
1682 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1683 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1684 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1685 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1686 static const
1687 struct processor_costs slm_cost = {
1688 COSTS_N_INSNS (1), /* cost of an add instruction */
1689 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1690 COSTS_N_INSNS (1), /* variable shift costs */
1691 COSTS_N_INSNS (1), /* constant shift costs */
1692 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1693 COSTS_N_INSNS (3), /* HI */
1694 COSTS_N_INSNS (3), /* SI */
1695 COSTS_N_INSNS (4), /* DI */
1696 COSTS_N_INSNS (2)}, /* other */
1697 0, /* cost of multiply per each bit set */
1698 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1699 COSTS_N_INSNS (26), /* HI */
1700 COSTS_N_INSNS (42), /* SI */
1701 COSTS_N_INSNS (74), /* DI */
1702 COSTS_N_INSNS (74)}, /* other */
1703 COSTS_N_INSNS (1), /* cost of movsx */
1704 COSTS_N_INSNS (1), /* cost of movzx */
1705 8, /* "large" insn */
1706 17, /* MOVE_RATIO */
1707 4, /* cost for loading QImode using movzbl */
1708 {4, 4, 4}, /* cost of loading integer registers
1709 in QImode, HImode and SImode.
1710 Relative to reg-reg move (2). */
1711 {4, 4, 4}, /* cost of storing integer registers */
1712 4, /* cost of reg,reg fld/fst */
1713 {12, 12, 12}, /* cost of loading fp registers
1714 in SFmode, DFmode and XFmode */
1715 {6, 6, 8}, /* cost of storing fp registers
1716 in SFmode, DFmode and XFmode */
1717 2, /* cost of moving MMX register */
1718 {8, 8}, /* cost of loading MMX registers
1719 in SImode and DImode */
1720 {8, 8}, /* cost of storing MMX registers
1721 in SImode and DImode */
1722 2, /* cost of moving SSE register */
1723 {8, 8, 8}, /* cost of loading SSE registers
1724 in SImode, DImode and TImode */
1725 {8, 8, 8}, /* cost of storing SSE registers
1726 in SImode, DImode and TImode */
1727 5, /* MMX or SSE register to integer */
1728 32, /* size of l1 cache. */
1729 256, /* size of l2 cache. */
1730 64, /* size of prefetch block */
1731 6, /* number of parallel prefetches */
1732 3, /* Branch cost */
1733 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1734 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1735 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1736 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1737 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1738 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1739 slm_memcpy,
1740 slm_memset,
1741 1, /* scalar_stmt_cost. */
1742 1, /* scalar load_cost. */
1743 1, /* scalar_store_cost. */
1744 1, /* vec_stmt_cost. */
1745 4, /* vec_to_scalar_cost. */
1746 1, /* scalar_to_vec_cost. */
1747 1, /* vec_align_load_cost. */
1748 2, /* vec_unalign_load_cost. */
1749 1, /* vec_store_cost. */
1750 3, /* cond_taken_branch_cost. */
1751 1, /* cond_not_taken_branch_cost. */
1752 };
1753
1754 static stringop_algs intel_memcpy[2] = {
1755 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1756 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1757 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1758 static stringop_algs intel_memset[2] = {
1759 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1760 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1761 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1762 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1763 static const
1764 struct processor_costs intel_cost = {
1765 COSTS_N_INSNS (1), /* cost of an add instruction */
1766 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1767 COSTS_N_INSNS (1), /* variable shift costs */
1768 COSTS_N_INSNS (1), /* constant shift costs */
1769 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1770 COSTS_N_INSNS (3), /* HI */
1771 COSTS_N_INSNS (3), /* SI */
1772 COSTS_N_INSNS (4), /* DI */
1773 COSTS_N_INSNS (2)}, /* other */
1774 0, /* cost of multiply per each bit set */
1775 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1776 COSTS_N_INSNS (26), /* HI */
1777 COSTS_N_INSNS (42), /* SI */
1778 COSTS_N_INSNS (74), /* DI */
1779 COSTS_N_INSNS (74)}, /* other */
1780 COSTS_N_INSNS (1), /* cost of movsx */
1781 COSTS_N_INSNS (1), /* cost of movzx */
1782 8, /* "large" insn */
1783 17, /* MOVE_RATIO */
1784 4, /* cost for loading QImode using movzbl */
1785 {4, 4, 4}, /* cost of loading integer registers
1786 in QImode, HImode and SImode.
1787 Relative to reg-reg move (2). */
1788 {4, 4, 4}, /* cost of storing integer registers */
1789 4, /* cost of reg,reg fld/fst */
1790 {12, 12, 12}, /* cost of loading fp registers
1791 in SFmode, DFmode and XFmode */
1792 {6, 6, 8}, /* cost of storing fp registers
1793 in SFmode, DFmode and XFmode */
1794 2, /* cost of moving MMX register */
1795 {8, 8}, /* cost of loading MMX registers
1796 in SImode and DImode */
1797 {8, 8}, /* cost of storing MMX registers
1798 in SImode and DImode */
1799 2, /* cost of moving SSE register */
1800 {8, 8, 8}, /* cost of loading SSE registers
1801 in SImode, DImode and TImode */
1802 {8, 8, 8}, /* cost of storing SSE registers
1803 in SImode, DImode and TImode */
1804 5, /* MMX or SSE register to integer */
1805 32, /* size of l1 cache. */
1806 256, /* size of l2 cache. */
1807 64, /* size of prefetch block */
1808 6, /* number of parallel prefetches */
1809 3, /* Branch cost */
1810 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1811 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1812 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1813 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1814 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1815 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1816 intel_memcpy,
1817 intel_memset,
1818 1, /* scalar_stmt_cost. */
1819 1, /* scalar load_cost. */
1820 1, /* scalar_store_cost. */
1821 1, /* vec_stmt_cost. */
1822 4, /* vec_to_scalar_cost. */
1823 1, /* scalar_to_vec_cost. */
1824 1, /* vec_align_load_cost. */
1825 2, /* vec_unalign_load_cost. */
1826 1, /* vec_store_cost. */
1827 3, /* cond_taken_branch_cost. */
1828 1, /* cond_not_taken_branch_cost. */
1829 };
1830
1831 /* Generic should produce code tuned for Core-i7 (and newer chips)
1832 and btver1 (and newer chips). */
1833
1834 static stringop_algs generic_memcpy[2] = {
1835 {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
1836 {-1, libcall, false}}},
1837 {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
1838 {-1, libcall, false}}}};
1839 static stringop_algs generic_memset[2] = {
1840 {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
1841 {-1, libcall, false}}},
1842 {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
1843 {-1, libcall, false}}}};
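/* How to read the stringop tables above and below: the two array elements
   give the 32-bit and 64-bit variants respectively.  Within an entry such as
   the first generic_memcpy one above, the leading algorithm (libcall) is used
   when the block size is not known at compile time; each following
   {max, alg, noalign} triple selects ALG for known sizes up to MAX, and a
   max of -1 terminates the list.  */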
1844 static const
1845 struct processor_costs generic_cost = {
1846 COSTS_N_INSNS (1), /* cost of an add instruction */
1847 /* On all chips taken into consideration, lea is 2 cycles or more. With
1848 this cost, however, our current implementation of synth_mult results in
1849 the use of unnecessary temporary registers, causing regressions on
1850 several SPECfp benchmarks. */
1851 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1852 COSTS_N_INSNS (1), /* variable shift costs */
1853 COSTS_N_INSNS (1), /* constant shift costs */
1854 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1855 COSTS_N_INSNS (4), /* HI */
1856 COSTS_N_INSNS (3), /* SI */
1857 COSTS_N_INSNS (4), /* DI */
1858 COSTS_N_INSNS (2)}, /* other */
1859 0, /* cost of multiply per each bit set */
1860 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1861 COSTS_N_INSNS (26), /* HI */
1862 COSTS_N_INSNS (42), /* SI */
1863 COSTS_N_INSNS (74), /* DI */
1864 COSTS_N_INSNS (74)}, /* other */
1865 COSTS_N_INSNS (1), /* cost of movsx */
1866 COSTS_N_INSNS (1), /* cost of movzx */
1867 8, /* "large" insn */
1868 17, /* MOVE_RATIO */
1869 4, /* cost for loading QImode using movzbl */
1870 {4, 4, 4}, /* cost of loading integer registers
1871 in QImode, HImode and SImode.
1872 Relative to reg-reg move (2). */
1873 {4, 4, 4}, /* cost of storing integer registers */
1874 4, /* cost of reg,reg fld/fst */
1875 {12, 12, 12}, /* cost of loading fp registers
1876 in SFmode, DFmode and XFmode */
1877 {6, 6, 8}, /* cost of storing fp registers
1878 in SFmode, DFmode and XFmode */
1879 2, /* cost of moving MMX register */
1880 {8, 8}, /* cost of loading MMX registers
1881 in SImode and DImode */
1882 {8, 8}, /* cost of storing MMX registers
1883 in SImode and DImode */
1884 2, /* cost of moving SSE register */
1885 {8, 8, 8}, /* cost of loading SSE registers
1886 in SImode, DImode and TImode */
1887 {8, 8, 8}, /* cost of storing SSE registers
1888 in SImode, DImode and TImode */
1889 5, /* MMX or SSE register to integer */
1890 32, /* size of l1 cache. */
1891 512, /* size of l2 cache. */
1892 64, /* size of prefetch block */
1893 6, /* number of parallel prefetches */
1894 /* Benchmarks show large regressions on the K8 sixtrack benchmark when this
1895 value is increased to the perhaps more appropriate value of 5. */
1896 3, /* Branch cost */
1897 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1898 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1899 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1900 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1901 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1902 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1903 generic_memcpy,
1904 generic_memset,
1905 1, /* scalar_stmt_cost. */
1906 1, /* scalar load_cost. */
1907 1, /* scalar_store_cost. */
1908 1, /* vec_stmt_cost. */
1909 1, /* vec_to_scalar_cost. */
1910 1, /* scalar_to_vec_cost. */
1911 1, /* vec_align_load_cost. */
1912 2, /* vec_unalign_load_cost. */
1913 1, /* vec_store_cost. */
1914 3, /* cond_taken_branch_cost. */
1915 1, /* cond_not_taken_branch_cost. */
1916 };
1917
1918 /* core_cost should produce code tuned for the Core family of CPUs. */
1919 static stringop_algs core_memcpy[2] = {
1920 {libcall, {{1024, rep_prefix_4_byte, true}, {-1, libcall, false}}},
1921 {libcall, {{24, loop, true}, {128, rep_prefix_8_byte, true},
1922 {-1, libcall, false}}}};
1923 static stringop_algs core_memset[2] = {
1924 {libcall, {{6, loop_1_byte, true},
1925 {24, loop, true},
1926 {8192, rep_prefix_4_byte, true},
1927 {-1, libcall, false}}},
1928 {libcall, {{24, loop, true}, {512, rep_prefix_8_byte, true},
1929 {-1, libcall, false}}}};
1930
1931 static const
1932 struct processor_costs core_cost = {
1933 COSTS_N_INSNS (1), /* cost of an add instruction */
1934 /* On all chips taken into consideration, lea is 2 cycles or more. With
1935 this cost, however, our current implementation of synth_mult results in
1936 the use of unnecessary temporary registers, causing regressions on
1937 several SPECfp benchmarks. */
1938 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1939 COSTS_N_INSNS (1), /* variable shift costs */
1940 COSTS_N_INSNS (1), /* constant shift costs */
1941 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1942 COSTS_N_INSNS (4), /* HI */
1943 COSTS_N_INSNS (3), /* SI */
1944 COSTS_N_INSNS (4), /* DI */
1945 COSTS_N_INSNS (2)}, /* other */
1946 0, /* cost of multiply per each bit set */
1947 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1948 COSTS_N_INSNS (26), /* HI */
1949 COSTS_N_INSNS (42), /* SI */
1950 COSTS_N_INSNS (74), /* DI */
1951 COSTS_N_INSNS (74)}, /* other */
1952 COSTS_N_INSNS (1), /* cost of movsx */
1953 COSTS_N_INSNS (1), /* cost of movzx */
1954 8, /* "large" insn */
1955 17, /* MOVE_RATIO */
1956 4, /* cost for loading QImode using movzbl */
1957 {4, 4, 4}, /* cost of loading integer registers
1958 in QImode, HImode and SImode.
1959 Relative to reg-reg move (2). */
1960 {4, 4, 4}, /* cost of storing integer registers */
1961 4, /* cost of reg,reg fld/fst */
1962 {12, 12, 12}, /* cost of loading fp registers
1963 in SFmode, DFmode and XFmode */
1964 {6, 6, 8}, /* cost of storing fp registers
1965 in SFmode, DFmode and XFmode */
1966 2, /* cost of moving MMX register */
1967 {8, 8}, /* cost of loading MMX registers
1968 in SImode and DImode */
1969 {8, 8}, /* cost of storing MMX registers
1970 in SImode and DImode */
1971 2, /* cost of moving SSE register */
1972 {8, 8, 8}, /* cost of loading SSE registers
1973 in SImode, DImode and TImode */
1974 {8, 8, 8}, /* cost of storing SSE registers
1975 in SImode, DImode and TImode */
1976 5, /* MMX or SSE register to integer */
1977 64, /* size of l1 cache. */
1978 512, /* size of l2 cache. */
1979 64, /* size of prefetch block */
1980 6, /* number of parallel prefetches */
1981 /* FIXME perhaps more appropriate value is 5. */
1982 3, /* Branch cost */
1983 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1984 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1985 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1986 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1987 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1988 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1989 core_memcpy,
1990 core_memset,
1991 1, /* scalar_stmt_cost. */
1992 1, /* scalar load_cost. */
1993 1, /* scalar_store_cost. */
1994 1, /* vec_stmt_cost. */
1995 1, /* vec_to_scalar_cost. */
1996 1, /* scalar_to_vec_cost. */
1997 1, /* vec_align_load_cost. */
1998 2, /* vec_unalign_load_cost. */
1999 1, /* vec_store_cost. */
2000 3, /* cond_taken_branch_cost. */
2001 1, /* cond_not_taken_branch_cost. */
2002 };
2003
2004
2005 /* Set by -mtune. */
2006 const struct processor_costs *ix86_tune_cost = &pentium_cost;
2007
2008 /* Set by -mtune or -Os. */
2009 const struct processor_costs *ix86_cost = &pentium_cost;
2010
2011 /* Processor feature/optimization bitmasks. */
2012 #define m_386 (1<<PROCESSOR_I386)
2013 #define m_486 (1<<PROCESSOR_I486)
2014 #define m_PENT (1<<PROCESSOR_PENTIUM)
2015 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
2016 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
2017 #define m_NOCONA (1<<PROCESSOR_NOCONA)
2018 #define m_P4_NOCONA (m_PENT4 | m_NOCONA)
2019 #define m_CORE2 (1<<PROCESSOR_CORE2)
2020 #define m_NEHALEM (1<<PROCESSOR_NEHALEM)
2021 #define m_SANDYBRIDGE (1<<PROCESSOR_SANDYBRIDGE)
2022 #define m_HASWELL (1<<PROCESSOR_HASWELL)
2023 #define m_CORE_ALL (m_CORE2 | m_NEHALEM | m_SANDYBRIDGE | m_HASWELL)
2024 #define m_BONNELL (1<<PROCESSOR_BONNELL)
2025 #define m_SILVERMONT (1<<PROCESSOR_SILVERMONT)
2026 #define m_INTEL (1<<PROCESSOR_INTEL)
2027
2028 #define m_GEODE (1<<PROCESSOR_GEODE)
2029 #define m_K6 (1<<PROCESSOR_K6)
2030 #define m_K6_GEODE (m_K6 | m_GEODE)
2031 #define m_K8 (1<<PROCESSOR_K8)
2032 #define m_ATHLON (1<<PROCESSOR_ATHLON)
2033 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
2034 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
2035 #define m_BDVER1 (1<<PROCESSOR_BDVER1)
2036 #define m_BDVER2 (1<<PROCESSOR_BDVER2)
2037 #define m_BDVER3 (1<<PROCESSOR_BDVER3)
2038 #define m_BDVER4 (1<<PROCESSOR_BDVER4)
2039 #define m_BTVER1 (1<<PROCESSOR_BTVER1)
2040 #define m_BTVER2 (1<<PROCESSOR_BTVER2)
2041 #define m_BDVER (m_BDVER1 | m_BDVER2 | m_BDVER3 | m_BDVER4)
2042 #define m_BTVER (m_BTVER1 | m_BTVER2)
2043 #define m_AMD_MULTIPLE (m_ATHLON_K8 | m_AMDFAM10 | m_BDVER | m_BTVER)
2044
2045 #define m_GENERIC (1<<PROCESSOR_GENERIC)
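/* A feature selector built from the masks above is tested against
   (1u << ix86_tune); for example, when tuning for PROCESSOR_HASWELL a
   selector containing m_HASWELL (and therefore also one containing
   m_CORE_ALL, which includes it) matches.  Illustrative; see
   set_ix86_tune_features below for the actual test.  */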
2046
2047 const char* ix86_tune_feature_names[X86_TUNE_LAST] = {
2048 #undef DEF_TUNE
2049 #define DEF_TUNE(tune, name, selector) name,
2050 #include "x86-tune.def"
2051 #undef DEF_TUNE
2052 };
2053
2054 /* Feature tests against the various tunings. */
2055 unsigned char ix86_tune_features[X86_TUNE_LAST];
2056
2057 /* Feature tests against the various tunings used to create ix86_tune_features
2058 based on the processor mask. */
2059 static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
2060 #undef DEF_TUNE
2061 #define DEF_TUNE(tune, name, selector) selector,
2062 #include "x86-tune.def"
2063 #undef DEF_TUNE
2064 };
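/* As an illustration (not an actual entry), a line in x86-tune.def of the form
     DEF_TUNE (X86_TUNE_EXAMPLE, "example_feature", m_CORE_ALL | m_GENERIC)
   contributes the string "example_feature" to ix86_tune_feature_names and the
   selector mask (m_CORE_ALL | m_GENERIC) to initial_ix86_tune_features,
   enabling the feature for all Core tunings and for the generic tuning.  */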
2065
2066 /* Feature tests against the various architecture variations. */
2067 unsigned char ix86_arch_features[X86_ARCH_LAST];
2068
2069 /* Feature tests against the various architecture variations, used to create
2070 ix86_arch_features based on the processor mask. */
2071 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
2072 /* X86_ARCH_CMOV: Conditional move was added for pentiumpro. */
2073 ~(m_386 | m_486 | m_PENT | m_K6),
2074
2075 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
2076 ~m_386,
2077
2078 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
2079 ~(m_386 | m_486),
2080
2081 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
2082 ~m_386,
2083
2084 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
2085 ~m_386,
2086 };
2087
2088 /* In case the average insn count for single function invocation is
2089 lower than this constant, emit fast (but longer) prologue and
2090 epilogue code. */
2091 #define FAST_PROLOGUE_INSN_COUNT 20
2092
2093 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
2094 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
2095 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
2096 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
2097
2098 /* Array of the smallest class containing reg number REGNO, indexed by
2099 REGNO. Used by REGNO_REG_CLASS in i386.h. */
2100
2101 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
2102 {
2103 /* ax, dx, cx, bx */
2104 AREG, DREG, CREG, BREG,
2105 /* si, di, bp, sp */
2106 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
2107 /* FP registers */
2108 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
2109 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
2110 /* arg pointer */
2111 NON_Q_REGS,
2112 /* flags, fpsr, fpcr, frame */
2113 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
2114 /* SSE registers */
2115 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2116 SSE_REGS, SSE_REGS,
2117 /* MMX registers */
2118 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
2119 MMX_REGS, MMX_REGS,
2120 /* REX registers */
2121 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2122 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2123 /* SSE REX registers */
2124 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2125 SSE_REGS, SSE_REGS,
2126 /* AVX-512 SSE registers */
2127 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2128 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2129 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2130 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2131 /* Mask registers. */
2132 MASK_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
2133 MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
2134 };
2135
2136 /* The "default" register map used in 32bit mode. */
2137
2138 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
2139 {
2140 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
2141 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
2142 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2143 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
2144 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
2145 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2146 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2147 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23*/
2148 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31*/
2149 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
2150 };
2151
2152 /* The "default" register map used in 64bit mode. */
2153
2154 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
2155 {
2156 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
2157 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
2158 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2159 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
2160 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
2161 8, 9, 10, 11, 12, 13, 14, 15, /* extended integer registers */
2162 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
2163 67, 68, 69, 70, 71, 72, 73, 74, /* AVX-512 registers 16-23 */
2164 75, 76, 77, 78, 79, 80, 81, 82, /* AVX-512 registers 24-31 */
2165 118, 119, 120, 121, 122, 123, 124, 125, /* Mask registers */
2166 };
2167
2168 /* Define the register numbers to be used in Dwarf debugging information.
2169 The SVR4 reference port C compiler uses the following register numbers
2170 in its Dwarf output code:
2171 0 for %eax (gcc regno = 0)
2172 1 for %ecx (gcc regno = 2)
2173 2 for %edx (gcc regno = 1)
2174 3 for %ebx (gcc regno = 3)
2175 4 for %esp (gcc regno = 7)
2176 5 for %ebp (gcc regno = 6)
2177 6 for %esi (gcc regno = 4)
2178 7 for %edi (gcc regno = 5)
2179 The following three DWARF register numbers are never generated by
2180 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
2181 believes these numbers have these meanings.
2182 8 for %eip (no gcc equivalent)
2183 9 for %eflags (gcc regno = 17)
2184 10 for %trapno (no gcc equivalent)
2185 It is not at all clear how we should number the FP stack registers
2186 for the x86 architecture. If the version of SDB on x86/svr4 were
2187 a bit less brain dead with respect to floating-point then we would
2188 have a precedent to follow with respect to DWARF register numbers
2189 for x86 FP registers, but the SDB on x86/svr4 is so completely
2190 broken with respect to FP registers that it is hardly worth thinking
2191 of it as something to strive for compatibility with.
2192 The version of x86/svr4 SDB I have at the moment does (partially)
2193 seem to believe that DWARF register number 11 is associated with
2194 the x86 register %st(0), but that's about all. Higher DWARF
2195 register numbers don't seem to be associated with anything in
2196 particular, and even for DWARF regno 11, SDB only seems to under-
2197 stand that it should say that a variable lives in %st(0) (when
2198 asked via an `=' command) if we said it was in DWARF regno 11,
2199 but SDB still prints garbage when asked for the value of the
2200 variable in question (via a `/' command).
2201 (Also note that the labels SDB prints for various FP stack regs
2202 when doing an `x' command are all wrong.)
2203 Note that these problems generally don't affect the native SVR4
2204 C compiler because it doesn't allow the use of -O with -g and
2205 because when it is *not* optimizing, it allocates a memory
2206 location for each floating-point variable, and the memory
2207 location is what gets described in the DWARF AT_location
2208 attribute for the variable in question.
2209 Regardless of the severe mental illness of the x86/svr4 SDB, we
2210 do something sensible here and we use the following DWARF
2211 register numbers. Note that these are all stack-top-relative
2212 numbers.
2213 11 for %st(0) (gcc regno = 8)
2214 12 for %st(1) (gcc regno = 9)
2215 13 for %st(2) (gcc regno = 10)
2216 14 for %st(3) (gcc regno = 11)
2217 15 for %st(4) (gcc regno = 12)
2218 16 for %st(5) (gcc regno = 13)
2219 17 for %st(6) (gcc regno = 14)
2220 18 for %st(7) (gcc regno = 15)
2221 */
2222 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
2223 {
2224 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
2225 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
2226 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2227 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
2228 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
2229 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2230 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2231 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23*/
2232 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31*/
2233 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
2234 };
2235
2236 /* Define parameter passing and return registers. */
2237
2238 static int const x86_64_int_parameter_registers[6] =
2239 {
2240 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
2241 };
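/* I.e. the SysV AMD64 ABI passes the first six integer/pointer arguments in
   %rdi, %rsi, %rdx, %rcx, %r8 and %r9, in that order; for example, in a call
   f (a, b, c) the arguments a, b and c arrive in %rdi, %rsi and %rdx.  */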
2242
2243 static int const x86_64_ms_abi_int_parameter_registers[4] =
2244 {
2245 CX_REG, DX_REG, R8_REG, R9_REG
2246 };
2247
2248 static int const x86_64_int_return_registers[4] =
2249 {
2250 AX_REG, DX_REG, DI_REG, SI_REG
2251 };
2252
2253 /* Additional registers that are clobbered by SYSV calls. */
2254
2255 int const x86_64_ms_sysv_extra_clobbered_registers[12] =
2256 {
2257 SI_REG, DI_REG,
2258 XMM6_REG, XMM7_REG,
2259 XMM8_REG, XMM9_REG, XMM10_REG, XMM11_REG,
2260 XMM12_REG, XMM13_REG, XMM14_REG, XMM15_REG
2261 };
2262
2263 /* Define the structure for the machine field in struct function. */
2264
2265 struct GTY(()) stack_local_entry {
2266 unsigned short mode;
2267 unsigned short n;
2268 rtx rtl;
2269 struct stack_local_entry *next;
2270 };
2271
2272 /* Structure describing stack frame layout.
2273 Stack grows downward:
2274
2275 [arguments]
2276 <- ARG_POINTER
2277 saved pc
2278
2279 saved static chain if ix86_static_chain_on_stack
2280
2281 saved frame pointer if frame_pointer_needed
2282 <- HARD_FRAME_POINTER
2283 [saved regs]
2284 <- regs_save_offset
2285 [padding0]
2286
2287 [saved SSE regs]
2288 <- sse_regs_save_offset
2289 [padding1] |
2290 | <- FRAME_POINTER
2291 [va_arg registers] |
2292 |
2293 [frame] |
2294 |
2295 [padding2] | = to_allocate
2296 <- STACK_POINTER
2297 */
2298 struct ix86_frame
2299 {
2300 int nsseregs;
2301 int nregs;
2302 int va_arg_size;
2303 int red_zone_size;
2304 int outgoing_arguments_size;
2305
2306 /* The offsets relative to ARG_POINTER. */
2307 HOST_WIDE_INT frame_pointer_offset;
2308 HOST_WIDE_INT hard_frame_pointer_offset;
2309 HOST_WIDE_INT stack_pointer_offset;
2310 HOST_WIDE_INT hfp_save_offset;
2311 HOST_WIDE_INT reg_save_offset;
2312 HOST_WIDE_INT sse_reg_save_offset;
2313
2314 /* When save_regs_using_mov is set, emit prologue using
2315 move instead of push instructions. */
2316 bool save_regs_using_mov;
2317 };
2318
2319 /* Which cpu are we scheduling for. */
2320 enum attr_cpu ix86_schedule;
2321
2322 /* Which cpu are we optimizing for. */
2323 enum processor_type ix86_tune;
2324
2325 /* Which instruction set architecture to use. */
2326 enum processor_type ix86_arch;
2327
2328 /* True if processor has SSE prefetch instruction. */
2329 unsigned char x86_prefetch_sse;
2330
2331 /* -mstackrealign option */
2332 static const char ix86_force_align_arg_pointer_string[]
2333 = "force_align_arg_pointer";
2334
2335 static rtx (*ix86_gen_leave) (void);
2336 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
2337 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
2338 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
2339 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
2340 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
2341 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
2342 static rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx);
2343 static rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx);
2344 static rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx);
2345 static rtx (*ix86_gen_tls_global_dynamic_64) (rtx, rtx, rtx);
2346 static rtx (*ix86_gen_tls_local_dynamic_base_64) (rtx, rtx);
2347
2348 /* Preferred alignment for stack boundary in bits. */
2349 unsigned int ix86_preferred_stack_boundary;
2350
2351 /* Alignment for incoming stack boundary in bits specified at
2352 command line. */
2353 static unsigned int ix86_user_incoming_stack_boundary;
2354
2355 /* Default alignment for incoming stack boundary in bits. */
2356 static unsigned int ix86_default_incoming_stack_boundary;
2357
2358 /* Alignment for incoming stack boundary in bits. */
2359 unsigned int ix86_incoming_stack_boundary;
2360
2361 /* Calling abi specific va_list type nodes. */
2362 static GTY(()) tree sysv_va_list_type_node;
2363 static GTY(()) tree ms_va_list_type_node;
2364
2365 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
2366 char internal_label_prefix[16];
2367 int internal_label_prefix_len;
2368
2369 /* Fence to use after loop using movnt. */
2370 tree x86_mfence;
2371
2372 /* Register class used for passing a given 64-bit part of the argument.
2373 These represent the classes documented by the psABI, with the exception of
2374 the SSESF and SSEDF classes, which are basically the SSE class; gcc just
2375 uses SFmode or DFmode moves instead of DImode to avoid reformatting penalties.
2376 
2377 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
2378 whenever possible (the upper half of the eightbyte is then just padding). */
2379 enum x86_64_reg_class
2380 {
2381 X86_64_NO_CLASS,
2382 X86_64_INTEGER_CLASS,
2383 X86_64_INTEGERSI_CLASS,
2384 X86_64_SSE_CLASS,
2385 X86_64_SSESF_CLASS,
2386 X86_64_SSEDF_CLASS,
2387 X86_64_SSEUP_CLASS,
2388 X86_64_X87_CLASS,
2389 X86_64_X87UP_CLASS,
2390 X86_64_COMPLEX_X87_CLASS,
2391 X86_64_MEMORY_CLASS
2392 };
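/* As a rough illustration (not a full description of the classification
   algorithm), a structure such as  struct { double d; int i; }  passed by
   value occupies two eightbytes: the first is classified X86_64_SSEDF_CLASS
   (a lone double, moved in DFmode) and the second X86_64_INTEGERSI_CLASS
   (only the low 32 bits carry data, so an SImode move suffices).  */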
2393
2394 #define MAX_CLASSES 8
2395
2396 /* Table of constants used by fldpi, fldln2, etc.... */
2397 static REAL_VALUE_TYPE ext_80387_constants_table [5];
2398 static bool ext_80387_constants_init = 0;
2399
2400 \f
2401 static struct machine_function * ix86_init_machine_status (void);
2402 static rtx ix86_function_value (const_tree, const_tree, bool);
2403 static bool ix86_function_value_regno_p (const unsigned int);
2404 static unsigned int ix86_function_arg_boundary (enum machine_mode,
2405 const_tree);
2406 static rtx ix86_static_chain (const_tree, bool);
2407 static int ix86_function_regparm (const_tree, const_tree);
2408 static void ix86_compute_frame_layout (struct ix86_frame *);
2409 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
2410 rtx, rtx, int);
2411 static void ix86_add_new_builtins (HOST_WIDE_INT);
2412 static tree ix86_canonical_va_list_type (tree);
2413 static void predict_jump (int);
2414 static unsigned int split_stack_prologue_scratch_regno (void);
2415 static bool i386_asm_output_addr_const_extra (FILE *, rtx);
2416
2417 enum ix86_function_specific_strings
2418 {
2419 IX86_FUNCTION_SPECIFIC_ARCH,
2420 IX86_FUNCTION_SPECIFIC_TUNE,
2421 IX86_FUNCTION_SPECIFIC_MAX
2422 };
2423
2424 static char *ix86_target_string (HOST_WIDE_INT, int, const char *,
2425 const char *, enum fpmath_unit, bool);
2426 static void ix86_function_specific_save (struct cl_target_option *,
2427 struct gcc_options *opts);
2428 static void ix86_function_specific_restore (struct gcc_options *opts,
2429 struct cl_target_option *);
2430 static void ix86_function_specific_print (FILE *, int,
2431 struct cl_target_option *);
2432 static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
2433 static bool ix86_valid_target_attribute_inner_p (tree, char *[],
2434 struct gcc_options *,
2435 struct gcc_options *,
2436 struct gcc_options *);
2437 static bool ix86_can_inline_p (tree, tree);
2438 static void ix86_set_current_function (tree);
2439 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
2440
2441 static enum calling_abi ix86_function_abi (const_tree);
2442
2443 \f
2444 #ifndef SUBTARGET32_DEFAULT_CPU
2445 #define SUBTARGET32_DEFAULT_CPU "i386"
2446 #endif
2447
2448 /* Whether -mtune= or -march= were specified */
2449 static int ix86_tune_defaulted;
2450 static int ix86_arch_specified;
2451
2452 /* Vectorization library interface and handlers. */
2453 static tree (*ix86_veclib_handler) (enum built_in_function, tree, tree);
2454
2455 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
2456 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
2457
2458 /* Processor target table, indexed by processor number */
2459 struct ptt
2460 {
2461 const char *const name; /* processor name */
2462 const struct processor_costs *cost; /* Processor costs */
2463 const int align_loop; /* Default alignments. */
2464 const int align_loop_max_skip;
2465 const int align_jump;
2466 const int align_jump_max_skip;
2467 const int align_func;
2468 };
2469
2470 /* This table must be in sync with enum processor_type in i386.h. */
2471 static const struct ptt processor_target_table[PROCESSOR_max] =
2472 {
2473 {"generic", &generic_cost, 16, 10, 16, 10, 16},
2474 {"i386", &i386_cost, 4, 3, 4, 3, 4},
2475 {"i486", &i486_cost, 16, 15, 16, 15, 16},
2476 {"pentium", &pentium_cost, 16, 7, 16, 7, 16},
2477 {"pentiumpro", &pentiumpro_cost, 16, 15, 16, 10, 16},
2478 {"pentium4", &pentium4_cost, 0, 0, 0, 0, 0},
2479 {"nocona", &nocona_cost, 0, 0, 0, 0, 0},
2480 {"core2", &core_cost, 16, 10, 16, 10, 16},
2481 {"nehalem", &core_cost, 16, 10, 16, 10, 16},
2482 {"sandybridge", &core_cost, 16, 10, 16, 10, 16},
2483 {"haswell", &core_cost, 16, 10, 16, 10, 16},
2484 {"bonnell", &atom_cost, 16, 15, 16, 7, 16},
2485 {"silvermont", &slm_cost, 16, 15, 16, 7, 16},
2486 {"intel", &intel_cost, 16, 15, 16, 7, 16},
2487 {"geode", &geode_cost, 0, 0, 0, 0, 0},
2488 {"k6", &k6_cost, 32, 7, 32, 7, 32},
2489 {"athlon", &athlon_cost, 16, 7, 16, 7, 16},
2490 {"k8", &k8_cost, 16, 7, 16, 7, 16},
2491 {"amdfam10", &amdfam10_cost, 32, 24, 32, 7, 32},
2492 {"bdver1", &bdver1_cost, 16, 10, 16, 7, 11},
2493 {"bdver2", &bdver2_cost, 16, 10, 16, 7, 11},
2494 {"bdver3", &bdver3_cost, 16, 10, 16, 7, 11},
2495 {"bdver4", &bdver4_cost, 16, 10, 16, 7, 11},
2496 {"btver1", &btver1_cost, 16, 10, 16, 7, 11},
2497 {"btver2", &btver2_cost, 16, 10, 16, 7, 11}
2498 };
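/* For example, the "k8" row above requests 16-byte alignment of loops and
   jump targets with at most 7 bytes of padding skipped, and 16-byte function
   alignment; these values serve as defaults when the corresponding -falign-*
   options are not specified explicitly (illustrative reading of the table).  */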
2499 \f
2500 static unsigned int
2501 rest_of_handle_insert_vzeroupper (void)
2502 {
2503 int i;
2504
2505 /* vzeroupper instructions are inserted immediately after reload to
2506 account for possible spills from 256-bit registers. The pass
2507 reuses the mode switching infrastructure by re-running the mode
2508 insertion pass, so disable entities that have already been processed. */
2509 for (i = 0; i < MAX_386_ENTITIES; i++)
2510 ix86_optimize_mode_switching[i] = 0;
2511
2512 ix86_optimize_mode_switching[AVX_U128] = 1;
2513
2514 /* Call optimize_mode_switching. */
2515 g->get_passes ()->execute_pass_mode_switching ();
2516 return 0;
2517 }
2518
2519 namespace {
2520
2521 const pass_data pass_data_insert_vzeroupper =
2522 {
2523 RTL_PASS, /* type */
2524 "vzeroupper", /* name */
2525 OPTGROUP_NONE, /* optinfo_flags */
2526 TV_NONE, /* tv_id */
2527 0, /* properties_required */
2528 0, /* properties_provided */
2529 0, /* properties_destroyed */
2530 0, /* todo_flags_start */
2531 TODO_df_finish, /* todo_flags_finish */
2532 };
2533
2534 class pass_insert_vzeroupper : public rtl_opt_pass
2535 {
2536 public:
2537 pass_insert_vzeroupper(gcc::context *ctxt)
2538 : rtl_opt_pass(pass_data_insert_vzeroupper, ctxt)
2539 {}
2540
2541 /* opt_pass methods: */
2542 virtual bool gate (function *)
2543 {
2544 return TARGET_AVX && !TARGET_AVX512F && TARGET_VZEROUPPER;
2545 }
2546
2547 virtual unsigned int execute (function *)
2548 {
2549 return rest_of_handle_insert_vzeroupper ();
2550 }
2551
2552 }; // class pass_insert_vzeroupper
2553
2554 } // anon namespace
2555
2556 rtl_opt_pass *
2557 make_pass_insert_vzeroupper (gcc::context *ctxt)
2558 {
2559 return new pass_insert_vzeroupper (ctxt);
2560 }
2561
2562 /* Return true if a red-zone is in use. */
2563
2564 static inline bool
2565 ix86_using_red_zone (void)
2566 {
2567 return TARGET_RED_ZONE && !TARGET_64BIT_MS_ABI;
2568 }
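/* (The red zone is the 128-byte area below the stack pointer that the SysV
   AMD64 ABI guarantees is not clobbered asynchronously, so leaf functions may
   use it without adjusting %rsp; the MS ABI defines no such area, hence the
   additional check above.)  */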
2569 \f
2570 /* Return a string that documents the current -m options. The caller is
2571 responsible for freeing the string. */
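/* As a rough illustration only (the exact contents and order depend on the
   enabled ISA and flag bits), for a hypothetical 64-bit compilation with
   -march=foo -msse2 the returned string might look like
     "-march=foo -mtune=foo -m64 -msse2 -msse -mfpmath=sse"
   since implied ISA options are listed as well.  */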
2572
2573 static char *
2574 ix86_target_string (HOST_WIDE_INT isa, int flags, const char *arch,
2575 const char *tune, enum fpmath_unit fpmath,
2576 bool add_nl_p)
2577 {
2578 struct ix86_target_opts
2579 {
2580 const char *option; /* option string */
2581 HOST_WIDE_INT mask; /* isa mask options */
2582 };
2583
2584 /* This table is ordered so that options like -msse4.2 that imply
2585 preceding options are matched first. */
2586 static struct ix86_target_opts isa_opts[] =
2587 {
2588 { "-mfma4", OPTION_MASK_ISA_FMA4 },
2589 { "-mfma", OPTION_MASK_ISA_FMA },
2590 { "-mxop", OPTION_MASK_ISA_XOP },
2591 { "-mlwp", OPTION_MASK_ISA_LWP },
2592 { "-mavx512f", OPTION_MASK_ISA_AVX512F },
2593 { "-mavx512er", OPTION_MASK_ISA_AVX512ER },
2594 { "-mavx512cd", OPTION_MASK_ISA_AVX512CD },
2595 { "-mavx512pf", OPTION_MASK_ISA_AVX512PF },
2596 { "-msse4a", OPTION_MASK_ISA_SSE4A },
2597 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
2598 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
2599 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
2600 { "-msse3", OPTION_MASK_ISA_SSE3 },
2601 { "-msse2", OPTION_MASK_ISA_SSE2 },
2602 { "-msse", OPTION_MASK_ISA_SSE },
2603 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
2604 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
2605 { "-mmmx", OPTION_MASK_ISA_MMX },
2606 { "-mabm", OPTION_MASK_ISA_ABM },
2607 { "-mbmi", OPTION_MASK_ISA_BMI },
2608 { "-mbmi2", OPTION_MASK_ISA_BMI2 },
2609 { "-mlzcnt", OPTION_MASK_ISA_LZCNT },
2610 { "-mhle", OPTION_MASK_ISA_HLE },
2611 { "-mfxsr", OPTION_MASK_ISA_FXSR },
2612 { "-mrdseed", OPTION_MASK_ISA_RDSEED },
2613 { "-mprfchw", OPTION_MASK_ISA_PRFCHW },
2614 { "-madx", OPTION_MASK_ISA_ADX },
2615 { "-mtbm", OPTION_MASK_ISA_TBM },
2616 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
2617 { "-mmovbe", OPTION_MASK_ISA_MOVBE },
2618 { "-mcrc32", OPTION_MASK_ISA_CRC32 },
2619 { "-maes", OPTION_MASK_ISA_AES },
2620 { "-msha", OPTION_MASK_ISA_SHA },
2621 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
2622 { "-mfsgsbase", OPTION_MASK_ISA_FSGSBASE },
2623 { "-mrdrnd", OPTION_MASK_ISA_RDRND },
2624 { "-mf16c", OPTION_MASK_ISA_F16C },
2625 { "-mrtm", OPTION_MASK_ISA_RTM },
2626 { "-mxsave", OPTION_MASK_ISA_XSAVE },
2627 { "-mxsaveopt", OPTION_MASK_ISA_XSAVEOPT },
2628 { "-mprefetchwt1", OPTION_MASK_ISA_PREFETCHWT1 },
2629 { "-mclflushopt", OPTION_MASK_ISA_CLFLUSHOPT },
2630 { "-mxsavec", OPTION_MASK_ISA_XSAVEC },
2631 { "-mxsaves", OPTION_MASK_ISA_XSAVES },
2632 };
2633
2634 /* Flag options. */
2635 static struct ix86_target_opts flag_opts[] =
2636 {
2637 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
2638 { "-mlong-double-128", MASK_LONG_DOUBLE_128 },
2639 { "-mlong-double-64", MASK_LONG_DOUBLE_64 },
2640 { "-m80387", MASK_80387 },
2641 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
2642 { "-malign-double", MASK_ALIGN_DOUBLE },
2643 { "-mcld", MASK_CLD },
2644 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
2645 { "-mieee-fp", MASK_IEEE_FP },
2646 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
2647 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
2648 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
2649 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
2650 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
2651 { "-mno-push-args", MASK_NO_PUSH_ARGS },
2652 { "-mno-red-zone", MASK_NO_RED_ZONE },
2653 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
2654 { "-mrecip", MASK_RECIP },
2655 { "-mrtd", MASK_RTD },
2656 { "-msseregparm", MASK_SSEREGPARM },
2657 { "-mstack-arg-probe", MASK_STACK_PROBE },
2658 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
2659 { "-mvect8-ret-in-mem", MASK_VECT8_RETURNS },
2660 { "-m8bit-idiv", MASK_USE_8BIT_IDIV },
2661 { "-mvzeroupper", MASK_VZEROUPPER },
2662 { "-mavx256-split-unaligned-load", MASK_AVX256_SPLIT_UNALIGNED_LOAD},
2663 { "-mavx256-split-unaligned-store", MASK_AVX256_SPLIT_UNALIGNED_STORE},
2664 { "-mprefer-avx128", MASK_PREFER_AVX128},
2665 };
2666
2667 const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
2668
2669 char isa_other[40];
2670 char target_other[40];
2671 unsigned num = 0;
2672 unsigned i, j;
2673 char *ret;
2674 char *ptr;
2675 size_t len;
2676 size_t line_len;
2677 size_t sep_len;
2678 const char *abi;
2679
2680 memset (opts, '\0', sizeof (opts));
2681
2682 /* Add -march= option. */
2683 if (arch)
2684 {
2685 opts[num][0] = "-march=";
2686 opts[num++][1] = arch;
2687 }
2688
2689 /* Add -mtune= option. */
2690 if (tune)
2691 {
2692 opts[num][0] = "-mtune=";
2693 opts[num++][1] = tune;
2694 }
2695
2696 /* Add -m32/-m64/-mx32. */
2697 if ((isa & OPTION_MASK_ISA_64BIT) != 0)
2698 {
2699 if ((isa & OPTION_MASK_ABI_64) != 0)
2700 abi = "-m64";
2701 else
2702 abi = "-mx32";
2703 isa &= ~ (OPTION_MASK_ISA_64BIT
2704 | OPTION_MASK_ABI_64
2705 | OPTION_MASK_ABI_X32);
2706 }
2707 else
2708 abi = "-m32";
2709 opts[num++][0] = abi;
2710
2711 /* Pick out the options in isa options. */
2712 for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
2713 {
2714 if ((isa & isa_opts[i].mask) != 0)
2715 {
2716 opts[num++][0] = isa_opts[i].option;
2717 isa &= ~ isa_opts[i].mask;
2718 }
2719 }
2720
2721 if (isa && add_nl_p)
2722 {
2723 opts[num++][0] = isa_other;
2724 sprintf (isa_other, "(other isa: %#" HOST_WIDE_INT_PRINT "x)",
2725 isa);
2726 }
2727
2728 /* Add flag options. */
2729 for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
2730 {
2731 if ((flags & flag_opts[i].mask) != 0)
2732 {
2733 opts[num++][0] = flag_opts[i].option;
2734 flags &= ~ flag_opts[i].mask;
2735 }
2736 }
2737
2738 if (flags && add_nl_p)
2739 {
2740 opts[num++][0] = target_other;
2741 sprintf (target_other, "(other flags: %#x)", flags);
2742 }
2743
2744 /* Add -fpmath= option. */
2745 if (fpmath)
2746 {
2747 opts[num][0] = "-mfpmath=";
2748 switch ((int) fpmath)
2749 {
2750 case FPMATH_387:
2751 opts[num++][1] = "387";
2752 break;
2753
2754 case FPMATH_SSE:
2755 opts[num++][1] = "sse";
2756 break;
2757
2758 case FPMATH_387 | FPMATH_SSE:
2759 opts[num++][1] = "sse+387";
2760 break;
2761
2762 default:
2763 gcc_unreachable ();
2764 }
2765 }
2766
2767 /* Any options? */
2768 if (num == 0)
2769 return NULL;
2770
2771 gcc_assert (num < ARRAY_SIZE (opts));
2772
2773 /* Size the string. */
2774 len = 0;
2775 sep_len = (add_nl_p) ? 3 : 1;
2776 for (i = 0; i < num; i++)
2777 {
2778 len += sep_len;
2779 for (j = 0; j < 2; j++)
2780 if (opts[i][j])
2781 len += strlen (opts[i][j]);
2782 }
2783
2784 /* Build the string. */
2785 ret = ptr = (char *) xmalloc (len);
2786 line_len = 0;
2787
2788 for (i = 0; i < num; i++)
2789 {
2790 size_t len2[2];
2791
2792 for (j = 0; j < 2; j++)
2793 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
2794
2795 if (i != 0)
2796 {
2797 *ptr++ = ' ';
2798 line_len++;
2799
2800 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
2801 {
2802 *ptr++ = '\\';
2803 *ptr++ = '\n';
2804 line_len = 0;
2805 }
2806 }
2807
2808 for (j = 0; j < 2; j++)
2809 if (opts[i][j])
2810 {
2811 memcpy (ptr, opts[i][j], len2[j]);
2812 ptr += len2[j];
2813 line_len += len2[j];
2814 }
2815 }
2816
2817 *ptr = '\0';
2818 gcc_assert (ret + len >= ptr);
2819
2820 return ret;
2821 }
2822
2823 /* Return true if profiling code should be emitted before the
2824 prologue, otherwise false.
2825 Note: for x86 the "hotfix" prologue case is handled with sorry (). */
2826 static bool
2827 ix86_profile_before_prologue (void)
2828 {
2829 return flag_fentry != 0;
2830 }
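/* (With -mfentry the profiling call to __fentry__ is emitted before the
   prologue instead of the usual mcount call after it, which is why the hook
   above simply tests flag_fentry.)  */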
2831
2832 /* Function that is callable from the debugger to print the current
2833 options. */
2834 void ATTRIBUTE_UNUSED
2835 ix86_debug_options (void)
2836 {
2837 char *opts = ix86_target_string (ix86_isa_flags, target_flags,
2838 ix86_arch_string, ix86_tune_string,
2839 ix86_fpmath, true);
2840
2841 if (opts)
2842 {
2843 fprintf (stderr, "%s\n\n", opts);
2844 free (opts);
2845 }
2846 else
2847 fputs ("<no options>\n\n", stderr);
2848
2849 return;
2850 }
2851
2852 static const char *stringop_alg_names[] = {
2853 #define DEF_ENUM
2854 #define DEF_ALG(alg, name) #name,
2855 #include "stringop.def"
2856 #undef DEF_ENUM
2857 #undef DEF_ALG
2858 };
2859
2860 /* Parse parameter string passed to -mmemcpy-strategy= or -mmemset-strategy=.
2861 The string has the following form (or is a comma-separated list of such entries):
2862
2863 strategy_alg:max_size:[align|noalign]
2864
2865 where the full size range for the strategy is either [0, max_size] or
2866 [min_size, max_size], in which min_size is the max_size + 1 of the
2867 preceding range. The last size range must have max_size == -1.
2868
2869 Examples:
2870
2871 1.
2872 -mmemcpy-strategy=libcall:-1:noalign
2873
2874 this is equivalent to (for known size memcpy) -mstringop-strategy=libcall
2875
2876
2877 2.
2878 -mmemset-strategy=rep_8byte:16:noalign,vector_loop:2048:align,libcall:-1:noalign
2879
2880 This is to tell the compiler to use the following strategy for memset
2881 1) when the expected size is between [1, 16], use rep_8byte strategy;
2882 2) when the size is between [17, 2048], use vector_loop;
2883 3) when the size is > 2048, use libcall. */
2884
2885 struct stringop_size_range
2886 {
2887 int max;
2888 stringop_alg alg;
2889 bool noalign;
2890 };
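/* For instance, the first range of example 2 above, "rep_8byte:16:noalign",
   would be recorded as { 16, <alg for "rep_8byte">, true }, i.e. max = 16,
   noalign = true, with the algorithm enumerator taken from stringop.def
   (presumably rep_prefix_8_byte), before being copied over the default
   table.  */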
2891
2892 static void
2893 ix86_parse_stringop_strategy_string (char *strategy_str, bool is_memset)
2894 {
2895 const struct stringop_algs *default_algs;
2896 stringop_size_range input_ranges[MAX_STRINGOP_ALGS];
2897 char *curr_range_str, *next_range_str;
2898 int i = 0, n = 0;
2899
2900 if (is_memset)
2901 default_algs = &ix86_cost->memset[TARGET_64BIT != 0];
2902 else
2903 default_algs = &ix86_cost->memcpy[TARGET_64BIT != 0];
2904
2905 curr_range_str = strategy_str;
2906
2907 do
2908 {
2909 int maxs;
2910 char alg_name[128];
2911 char align[16];
2912 next_range_str = strchr (curr_range_str, ',');
2913 if (next_range_str)
2914 *next_range_str++ = '\0';
2915
2916 if (3 != sscanf (curr_range_str, "%20[^:]:%d:%10s",
2917 alg_name, &maxs, align))
2918 {
2919 error ("wrong arg %s to option %s", curr_range_str,
2920 is_memset ? "-mmemset-strategy=" : "-mmemcpy-strategy=");
2921 return;
2922 }
2923
2924 if (n > 0 && (maxs < (input_ranges[n - 1].max + 1) && maxs != -1))
2925 {
2926 error ("size ranges of option %s should be increasing",
2927 is_memset ? "-mmemset-strategy=" : "-mmemcpy-strategy=");
2928 return;
2929 }
2930
2931 for (i = 0; i < last_alg; i++)
2932 if (!strcmp (alg_name, stringop_alg_names[i]))
2933 break;
2934
2935 if (i == last_alg)
2936 {
2937 error ("wrong stringop strategy name %s specified for option %s",
2938 alg_name,
2939 is_memset ? "-mmemset-strategy=" : "-mmemcpy-strategy=");
2940 return;
2941 }
2942
2943 input_ranges[n].max = maxs;
2944 input_ranges[n].alg = (stringop_alg) i;
2945 if (!strcmp (align, "align"))
2946 input_ranges[n].noalign = false;
2947 else if (!strcmp (align, "noalign"))
2948 input_ranges[n].noalign = true;
2949 else
2950 {
2951 error ("unknown alignment %s specified for option %s",
2952 align, is_memset ? "-mmemset-strategy=" : "-mmemcpy-strategy=");
2953 return;
2954 }
2955 n++;
2956 curr_range_str = next_range_str;
2957 }
2958 while (curr_range_str);
2959
2960 if (input_ranges[n - 1].max != -1)
2961 {
2962 error ("the max value for the last size range should be -1"
2963 " for option %s",
2964 is_memset ? "-mmemset-strategy=" : "-mmemcpy-strategy=");
2965 return;
2966 }
2967
2968 if (n > MAX_STRINGOP_ALGS)
2969 {
2970 error ("too many size ranges specified in option %s",
2971 	     is_memset ? "-mmemset-strategy=" : "-mmemcpy-strategy=");
2972 return;
2973 }
2974
2975 /* Now override the default algs array. */
2976 for (i = 0; i < n; i++)
2977 {
2978 *const_cast<int *>(&default_algs->size[i].max) = input_ranges[i].max;
2979 *const_cast<stringop_alg *>(&default_algs->size[i].alg)
2980 = input_ranges[i].alg;
2981 *const_cast<int *>(&default_algs->size[i].noalign)
2982 = input_ranges[i].noalign;
2983 }
2984 }
2985
2986 \f
2987 /* Parse the -mtune-ctrl= comma separated feature list; a '^' prefix clears a
2988    feature.  When DUMP is true, print the features explicitly set or cleared.  */
2989
2990 static void
2991 parse_mtune_ctrl_str (bool dump)
2992 {
2993 if (!ix86_tune_ctrl_string)
2994 return;
2995
2996 char *next_feature_string = NULL;
2997 char *curr_feature_string = xstrdup (ix86_tune_ctrl_string);
2998 char *orig = curr_feature_string;
2999 int i;
3000 do
3001 {
3002 bool clear = false;
3003
3004 next_feature_string = strchr (curr_feature_string, ',');
3005 if (next_feature_string)
3006 *next_feature_string++ = '\0';
3007 if (*curr_feature_string == '^')
3008 {
3009 curr_feature_string++;
3010 clear = true;
3011 }
3012 for (i = 0; i < X86_TUNE_LAST; i++)
3013 {
3014 if (!strcmp (curr_feature_string, ix86_tune_feature_names[i]))
3015 {
3016 ix86_tune_features[i] = !clear;
3017 if (dump)
3018 fprintf (stderr, "Explicitly %s feature %s\n",
3019 clear ? "clear" : "set", ix86_tune_feature_names[i]);
3020 break;
3021 }
3022 }
3023 if (i == X86_TUNE_LAST)
3024         error ("unknown parameter to option -mtune-ctrl: %s",
3025 clear ? curr_feature_string - 1 : curr_feature_string);
3026 curr_feature_string = next_feature_string;
3027 }
3028 while (curr_feature_string);
3029 free (orig);
3030 }
3031
3032 /* Helper function to set ix86_tune_features. IX86_TUNE is the
3033 processor type. */
3034
3035 static void
3036 set_ix86_tune_features (enum processor_type ix86_tune, bool dump)
3037 {
3038 unsigned int ix86_tune_mask = 1u << ix86_tune;
3039 int i;
3040
3041 for (i = 0; i < X86_TUNE_LAST; ++i)
3042 {
3043 if (ix86_tune_no_default)
3044 ix86_tune_features[i] = 0;
3045 else
3046 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3047 }
3048
3049 if (dump)
3050 {
3051 fprintf (stderr, "List of x86 specific tuning parameter names:\n");
3052 for (i = 0; i < X86_TUNE_LAST; i++)
3053 fprintf (stderr, "%s : %s\n", ix86_tune_feature_names[i],
3054 ix86_tune_features[i] ? "on" : "off");
3055 }
3056
3057 parse_mtune_ctrl_str (dump);
3058 }
3059
3060
3061 /* Override various settings based on options. If MAIN_ARGS_P, the
3062 options are from the command line, otherwise they are from
3063 attributes. */
3064
3065 static void
3066 ix86_option_override_internal (bool main_args_p,
3067 struct gcc_options *opts,
3068 struct gcc_options *opts_set)
3069 {
3070 int i;
3071 unsigned int ix86_arch_mask;
3072 const bool ix86_tune_specified = (opts->x_ix86_tune_string != NULL);
3073 const char *prefix;
3074 const char *suffix;
3075 const char *sw;
3076
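/* Individual ISA feature bits used in the flags field of the
   processor_alias_table below.  */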
3077 #define PTA_3DNOW (HOST_WIDE_INT_1 << 0)
3078 #define PTA_3DNOW_A (HOST_WIDE_INT_1 << 1)
3079 #define PTA_64BIT (HOST_WIDE_INT_1 << 2)
3080 #define PTA_ABM (HOST_WIDE_INT_1 << 3)
3081 #define PTA_AES (HOST_WIDE_INT_1 << 4)
3082 #define PTA_AVX (HOST_WIDE_INT_1 << 5)
3083 #define PTA_BMI (HOST_WIDE_INT_1 << 6)
3084 #define PTA_CX16 (HOST_WIDE_INT_1 << 7)
3085 #define PTA_F16C (HOST_WIDE_INT_1 << 8)
3086 #define PTA_FMA (HOST_WIDE_INT_1 << 9)
3087 #define PTA_FMA4 (HOST_WIDE_INT_1 << 10)
3088 #define PTA_FSGSBASE (HOST_WIDE_INT_1 << 11)
3089 #define PTA_LWP (HOST_WIDE_INT_1 << 12)
3090 #define PTA_LZCNT (HOST_WIDE_INT_1 << 13)
3091 #define PTA_MMX (HOST_WIDE_INT_1 << 14)
3092 #define PTA_MOVBE (HOST_WIDE_INT_1 << 15)
3093 #define PTA_NO_SAHF (HOST_WIDE_INT_1 << 16)
3094 #define PTA_PCLMUL (HOST_WIDE_INT_1 << 17)
3095 #define PTA_POPCNT (HOST_WIDE_INT_1 << 18)
3096 #define PTA_PREFETCH_SSE (HOST_WIDE_INT_1 << 19)
3097 #define PTA_RDRND (HOST_WIDE_INT_1 << 20)
3098 #define PTA_SSE (HOST_WIDE_INT_1 << 21)
3099 #define PTA_SSE2 (HOST_WIDE_INT_1 << 22)
3100 #define PTA_SSE3 (HOST_WIDE_INT_1 << 23)
3101 #define PTA_SSE4_1 (HOST_WIDE_INT_1 << 24)
3102 #define PTA_SSE4_2 (HOST_WIDE_INT_1 << 25)
3103 #define PTA_SSE4A (HOST_WIDE_INT_1 << 26)
3104 #define PTA_SSSE3 (HOST_WIDE_INT_1 << 27)
3105 #define PTA_TBM (HOST_WIDE_INT_1 << 28)
3106 #define PTA_XOP (HOST_WIDE_INT_1 << 29)
3107 #define PTA_AVX2 (HOST_WIDE_INT_1 << 30)
3108 #define PTA_BMI2 (HOST_WIDE_INT_1 << 31)
3109 #define PTA_RTM (HOST_WIDE_INT_1 << 32)
3110 #define PTA_HLE (HOST_WIDE_INT_1 << 33)
3111 #define PTA_PRFCHW (HOST_WIDE_INT_1 << 34)
3112 #define PTA_RDSEED (HOST_WIDE_INT_1 << 35)
3113 #define PTA_ADX (HOST_WIDE_INT_1 << 36)
3114 #define PTA_FXSR (HOST_WIDE_INT_1 << 37)
3115 #define PTA_XSAVE (HOST_WIDE_INT_1 << 38)
3116 #define PTA_XSAVEOPT (HOST_WIDE_INT_1 << 39)
3117 #define PTA_AVX512F (HOST_WIDE_INT_1 << 40)
3118 #define PTA_AVX512ER (HOST_WIDE_INT_1 << 41)
3119 #define PTA_AVX512PF (HOST_WIDE_INT_1 << 42)
3120 #define PTA_AVX512CD (HOST_WIDE_INT_1 << 43)
3121 #define PTA_SHA (HOST_WIDE_INT_1 << 45)
3122 #define PTA_PREFETCHWT1 (HOST_WIDE_INT_1 << 46)
3123 #define PTA_CLFLUSHOPT (HOST_WIDE_INT_1 << 47)
3124 #define PTA_XSAVEC (HOST_WIDE_INT_1 << 48)
3125 #define PTA_XSAVES (HOST_WIDE_INT_1 << 49)
3126
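/* Composite flag sets describing the ISA baseline of the named Intel
   processor generations.  */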
3127 #define PTA_CORE2 \
3128 (PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3 \
3129 | PTA_CX16 | PTA_FXSR)
3130 #define PTA_NEHALEM \
3131 (PTA_CORE2 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_POPCNT)
3132 #define PTA_WESTMERE \
3133 (PTA_NEHALEM | PTA_AES | PTA_PCLMUL)
3134 #define PTA_SANDYBRIDGE \
3135 (PTA_WESTMERE | PTA_AVX | PTA_XSAVE | PTA_XSAVEOPT)
3136 #define PTA_IVYBRIDGE \
3137 (PTA_SANDYBRIDGE | PTA_FSGSBASE | PTA_RDRND | PTA_F16C)
3138 #define PTA_HASWELL \
3139 (PTA_IVYBRIDGE | PTA_AVX2 | PTA_BMI | PTA_BMI2 | PTA_LZCNT \
3140 | PTA_FMA | PTA_MOVBE | PTA_HLE)
3141 #define PTA_BROADWELL \
3142 (PTA_HASWELL | PTA_ADX | PTA_PRFCHW | PTA_RDSEED)
3143 #define PTA_BONNELL \
3144 (PTA_CORE2 | PTA_MOVBE)
3145 #define PTA_SILVERMONT \
3146 (PTA_WESTMERE | PTA_MOVBE)
3147
3148   /* If this reaches 64, the flags field of struct pta below must be widened.  */
3149
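  /* Table mapping -march=/-mtune= CPU names to the processor type, the
     scheduling model and the ISA feature flags they imply.  */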
3150 static struct pta
3151 {
3152 const char *const name; /* processor name or nickname. */
3153 const enum processor_type processor;
3154 const enum attr_cpu schedule;
3155 const unsigned HOST_WIDE_INT flags;
3156 }
3157 const processor_alias_table[] =
3158 {
3159 {"i386", PROCESSOR_I386, CPU_NONE, 0},
3160 {"i486", PROCESSOR_I486, CPU_NONE, 0},
3161 {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3162 {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3163 {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
3164 {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
3165 {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3166 {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3167 {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3168 PTA_MMX | PTA_SSE | PTA_FXSR},
3169 {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3170 {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3171 {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_FXSR},
3172 {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3173 PTA_MMX | PTA_SSE | PTA_FXSR},
3174 {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3175 PTA_MMX | PTA_SSE | PTA_FXSR},
3176 {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3177 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3178 {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
3179 PTA_MMX |PTA_SSE | PTA_SSE2 | PTA_FXSR},
3180 {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
3181 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3182 {"prescott", PROCESSOR_NOCONA, CPU_NONE,
3183 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_FXSR},
3184 {"nocona", PROCESSOR_NOCONA, CPU_NONE,
3185 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3186 | PTA_CX16 | PTA_NO_SAHF | PTA_FXSR},
3187 {"core2", PROCESSOR_CORE2, CPU_CORE2, PTA_CORE2},
3188 {"nehalem", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_NEHALEM},
3189 {"corei7", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_NEHALEM},
3190 {"westmere", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_WESTMERE},
3191 {"sandybridge", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3192 PTA_SANDYBRIDGE},
3193 {"corei7-avx", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3194 PTA_SANDYBRIDGE},
3195 {"ivybridge", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3196 PTA_IVYBRIDGE},
3197 {"core-avx-i", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3198 PTA_IVYBRIDGE},
3199 {"haswell", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_HASWELL},
3200 {"core-avx2", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_HASWELL},
3201 {"broadwell", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_BROADWELL},
3202 {"bonnell", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL},
3203 {"atom", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL},
3204 {"silvermont", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
3205 {"slm", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
3206 {"intel", PROCESSOR_INTEL, CPU_SLM, PTA_NEHALEM},
3207 {"geode", PROCESSOR_GEODE, CPU_GEODE,
3208 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3209 {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
3210 {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3211 {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3212 {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
3213 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3214 {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
3215 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3216 {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
3217 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3218 {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
3219 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3220 {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
3221 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3222 {"x86-64", PROCESSOR_K8, CPU_K8,
3223 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF | PTA_FXSR},
3224 {"k8", PROCESSOR_K8, CPU_K8,
3225 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3226 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3227 {"k8-sse3", PROCESSOR_K8, CPU_K8,
3228 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3229 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3230 {"opteron", PROCESSOR_K8, CPU_K8,
3231 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3232 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3233 {"opteron-sse3", PROCESSOR_K8, CPU_K8,
3234 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3235 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3236 {"athlon64", PROCESSOR_K8, CPU_K8,
3237 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3238 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3239 {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
3240 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3241 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3242 {"athlon-fx", PROCESSOR_K8, CPU_K8,
3243 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3244 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3245 {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3246 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
3247 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
3248 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3249 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
3250 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
3251 {"bdver1", PROCESSOR_BDVER1, CPU_BDVER1,
3252 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3253 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3254 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3255 | PTA_XOP | PTA_LWP | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
3256 {"bdver2", PROCESSOR_BDVER2, CPU_BDVER2,
3257 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3258 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3259 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3260 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
3261 | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
3262 {"bdver3", PROCESSOR_BDVER3, CPU_BDVER3,
3263 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3264 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3265 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3266 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
3267 | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE
3268 | PTA_XSAVEOPT | PTA_FSGSBASE},
3269 {"bdver4", PROCESSOR_BDVER4, CPU_BDVER4,
3270 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3271 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3272 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_AVX2
3273 | PTA_FMA4 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_BMI2
3274 | PTA_TBM | PTA_F16C | PTA_FMA | PTA_PRFCHW | PTA_FXSR
3275 | PTA_XSAVE | PTA_XSAVEOPT | PTA_FSGSBASE},
3276 {"btver1", PROCESSOR_BTVER1, CPU_GENERIC,
3277 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3278 | PTA_SSSE3 | PTA_SSE4A |PTA_ABM | PTA_CX16 | PTA_PRFCHW
3279 | PTA_FXSR | PTA_XSAVE},
3280 {"btver2", PROCESSOR_BTVER2, CPU_BTVER2,
3281 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3282 | PTA_SSSE3 | PTA_SSE4A |PTA_ABM | PTA_CX16 | PTA_SSE4_1
3283 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX
3284 | PTA_BMI | PTA_F16C | PTA_MOVBE | PTA_PRFCHW
3285 | PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT},
3286
3287 {"generic", PROCESSOR_GENERIC, CPU_GENERIC,
3288 PTA_64BIT
3289 | PTA_HLE /* flags are only used for -march switch. */ },
3290 };
3291
3292 /* -mrecip options. */
3293 static struct
3294 {
3295 const char *string; /* option name */
3296 unsigned int mask; /* mask bits to set */
3297 }
3298 const recip_options[] =
3299 {
3300 { "all", RECIP_MASK_ALL },
3301 { "none", RECIP_MASK_NONE },
3302 { "div", RECIP_MASK_DIV },
3303 { "sqrt", RECIP_MASK_SQRT },
3304 { "vec-div", RECIP_MASK_VEC_DIV },
3305 { "vec-sqrt", RECIP_MASK_VEC_SQRT },
3306 };
3307
3308 int const pta_size = ARRAY_SIZE (processor_alias_table);
3309
3310 /* Set up prefix/suffix so the error messages refer to either the command
3311 line argument, or the attribute(target). */
3312 if (main_args_p)
3313 {
3314 prefix = "-m";
3315 suffix = "";
3316 sw = "switch";
3317 }
3318 else
3319 {
3320 prefix = "option(\"";
3321 suffix = "\")";
3322 sw = "attribute";
3323 }
3324
3325 /* Turn off both OPTION_MASK_ABI_64 and OPTION_MASK_ABI_X32 if
3326 TARGET_64BIT_DEFAULT is true and TARGET_64BIT is false. */
3327 if (TARGET_64BIT_DEFAULT && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
3328 opts->x_ix86_isa_flags &= ~(OPTION_MASK_ABI_64 | OPTION_MASK_ABI_X32);
3329 #ifdef TARGET_BI_ARCH
3330 else
3331 {
3332 #if TARGET_BI_ARCH == 1
3333 /* When TARGET_BI_ARCH == 1, by default, OPTION_MASK_ABI_64
3334 is on and OPTION_MASK_ABI_X32 is off. We turn off
3335 OPTION_MASK_ABI_64 if OPTION_MASK_ABI_X32 is turned on by
3336 -mx32. */
3337 if (TARGET_X32_P (opts->x_ix86_isa_flags))
3338 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64;
3339 #else
3340 /* When TARGET_BI_ARCH == 2, by default, OPTION_MASK_ABI_X32 is
3341 on and OPTION_MASK_ABI_64 is off. We turn off
3342 OPTION_MASK_ABI_X32 if OPTION_MASK_ABI_64 is turned on by
3343 -m64. */
3344 if (TARGET_LP64_P (opts->x_ix86_isa_flags))
3345 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
3346 #endif
3347 }
3348 #endif
3349
3350 if (TARGET_X32_P (opts->x_ix86_isa_flags))
3351 {
3352 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3353 OPTION_MASK_ABI_64 for TARGET_X32. */
3354 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
3355 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64;
3356 }
3357 else if (TARGET_16BIT_P (opts->x_ix86_isa_flags))
3358 opts->x_ix86_isa_flags &= ~(OPTION_MASK_ISA_64BIT
3359 | OPTION_MASK_ABI_X32
3360 | OPTION_MASK_ABI_64);
3361 else if (TARGET_LP64_P (opts->x_ix86_isa_flags))
3362 {
3363 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3364 OPTION_MASK_ABI_X32 for TARGET_LP64. */
3365 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
3366 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
3367 }
3368
3369 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3370 SUBTARGET_OVERRIDE_OPTIONS;
3371 #endif
3372
3373 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
3374 SUBSUBTARGET_OVERRIDE_OPTIONS;
3375 #endif
3376
3377 /* -fPIC is the default for x86_64. */
3378 if (TARGET_MACHO && TARGET_64BIT_P (opts->x_ix86_isa_flags))
3379 opts->x_flag_pic = 2;
3380
3381 /* Need to check -mtune=generic first. */
3382 if (opts->x_ix86_tune_string)
3383 {
3384 /* As special support for cross compilers we read -mtune=native
3385 as -mtune=generic. With native compilers we won't see the
3386 -mtune=native, as it was changed by the driver. */
3387 if (!strcmp (opts->x_ix86_tune_string, "native"))
3388 {
3389 opts->x_ix86_tune_string = "generic";
3390 }
3391 else if (!strcmp (opts->x_ix86_tune_string, "x86-64"))
3392 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated; use "
3393 "%stune=k8%s or %stune=generic%s instead as appropriate",
3394 prefix, suffix, prefix, suffix, prefix, suffix);
3395 }
3396 else
3397 {
3398 if (opts->x_ix86_arch_string)
3399 opts->x_ix86_tune_string = opts->x_ix86_arch_string;
3400 if (!opts->x_ix86_tune_string)
3401 {
3402 opts->x_ix86_tune_string
3403 = processor_target_table[TARGET_CPU_DEFAULT].name;
3404 ix86_tune_defaulted = 1;
3405 }
3406
3407 /* opts->x_ix86_tune_string is set to opts->x_ix86_arch_string
3408 or defaulted. We need to use a sensible tune option. */
3409 if (!strcmp (opts->x_ix86_tune_string, "x86-64"))
3410 {
3411 opts->x_ix86_tune_string = "generic";
3412 }
3413 }
3414
3415 if (opts->x_ix86_stringop_alg == rep_prefix_8_byte
3416 && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
3417 {
3418 /* rep; movq isn't available in 32-bit code. */
3419 error ("-mstringop-strategy=rep_8byte not supported for 32-bit code");
3420 opts->x_ix86_stringop_alg = no_stringop;
3421 }
3422
3423 if (!opts->x_ix86_arch_string)
3424 opts->x_ix86_arch_string
3425 = TARGET_64BIT_P (opts->x_ix86_isa_flags)
3426 ? "x86-64" : SUBTARGET32_DEFAULT_CPU;
3427 else
3428 ix86_arch_specified = 1;
3429
3430 if (opts_set->x_ix86_pmode)
3431 {
3432 if ((TARGET_LP64_P (opts->x_ix86_isa_flags)
3433 && opts->x_ix86_pmode == PMODE_SI)
3434 || (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
3435 && opts->x_ix86_pmode == PMODE_DI))
3436 error ("address mode %qs not supported in the %s bit mode",
3437 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "short" : "long",
3438 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "64" : "32");
3439 }
3440 else
3441 opts->x_ix86_pmode = TARGET_LP64_P (opts->x_ix86_isa_flags)
3442 ? PMODE_DI : PMODE_SI;
3443
3444 if (!opts_set->x_ix86_abi)
3445 opts->x_ix86_abi = DEFAULT_ABI;
3446
3447   /* For targets using the ms ABI, enable ms-extensions if not
3448      explicitly turned off.  For non-ms ABI targets we turn this
3449      option off.  */
3450 if (!opts_set->x_flag_ms_extensions)
3451 opts->x_flag_ms_extensions = (MS_ABI == DEFAULT_ABI);
3452
3453 if (opts_set->x_ix86_cmodel)
3454 {
3455 switch (opts->x_ix86_cmodel)
3456 {
3457 case CM_SMALL:
3458 case CM_SMALL_PIC:
3459 if (opts->x_flag_pic)
3460 opts->x_ix86_cmodel = CM_SMALL_PIC;
3461 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3462 error ("code model %qs not supported in the %s bit mode",
3463 "small", "32");
3464 break;
3465
3466 case CM_MEDIUM:
3467 case CM_MEDIUM_PIC:
3468 if (opts->x_flag_pic)
3469 opts->x_ix86_cmodel = CM_MEDIUM_PIC;
3470 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3471 error ("code model %qs not supported in the %s bit mode",
3472 "medium", "32");
3473 else if (TARGET_X32_P (opts->x_ix86_isa_flags))
3474 error ("code model %qs not supported in x32 mode",
3475 "medium");
3476 break;
3477
3478 case CM_LARGE:
3479 case CM_LARGE_PIC:
3480 if (opts->x_flag_pic)
3481 opts->x_ix86_cmodel = CM_LARGE_PIC;
3482 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3483 error ("code model %qs not supported in the %s bit mode",
3484 "large", "32");
3485 else if (TARGET_X32_P (opts->x_ix86_isa_flags))
3486 error ("code model %qs not supported in x32 mode",
3487 "large");
3488 break;
3489
3490 case CM_32:
3491 if (opts->x_flag_pic)
3492 error ("code model %s does not support PIC mode", "32");
3493 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3494 error ("code model %qs not supported in the %s bit mode",
3495 "32", "64");
3496 break;
3497
3498 case CM_KERNEL:
3499 if (opts->x_flag_pic)
3500 {
3501 error ("code model %s does not support PIC mode", "kernel");
3502 opts->x_ix86_cmodel = CM_32;
3503 }
3504 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3505 error ("code model %qs not supported in the %s bit mode",
3506 "kernel", "32");
3507 break;
3508
3509 default:
3510 gcc_unreachable ();
3511 }
3512 }
3513 else
3514 {
3515 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
3516 use of rip-relative addressing. This eliminates fixups that
3517 would otherwise be needed if this object is to be placed in a
3518 DLL, and is essentially just as efficient as direct addressing. */
3519 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3520 && (TARGET_RDOS || TARGET_PECOFF))
3521 opts->x_ix86_cmodel = CM_MEDIUM_PIC, opts->x_flag_pic = 1;
3522 else if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3523 opts->x_ix86_cmodel = opts->x_flag_pic ? CM_SMALL_PIC : CM_SMALL;
3524 else
3525 opts->x_ix86_cmodel = CM_32;
3526 }
3527 if (TARGET_MACHO && opts->x_ix86_asm_dialect == ASM_INTEL)
3528 {
3529 error ("-masm=intel not supported in this configuration");
3530 opts->x_ix86_asm_dialect = ASM_ATT;
3531 }
3532 if ((TARGET_64BIT_P (opts->x_ix86_isa_flags) != 0)
3533 != ((opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
3534 sorry ("%i-bit mode not compiled in",
3535 (opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
3536
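  /* Look up -march= in the alias table and enable every ISA extension the
     selected CPU implies, unless that ISA flag was given explicitly.  */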
3537 for (i = 0; i < pta_size; i++)
3538 if (! strcmp (opts->x_ix86_arch_string, processor_alias_table[i].name))
3539 {
3540 ix86_schedule = processor_alias_table[i].schedule;
3541 ix86_arch = processor_alias_table[i].processor;
3542 /* Default cpu tuning to the architecture. */
3543 ix86_tune = ix86_arch;
3544
3545 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3546 && !(processor_alias_table[i].flags & PTA_64BIT))
3547 error ("CPU you selected does not support x86-64 "
3548 "instruction set");
3549
3550 if (processor_alias_table[i].flags & PTA_MMX
3551 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
3552 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MMX;
3553 if (processor_alias_table[i].flags & PTA_3DNOW
3554 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
3555 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
3556 if (processor_alias_table[i].flags & PTA_3DNOW_A
3557 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
3558 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
3559 if (processor_alias_table[i].flags & PTA_SSE
3560 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
3561 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE;
3562 if (processor_alias_table[i].flags & PTA_SSE2
3563 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
3564 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
3565 if (processor_alias_table[i].flags & PTA_SSE3
3566 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
3567 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
3568 if (processor_alias_table[i].flags & PTA_SSSE3
3569 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
3570 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
3571 if (processor_alias_table[i].flags & PTA_SSE4_1
3572 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
3573 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
3574 if (processor_alias_table[i].flags & PTA_SSE4_2
3575 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
3576 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
3577 if (processor_alias_table[i].flags & PTA_AVX
3578 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
3579 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX;
3580 if (processor_alias_table[i].flags & PTA_AVX2
3581 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX2))
3582 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX2;
3583 if (processor_alias_table[i].flags & PTA_FMA
3584 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
3585 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA;
3586 if (processor_alias_table[i].flags & PTA_SSE4A
3587 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
3588 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
3589 if (processor_alias_table[i].flags & PTA_FMA4
3590 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4))
3591 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA4;
3592 if (processor_alias_table[i].flags & PTA_XOP
3593 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP))
3594 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XOP;
3595 if (processor_alias_table[i].flags & PTA_LWP
3596 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP))
3597 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LWP;
3598 if (processor_alias_table[i].flags & PTA_ABM
3599 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
3600 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ABM;
3601 if (processor_alias_table[i].flags & PTA_BMI
3602 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI))
3603 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI;
3604 if (processor_alias_table[i].flags & (PTA_LZCNT | PTA_ABM)
3605 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LZCNT))
3606 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LZCNT;
3607 if (processor_alias_table[i].flags & PTA_TBM
3608 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_TBM))
3609 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_TBM;
3610 if (processor_alias_table[i].flags & PTA_BMI2
3611 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI2))
3612 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI2;
3613 if (processor_alias_table[i].flags & PTA_CX16
3614 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
3615 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CX16;
3616 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
3617 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
3618 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
3619 if (!(TARGET_64BIT_P (opts->x_ix86_isa_flags)
3620 && (processor_alias_table[i].flags & PTA_NO_SAHF))
3621 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
3622 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
3623 if (processor_alias_table[i].flags & PTA_MOVBE
3624 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
3625 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
3626 	if (processor_alias_table[i].flags & PTA_AES
3627 	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
3628 	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AES;
3629 	if (processor_alias_table[i].flags & PTA_SHA
3630 	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SHA))
3631 	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SHA;
3632 if (processor_alias_table[i].flags & PTA_PCLMUL
3633 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
3634 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
3635 if (processor_alias_table[i].flags & PTA_FSGSBASE
3636 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FSGSBASE))
3637 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE;
3638 if (processor_alias_table[i].flags & PTA_RDRND
3639 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDRND))
3640 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDRND;
3641 if (processor_alias_table[i].flags & PTA_F16C
3642 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_F16C))
3643 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_F16C;
3644 if (processor_alias_table[i].flags & PTA_RTM
3645 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RTM))
3646 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RTM;
3647 if (processor_alias_table[i].flags & PTA_HLE
3648 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_HLE))
3649 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_HLE;
3650 if (processor_alias_table[i].flags & PTA_PRFCHW
3651 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PRFCHW))
3652 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PRFCHW;
3653 if (processor_alias_table[i].flags & PTA_RDSEED
3654 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDSEED))
3655 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDSEED;
3656 if (processor_alias_table[i].flags & PTA_ADX
3657 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ADX))
3658 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ADX;
3659 if (processor_alias_table[i].flags & PTA_FXSR
3660 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FXSR))
3661 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FXSR;
3662 if (processor_alias_table[i].flags & PTA_XSAVE
3663 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVE))
3664 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVE;
3665 if (processor_alias_table[i].flags & PTA_XSAVEOPT
3666 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEOPT))
3667 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEOPT;
3668 if (processor_alias_table[i].flags & PTA_AVX512F
3669 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512F))
3670 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512F;
3671 if (processor_alias_table[i].flags & PTA_AVX512ER
3672 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512ER))
3673 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512ER;
3674 if (processor_alias_table[i].flags & PTA_AVX512PF
3675 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512PF))
3676 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512PF;
3677 if (processor_alias_table[i].flags & PTA_AVX512CD
3678 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512CD))
3679 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512CD;
3680 if (processor_alias_table[i].flags & PTA_PREFETCHWT1
3681 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PREFETCHWT1))
3682 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PREFETCHWT1;
3683 if (processor_alias_table[i].flags & PTA_CLFLUSHOPT
3684 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CLFLUSHOPT))
3685 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CLFLUSHOPT;
3686 if (processor_alias_table[i].flags & PTA_XSAVEC
3687 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEC))
3688 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEC;
3689 if (processor_alias_table[i].flags & PTA_XSAVES
3690 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVES))
3691 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVES;
3692 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
3693 x86_prefetch_sse = true;
3694
3695 break;
3696 }
3697
3698 if (!strcmp (opts->x_ix86_arch_string, "generic"))
3699 error ("generic CPU can be used only for %stune=%s %s",
3700 prefix, suffix, sw);
3701 else if (!strcmp (opts->x_ix86_arch_string, "intel"))
3702 error ("intel CPU can be used only for %stune=%s %s",
3703 prefix, suffix, sw);
3704 else if (i == pta_size)
3705 error ("bad value (%s) for %sarch=%s %s",
3706 opts->x_ix86_arch_string, prefix, suffix, sw);
3707
3708 ix86_arch_mask = 1u << ix86_arch;
3709 for (i = 0; i < X86_ARCH_LAST; ++i)
3710 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3711
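  /* Likewise look up -mtune= to select the scheduling model and tuning
     target.  */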
3712 for (i = 0; i < pta_size; i++)
3713 if (! strcmp (opts->x_ix86_tune_string, processor_alias_table[i].name))
3714 {
3715 ix86_schedule = processor_alias_table[i].schedule;
3716 ix86_tune = processor_alias_table[i].processor;
3717 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3718 {
3719 if (!(processor_alias_table[i].flags & PTA_64BIT))
3720 {
3721 if (ix86_tune_defaulted)
3722 {
3723 opts->x_ix86_tune_string = "x86-64";
3724 for (i = 0; i < pta_size; i++)
3725 if (! strcmp (opts->x_ix86_tune_string,
3726 processor_alias_table[i].name))
3727 break;
3728 ix86_schedule = processor_alias_table[i].schedule;
3729 ix86_tune = processor_alias_table[i].processor;
3730 }
3731 else
3732 error ("CPU you selected does not support x86-64 "
3733 "instruction set");
3734 }
3735 }
3736 /* Intel CPUs have always interpreted SSE prefetch instructions as
3737 NOPs; so, we can enable SSE prefetch instructions even when
3738 -mtune (rather than -march) points us to a processor that has them.
3739 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
3740 higher processors. */
3741 if (TARGET_CMOV
3742 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
3743 x86_prefetch_sse = true;
3744 break;
3745 }
3746
3747 if (ix86_tune_specified && i == pta_size)
3748 error ("bad value (%s) for %stune=%s %s",
3749 opts->x_ix86_tune_string, prefix, suffix, sw);
3750
3751 set_ix86_tune_features (ix86_tune, opts->x_ix86_dump_tunes);
3752
3753 #ifndef USE_IX86_FRAME_POINTER
3754 #define USE_IX86_FRAME_POINTER 0
3755 #endif
3756
3757 #ifndef USE_X86_64_FRAME_POINTER
3758 #define USE_X86_64_FRAME_POINTER 0
3759 #endif
3760
3761 /* Set the default values for switches whose default depends on TARGET_64BIT
3762 in case they weren't overwritten by command line options. */
3763 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3764 {
3765 if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer)
3766 opts->x_flag_omit_frame_pointer = !USE_X86_64_FRAME_POINTER;
3767 if (opts->x_flag_asynchronous_unwind_tables
3768 && !opts_set->x_flag_unwind_tables
3769 && TARGET_64BIT_MS_ABI)
3770 opts->x_flag_unwind_tables = 1;
3771 if (opts->x_flag_asynchronous_unwind_tables == 2)
3772 opts->x_flag_unwind_tables
3773 = opts->x_flag_asynchronous_unwind_tables = 1;
3774 if (opts->x_flag_pcc_struct_return == 2)
3775 opts->x_flag_pcc_struct_return = 0;
3776 }
3777 else
3778 {
3779 if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer)
3780 opts->x_flag_omit_frame_pointer
3781 = !(USE_IX86_FRAME_POINTER || opts->x_optimize_size);
3782 if (opts->x_flag_asynchronous_unwind_tables == 2)
3783 opts->x_flag_asynchronous_unwind_tables = !USE_IX86_FRAME_POINTER;
3784 if (opts->x_flag_pcc_struct_return == 2)
3785 opts->x_flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
3786 }
3787
3788 ix86_tune_cost = processor_target_table[ix86_tune].cost;
3789 if (opts->x_optimize_size)
3790 ix86_cost = &ix86_size_cost;
3791 else
3792 ix86_cost = ix86_tune_cost;
3793
3794 /* Arrange to set up i386_stack_locals for all functions. */
3795 init_machine_status = ix86_init_machine_status;
3796
3797 /* Validate -mregparm= value. */
3798 if (opts_set->x_ix86_regparm)
3799 {
3800 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3801 warning (0, "-mregparm is ignored in 64-bit mode");
3802 if (opts->x_ix86_regparm > REGPARM_MAX)
3803 {
3804 error ("-mregparm=%d is not between 0 and %d",
3805 opts->x_ix86_regparm, REGPARM_MAX);
3806 opts->x_ix86_regparm = 0;
3807 }
3808 }
3809 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3810 opts->x_ix86_regparm = REGPARM_MAX;
3811
3812 /* Default align_* from the processor table. */
3813 if (opts->x_align_loops == 0)
3814 {
3815 opts->x_align_loops = processor_target_table[ix86_tune].align_loop;
3816 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
3817 }
3818 if (opts->x_align_jumps == 0)
3819 {
3820 opts->x_align_jumps = processor_target_table[ix86_tune].align_jump;
3821 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
3822 }
3823 if (opts->x_align_functions == 0)
3824 {
3825 opts->x_align_functions = processor_target_table[ix86_tune].align_func;
3826 }
3827
3828 /* Provide default for -mbranch-cost= value. */
3829 if (!opts_set->x_ix86_branch_cost)
3830 opts->x_ix86_branch_cost = ix86_cost->branch_cost;
3831
3832 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3833 {
3834 opts->x_target_flags
3835 |= TARGET_SUBTARGET64_DEFAULT & ~opts_set->x_target_flags;
3836
3837 /* Enable by default the SSE and MMX builtins. Do allow the user to
3838 explicitly disable any of these. In particular, disabling SSE and
3839 MMX for kernel code is extremely useful. */
3840 if (!ix86_arch_specified)
3841 opts->x_ix86_isa_flags
3842 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
3843 | TARGET_SUBTARGET64_ISA_DEFAULT)
3844 & ~opts->x_ix86_isa_flags_explicit);
3845
3846 if (TARGET_RTD_P (opts->x_target_flags))
3847 	warning (0, "%srtd%s is ignored in 64-bit mode", prefix, suffix);
3848 }
3849 else
3850 {
3851 opts->x_target_flags
3852 |= TARGET_SUBTARGET32_DEFAULT & ~opts_set->x_target_flags;
3853
3854 if (!ix86_arch_specified)
3855 opts->x_ix86_isa_flags
3856 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~opts->x_ix86_isa_flags_explicit;
3857
3858       /* The i386 ABI does not specify a red zone.  It still makes sense to use
3859 	 it when the programmer takes care to keep the stack from being destroyed.  */
3860 if (!(opts_set->x_target_flags & MASK_NO_RED_ZONE))
3861 opts->x_target_flags |= MASK_NO_RED_ZONE;
3862 }
3863
3864 /* Keep nonleaf frame pointers. */
3865 if (opts->x_flag_omit_frame_pointer)
3866 opts->x_target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
3867 else if (TARGET_OMIT_LEAF_FRAME_POINTER_P (opts->x_target_flags))
3868 opts->x_flag_omit_frame_pointer = 1;
3869
3870 /* If we're doing fast math, we don't care about comparison order
3871 wrt NaNs. This lets us use a shorter comparison sequence. */
3872 if (opts->x_flag_finite_math_only)
3873 opts->x_target_flags &= ~MASK_IEEE_FP;
3874
3875 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
3876 since the insns won't need emulation. */
3877 if (ix86_tune_features [X86_TUNE_ALWAYS_FANCY_MATH_387])
3878 opts->x_target_flags &= ~MASK_NO_FANCY_MATH_387;
3879
3880 /* Likewise, if the target doesn't have a 387, or we've specified
3881 software floating point, don't use 387 inline intrinsics. */
3882 if (!TARGET_80387_P (opts->x_target_flags))
3883 opts->x_target_flags |= MASK_NO_FANCY_MATH_387;
3884
3885 /* Turn on MMX builtins for -msse. */
3886 if (TARGET_SSE_P (opts->x_ix86_isa_flags))
3887 opts->x_ix86_isa_flags
3888 |= OPTION_MASK_ISA_MMX & ~opts->x_ix86_isa_flags_explicit;
3889
3890 /* Enable SSE prefetch. */
3891 if (TARGET_SSE_P (opts->x_ix86_isa_flags)
3892 || (TARGET_PRFCHW && !TARGET_3DNOW_P (opts->x_ix86_isa_flags)))
3893 x86_prefetch_sse = true;
3894
3895 /* Enable prefetch{,w} instructions for -m3dnow and -mprefetchwt1. */
3896 if (TARGET_3DNOW_P (opts->x_ix86_isa_flags)
3897 || TARGET_PREFETCHWT1_P (opts->x_ix86_isa_flags))
3898 opts->x_ix86_isa_flags
3899 |= OPTION_MASK_ISA_PRFCHW & ~opts->x_ix86_isa_flags_explicit;
3900
3901 /* Enable popcnt instruction for -msse4.2 or -mabm. */
3902 if (TARGET_SSE4_2_P (opts->x_ix86_isa_flags)
3903 || TARGET_ABM_P (opts->x_ix86_isa_flags))
3904 opts->x_ix86_isa_flags
3905 |= OPTION_MASK_ISA_POPCNT & ~opts->x_ix86_isa_flags_explicit;
3906
3907 /* Enable lzcnt instruction for -mabm. */
3908 if (TARGET_ABM_P(opts->x_ix86_isa_flags))
3909 opts->x_ix86_isa_flags
3910 |= OPTION_MASK_ISA_LZCNT & ~opts->x_ix86_isa_flags_explicit;
3911
3912 /* Validate -mpreferred-stack-boundary= value or default it to
3913 PREFERRED_STACK_BOUNDARY_DEFAULT. */
3914 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
3915 if (opts_set->x_ix86_preferred_stack_boundary_arg)
3916 {
3917 int min = (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3918 ? (TARGET_SSE_P (opts->x_ix86_isa_flags) ? 4 : 3) : 2);
3919 int max = (TARGET_SEH ? 4 : 12);
3920
3921 if (opts->x_ix86_preferred_stack_boundary_arg < min
3922 || opts->x_ix86_preferred_stack_boundary_arg > max)
3923 {
3924 if (min == max)
3925 error ("-mpreferred-stack-boundary is not supported "
3926 "for this target");
3927 else
3928 error ("-mpreferred-stack-boundary=%d is not between %d and %d",
3929 opts->x_ix86_preferred_stack_boundary_arg, min, max);
3930 }
3931 else
3932 ix86_preferred_stack_boundary
3933 = (1 << opts->x_ix86_preferred_stack_boundary_arg) * BITS_PER_UNIT;
3934 }
3935
3936 /* Set the default value for -mstackrealign. */
3937 if (opts->x_ix86_force_align_arg_pointer == -1)
3938 opts->x_ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
3939
3940 ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
3941
3942 /* Validate -mincoming-stack-boundary= value or default it to
3943 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
3944 ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
3945 if (opts_set->x_ix86_incoming_stack_boundary_arg)
3946 {
3947 if (opts->x_ix86_incoming_stack_boundary_arg
3948 < (TARGET_64BIT_P (opts->x_ix86_isa_flags) ? 4 : 2)
3949 || opts->x_ix86_incoming_stack_boundary_arg > 12)
3950 error ("-mincoming-stack-boundary=%d is not between %d and 12",
3951 opts->x_ix86_incoming_stack_boundary_arg,
3952 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? 4 : 2);
3953 else
3954 {
3955 ix86_user_incoming_stack_boundary
3956 = (1 << opts->x_ix86_incoming_stack_boundary_arg) * BITS_PER_UNIT;
3957 ix86_incoming_stack_boundary
3958 = ix86_user_incoming_stack_boundary;
3959 }
3960 }
3961
3962 /* Accept -msseregparm only if at least SSE support is enabled. */
3963 if (TARGET_SSEREGPARM_P (opts->x_target_flags)
3964 && ! TARGET_SSE_P (opts->x_ix86_isa_flags))
3965 error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
3966
3967 if (opts_set->x_ix86_fpmath)
3968 {
3969 if (opts->x_ix86_fpmath & FPMATH_SSE)
3970 {
3971 if (!TARGET_SSE_P (opts->x_ix86_isa_flags))
3972 {
3973 warning (0, "SSE instruction set disabled, using 387 arithmetics");
3974 opts->x_ix86_fpmath = FPMATH_387;
3975 }
3976 else if ((opts->x_ix86_fpmath & FPMATH_387)
3977 && !TARGET_80387_P (opts->x_target_flags))
3978 {
3979 warning (0, "387 instruction set disabled, using SSE arithmetics");
3980 opts->x_ix86_fpmath = FPMATH_SSE;
3981 }
3982 }
3983 }
3984   /* For all chips supporting SSE2, -mfpmath=sse performs better than
3985      fpmath=387.  The latter is nevertheless the default on many targets,
3986      since the extra 80-bit precision of temporaries is considered part of
3987      the ABI.  Overwrite the default at least for -ffast-math.
3988      TODO: -mfpmath=both seems to produce code that performs the same with
3989      slightly smaller binaries.  It is however not clear whether register
3990      allocation is ready for this setting.
3991      Also, -mfpmath=387 codegen is overall a lot more compact (about 4-5%)
3992      than SSE codegen.  We may switch to 387 with -ffast-math for size
3993      optimized functions.  */
3994 else if (fast_math_flags_set_p (&global_options)
3995 && TARGET_SSE2_P (opts->x_ix86_isa_flags))
3996 opts->x_ix86_fpmath = FPMATH_SSE;
3997 else
3998 opts->x_ix86_fpmath = TARGET_FPMATH_DEFAULT_P (opts->x_ix86_isa_flags);
3999
4000 /* If the i387 is disabled, then do not return values in it. */
4001 if (!TARGET_80387_P (opts->x_target_flags))
4002 opts->x_target_flags &= ~MASK_FLOAT_RETURNS;
4003
4004 /* Use external vectorized library in vectorizing intrinsics. */
4005 if (opts_set->x_ix86_veclibabi_type)
4006 switch (opts->x_ix86_veclibabi_type)
4007 {
4008 case ix86_veclibabi_type_svml:
4009 ix86_veclib_handler = ix86_veclibabi_svml;
4010 break;
4011
4012 case ix86_veclibabi_type_acml:
4013 ix86_veclib_handler = ix86_veclibabi_acml;
4014 break;
4015
4016 default:
4017 gcc_unreachable ();
4018 }
4019
4020 if (ix86_tune_features [X86_TUNE_ACCUMULATE_OUTGOING_ARGS]
4021 && !(opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)
4022 && !opts->x_optimize_size)
4023 opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4024
4025 /* If stack probes are required, the space used for large function
4026 arguments on the stack must also be probed, so enable
4027 -maccumulate-outgoing-args so this happens in the prologue. */
4028 if (TARGET_STACK_PROBE_P (opts->x_target_flags)
4029 && !(opts->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
4030 {
4031 if (opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)
4032 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
4033 "for correctness", prefix, suffix);
4034 opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4035 }
4036
4037 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
4038 {
4039 char *p;
4040 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
4041 p = strchr (internal_label_prefix, 'X');
4042 internal_label_prefix_len = p - internal_label_prefix;
4043 *p = '\0';
4044 }
4045
4046 /* When scheduling description is not available, disable scheduler pass
4047 so it won't slow down the compilation and make x87 code slower. */
4048 if (!TARGET_SCHEDULE)
4049 opts->x_flag_schedule_insns_after_reload = opts->x_flag_schedule_insns = 0;
4050
4051 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
4052 ix86_tune_cost->simultaneous_prefetches,
4053 opts->x_param_values,
4054 opts_set->x_param_values);
4055 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
4056 ix86_tune_cost->prefetch_block,
4057 opts->x_param_values,
4058 opts_set->x_param_values);
4059 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
4060 ix86_tune_cost->l1_cache_size,
4061 opts->x_param_values,
4062 opts_set->x_param_values);
4063 maybe_set_param_value (PARAM_L2_CACHE_SIZE,
4064 ix86_tune_cost->l2_cache_size,
4065 opts->x_param_values,
4066 opts_set->x_param_values);
4067
4068   /* Enable software prefetching at -O3 for CPUs where prefetching is beneficial.  */
4069 if (opts->x_flag_prefetch_loop_arrays < 0
4070 && HAVE_prefetch
4071 && (opts->x_optimize >= 3 || opts->x_flag_profile_use)
4072 && TARGET_SOFTWARE_PREFETCHING_BENEFICIAL)
4073 opts->x_flag_prefetch_loop_arrays = 1;
4074
4075   /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
4076      can be optimized to ap = __builtin_next_arg (0).  */
4077 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && !opts->x_flag_split_stack)
4078 targetm.expand_builtin_va_start = NULL;
4079
4080 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
4081 {
4082 ix86_gen_leave = gen_leave_rex64;
4083 if (Pmode == DImode)
4084 {
4085 ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_di;
4086 ix86_gen_tls_local_dynamic_base_64
4087 = gen_tls_local_dynamic_base_64_di;
4088 }
4089 else
4090 {
4091 ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_si;
4092 ix86_gen_tls_local_dynamic_base_64
4093 = gen_tls_local_dynamic_base_64_si;
4094 }
4095 }
4096 else
4097 ix86_gen_leave = gen_leave;
4098
4099 if (Pmode == DImode)
4100 {
4101 ix86_gen_add3 = gen_adddi3;
4102 ix86_gen_sub3 = gen_subdi3;
4103 ix86_gen_sub3_carry = gen_subdi3_carry;
4104 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
4105 ix86_gen_andsp = gen_anddi3;
4106 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_di;
4107 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi;
4108 ix86_gen_probe_stack_range = gen_probe_stack_rangedi;
4109 ix86_gen_monitor = gen_sse3_monitor_di;
4110 }
4111 else
4112 {
4113 ix86_gen_add3 = gen_addsi3;
4114 ix86_gen_sub3 = gen_subsi3;
4115 ix86_gen_sub3_carry = gen_subsi3_carry;
4116 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
4117 ix86_gen_andsp = gen_andsi3;
4118 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_si;
4119 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi;
4120 ix86_gen_probe_stack_range = gen_probe_stack_rangesi;
4121 ix86_gen_monitor = gen_sse3_monitor_si;
4122 }
4123
4124 #ifdef USE_IX86_CLD
4125 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
4126 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
4127 opts->x_target_flags |= MASK_CLD & ~opts_set->x_target_flags;
4128 #endif
4129
4130 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && opts->x_flag_pic)
4131 {
4132 if (opts->x_flag_fentry > 0)
4133 sorry ("-mfentry isn%'t supported for 32-bit in combination "
4134 "with -fpic");
4135 opts->x_flag_fentry = 0;
4136 }
4137 else if (TARGET_SEH)
4138 {
4139 if (opts->x_flag_fentry == 0)
4140 sorry ("-mno-fentry isn%'t compatible with SEH");
4141 opts->x_flag_fentry = 1;
4142 }
4143 else if (opts->x_flag_fentry < 0)
4144 {
4145 #if defined(PROFILE_BEFORE_PROLOGUE)
4146 opts->x_flag_fentry = 1;
4147 #else
4148 opts->x_flag_fentry = 0;
4149 #endif
4150 }
4151
4152   /* When not optimizing for size, enable vzeroupper optimization for
4153 TARGET_AVX with -fexpensive-optimizations and split 32-byte
4154 AVX unaligned load/store. */
4155 if (!opts->x_optimize_size)
4156 {
4157 if (flag_expensive_optimizations
4158 && !(opts_set->x_target_flags & MASK_VZEROUPPER))
4159 opts->x_target_flags |= MASK_VZEROUPPER;
4160 if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_LOAD_OPTIMAL]
4161 && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_LOAD))
4162 opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD;
4163 if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_STORE_OPTIMAL]
4164 && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_STORE))
4165 opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_STORE;
4166 /* Enable 128-bit AVX instruction generation
4167 for the auto-vectorizer. */
4168 if (TARGET_AVX128_OPTIMAL
4169 && !(opts_set->x_target_flags & MASK_PREFER_AVX128))
4170 opts->x_target_flags |= MASK_PREFER_AVX128;
4171 }
4172
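  /* Parse the comma separated -mrecip= list; a leading '!' on an entry
     disables the corresponding reciprocal approximation.  */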
4173 if (opts->x_ix86_recip_name)
4174 {
4175 char *p = ASTRDUP (opts->x_ix86_recip_name);
4176 char *q;
4177 unsigned int mask, i;
4178 bool invert;
4179
4180 while ((q = strtok (p, ",")) != NULL)
4181 {
4182 p = NULL;
4183 if (*q == '!')
4184 {
4185 invert = true;
4186 q++;
4187 }
4188 else
4189 invert = false;
4190
4191 if (!strcmp (q, "default"))
4192 mask = RECIP_MASK_ALL;
4193 else
4194 {
4195 for (i = 0; i < ARRAY_SIZE (recip_options); i++)
4196 if (!strcmp (q, recip_options[i].string))
4197 {
4198 mask = recip_options[i].mask;
4199 break;
4200 }
4201
4202 if (i == ARRAY_SIZE (recip_options))
4203 {
4204 error ("unknown option for -mrecip=%s", q);
4205 invert = false;
4206 mask = RECIP_MASK_NONE;
4207 }
4208 }
4209
4210 opts->x_recip_mask_explicit |= mask;
4211 if (invert)
4212 opts->x_recip_mask &= ~mask;
4213 else
4214 opts->x_recip_mask |= mask;
4215 }
4216 }
4217
4218 if (TARGET_RECIP_P (opts->x_target_flags))
4219 opts->x_recip_mask |= RECIP_MASK_ALL & ~opts->x_recip_mask_explicit;
4220 else if (opts_set->x_target_flags & MASK_RECIP)
4221 opts->x_recip_mask &= ~(RECIP_MASK_ALL & ~opts->x_recip_mask_explicit);
4222
4223 /* Default long double to 64-bit for 32-bit Bionic and to __float128
4224 for 64-bit Bionic. */
4225 if (TARGET_HAS_BIONIC
4226 && !(opts_set->x_target_flags
4227 & (MASK_LONG_DOUBLE_64 | MASK_LONG_DOUBLE_128)))
4228 opts->x_target_flags |= (TARGET_64BIT
4229 ? MASK_LONG_DOUBLE_128
4230 : MASK_LONG_DOUBLE_64);
4231
4232 /* Only one of them can be active. */
4233 gcc_assert ((opts->x_target_flags & MASK_LONG_DOUBLE_64) == 0
4234 || (opts->x_target_flags & MASK_LONG_DOUBLE_128) == 0);
4235
4236 /* Save the initial options in case the user does function specific
4237 options. */
4238 if (main_args_p)
4239 target_option_default_node = target_option_current_node
4240 = build_target_option_node (opts);
4241
4242 /* Handle stack protector */
4243 if (!opts_set->x_ix86_stack_protector_guard)
4244 opts->x_ix86_stack_protector_guard
4245 = TARGET_HAS_BIONIC ? SSP_GLOBAL : SSP_TLS;
4246
4247 /* Handle -mmemcpy-strategy= and -mmemset-strategy= */
4248 if (opts->x_ix86_tune_memcpy_strategy)
4249 {
4250 char *str = xstrdup (opts->x_ix86_tune_memcpy_strategy);
4251 ix86_parse_stringop_strategy_string (str, false);
4252 free (str);
4253 }
4254
4255 if (opts->x_ix86_tune_memset_strategy)
4256 {
4257 char *str = xstrdup (opts->x_ix86_tune_memset_strategy);
4258 ix86_parse_stringop_strategy_string (str, true);
4259 free (str);
4260 }
4261 }
4262
4263 /* Implement the TARGET_OPTION_OVERRIDE hook. */
4264
4265 static void
4266 ix86_option_override (void)
4267 {
4268 opt_pass *pass_insert_vzeroupper = make_pass_insert_vzeroupper (g);
4269 static struct register_pass_info insert_vzeroupper_info
4270 = { pass_insert_vzeroupper, "reload",
4271 1, PASS_POS_INSERT_AFTER
4272 };
4273
4274 ix86_option_override_internal (true, &global_options, &global_options_set);
4275
4276
4277 /* This needs to be done at start up. It's convenient to do it here. */
4278 register_pass (&insert_vzeroupper_info);
4279 }
4280
4281 /* Update register usage after having seen the compiler flags. */
4282
4283 static void
4284 ix86_conditional_register_usage (void)
4285 {
4286 int i, c_mask;
4287 unsigned int j;
4288
4289 /* The PIC register, if it exists, is fixed. */
4290 j = PIC_OFFSET_TABLE_REGNUM;
4291 if (j != INVALID_REGNUM)
4292 fixed_regs[j] = call_used_regs[j] = 1;
4293
4294 /* For 32-bit targets, squash the REX registers. */
4295 if (! TARGET_64BIT)
4296 {
4297 for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
4298 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4299 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
4300 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4301 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
4302 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4303 }
4304
4305 /* See the definition of CALL_USED_REGISTERS in i386.h. */
4306 c_mask = (TARGET_64BIT_MS_ABI ? (1 << 3)
4307 : TARGET_64BIT ? (1 << 2)
4308 : (1 << 1));
4309
4310 CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
4311
4312 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4313 {
4314 /* Set/reset conditionally defined registers from
4315 CALL_USED_REGISTERS initializer. */
4316 if (call_used_regs[i] > 1)
4317 call_used_regs[i] = !!(call_used_regs[i] & c_mask);
4318
4319 /* Calculate registers of CLOBBERED_REGS register set
4320 as call used registers from GENERAL_REGS register set. */
4321 if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
4322 && call_used_regs[i])
4323 SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
4324 }
4325
4326 /* If MMX is disabled, squash the registers. */
4327 if (! TARGET_MMX)
4328 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4329 if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i))
4330 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4331
4332 /* If SSE is disabled, squash the registers. */
4333 if (! TARGET_SSE)
4334 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4335 if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i))
4336 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4337
4338 /* If the FPU is disabled, squash the registers. */
4339 if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
4340 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4341 if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
4342 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4343
4344 /* If AVX512F is disabled, squash the registers. */
4345 if (! TARGET_AVX512F)
4346 {
4347 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
4348 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4349
4350 for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
4351 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4352 }
4353 }
4354
4355 \f
4356 /* Save the current options */
4357
4358 static void
4359 ix86_function_specific_save (struct cl_target_option *ptr,
4360 struct gcc_options *opts)
4361 {
4362 ptr->arch = ix86_arch;
4363 ptr->schedule = ix86_schedule;
4364 ptr->tune = ix86_tune;
4365 ptr->branch_cost = ix86_branch_cost;
4366 ptr->tune_defaulted = ix86_tune_defaulted;
4367 ptr->arch_specified = ix86_arch_specified;
4368 ptr->x_ix86_isa_flags_explicit = opts->x_ix86_isa_flags_explicit;
4369 ptr->x_ix86_target_flags_explicit = opts->x_ix86_target_flags_explicit;
4370 ptr->x_recip_mask_explicit = opts->x_recip_mask_explicit;
4371 ptr->x_ix86_arch_string = opts->x_ix86_arch_string;
4372 ptr->x_ix86_tune_string = opts->x_ix86_tune_string;
4373 ptr->x_ix86_cmodel = opts->x_ix86_cmodel;
4374 ptr->x_ix86_abi = opts->x_ix86_abi;
4375 ptr->x_ix86_asm_dialect = opts->x_ix86_asm_dialect;
4376 ptr->x_ix86_branch_cost = opts->x_ix86_branch_cost;
4377 ptr->x_ix86_dump_tunes = opts->x_ix86_dump_tunes;
4378 ptr->x_ix86_force_align_arg_pointer = opts->x_ix86_force_align_arg_pointer;
4379 ptr->x_ix86_force_drap = opts->x_ix86_force_drap;
4380 ptr->x_ix86_incoming_stack_boundary_arg = opts->x_ix86_incoming_stack_boundary_arg;
4381 ptr->x_ix86_pmode = opts->x_ix86_pmode;
4382 ptr->x_ix86_preferred_stack_boundary_arg = opts->x_ix86_preferred_stack_boundary_arg;
4383 ptr->x_ix86_recip_name = opts->x_ix86_recip_name;
4384 ptr->x_ix86_regparm = opts->x_ix86_regparm;
4385 ptr->x_ix86_section_threshold = opts->x_ix86_section_threshold;
4386 ptr->x_ix86_sse2avx = opts->x_ix86_sse2avx;
4387 ptr->x_ix86_stack_protector_guard = opts->x_ix86_stack_protector_guard;
4388 ptr->x_ix86_stringop_alg = opts->x_ix86_stringop_alg;
4389 ptr->x_ix86_tls_dialect = opts->x_ix86_tls_dialect;
4390 ptr->x_ix86_tune_ctrl_string = opts->x_ix86_tune_ctrl_string;
4391 ptr->x_ix86_tune_memcpy_strategy = opts->x_ix86_tune_memcpy_strategy;
4392 ptr->x_ix86_tune_memset_strategy = opts->x_ix86_tune_memset_strategy;
4393 ptr->x_ix86_tune_no_default = opts->x_ix86_tune_no_default;
4394 ptr->x_ix86_veclibabi_type = opts->x_ix86_veclibabi_type;
4395
4396 /* The fields are char but the variables are not; make sure the
4397 values fit in the fields. */
4398 gcc_assert (ptr->arch == ix86_arch);
4399 gcc_assert (ptr->schedule == ix86_schedule);
4400 gcc_assert (ptr->tune == ix86_tune);
4401 gcc_assert (ptr->branch_cost == ix86_branch_cost);
4402 }
4403
4404 /* Restore the current options */
4405
4406 static void
4407 ix86_function_specific_restore (struct gcc_options *opts,
4408 struct cl_target_option *ptr)
4409 {
4410 enum processor_type old_tune = ix86_tune;
4411 enum processor_type old_arch = ix86_arch;
4412 unsigned int ix86_arch_mask;
4413 int i;
4414
4415 /* We don't change -fPIC. */
4416 opts->x_flag_pic = flag_pic;
4417
4418 ix86_arch = (enum processor_type) ptr->arch;
4419 ix86_schedule = (enum attr_cpu) ptr->schedule;
4420 ix86_tune = (enum processor_type) ptr->tune;
4421 opts->x_ix86_branch_cost = ptr->branch_cost;
4422 ix86_tune_defaulted = ptr->tune_defaulted;
4423 ix86_arch_specified = ptr->arch_specified;
4424 opts->x_ix86_isa_flags_explicit = ptr->x_ix86_isa_flags_explicit;
4425 opts->x_ix86_target_flags_explicit = ptr->x_ix86_target_flags_explicit;
4426 opts->x_recip_mask_explicit = ptr->x_recip_mask_explicit;
4427 opts->x_ix86_arch_string = ptr->x_ix86_arch_string;
4428 opts->x_ix86_tune_string = ptr->x_ix86_tune_string;
4429 opts->x_ix86_cmodel = ptr->x_ix86_cmodel;
4430 opts->x_ix86_abi = ptr->x_ix86_abi;
4431 opts->x_ix86_asm_dialect = ptr->x_ix86_asm_dialect;
4432 opts->x_ix86_branch_cost = ptr->x_ix86_branch_cost;
4433 opts->x_ix86_dump_tunes = ptr->x_ix86_dump_tunes;
4434 opts->x_ix86_force_align_arg_pointer = ptr->x_ix86_force_align_arg_pointer;
4435 opts->x_ix86_force_drap = ptr->x_ix86_force_drap;
4436 opts->x_ix86_incoming_stack_boundary_arg = ptr->x_ix86_incoming_stack_boundary_arg;
4437 opts->x_ix86_pmode = ptr->x_ix86_pmode;
4438 opts->x_ix86_preferred_stack_boundary_arg = ptr->x_ix86_preferred_stack_boundary_arg;
4439 opts->x_ix86_recip_name = ptr->x_ix86_recip_name;
4440 opts->x_ix86_regparm = ptr->x_ix86_regparm;
4441 opts->x_ix86_section_threshold = ptr->x_ix86_section_threshold;
4442 opts->x_ix86_sse2avx = ptr->x_ix86_sse2avx;
4443 opts->x_ix86_stack_protector_guard = ptr->x_ix86_stack_protector_guard;
4444 opts->x_ix86_stringop_alg = ptr->x_ix86_stringop_alg;
4445 opts->x_ix86_tls_dialect = ptr->x_ix86_tls_dialect;
4446 opts->x_ix86_tune_ctrl_string = ptr->x_ix86_tune_ctrl_string;
4447 opts->x_ix86_tune_memcpy_strategy = ptr->x_ix86_tune_memcpy_strategy;
4448 opts->x_ix86_tune_memset_strategy = ptr->x_ix86_tune_memset_strategy;
4449 opts->x_ix86_tune_no_default = ptr->x_ix86_tune_no_default;
4450 opts->x_ix86_veclibabi_type = ptr->x_ix86_veclibabi_type;
4451
4452 /* Recreate the arch feature tests if the arch changed */
4453 if (old_arch != ix86_arch)
4454 {
4455 ix86_arch_mask = 1u << ix86_arch;
4456 for (i = 0; i < X86_ARCH_LAST; ++i)
4457 ix86_arch_features[i]
4458 = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
4459 }
4460
4461 /* Recreate the tune optimization tests */
4462 if (old_tune != ix86_tune)
4463 set_ix86_tune_features (ix86_tune, false);
4464 }
4465
4466 /* Print the current options */
4467
4468 static void
4469 ix86_function_specific_print (FILE *file, int indent,
4470 struct cl_target_option *ptr)
4471 {
4472 char *target_string
4473 = ix86_target_string (ptr->x_ix86_isa_flags, ptr->x_target_flags,
4474 NULL, NULL, ptr->x_ix86_fpmath, false);
4475
4476 gcc_assert (ptr->arch < PROCESSOR_max);
4477 fprintf (file, "%*sarch = %d (%s)\n",
4478 indent, "",
4479 ptr->arch, processor_target_table[ptr->arch].name);
4480
4481 gcc_assert (ptr->tune < PROCESSOR_max);
4482 fprintf (file, "%*stune = %d (%s)\n",
4483 indent, "",
4484 ptr->tune, processor_target_table[ptr->tune].name);
4485
4486 fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
4487
4488 if (target_string)
4489 {
4490 fprintf (file, "%*s%s\n", indent, "", target_string);
4491 free (target_string);
4492 }
4493 }
4494
4495 \f
4496 /* Inner function to process the attribute((target(...))), take an argument and
4497 set the current options from the argument. If we have a list, recursively go
4498 over the list. */
4499
4500 static bool
4501 ix86_valid_target_attribute_inner_p (tree args, char *p_strings[],
4502 struct gcc_options *opts,
4503 struct gcc_options *opts_set,
4504 struct gcc_options *enum_opts_set)
4505 {
4506 char *next_optstr;
4507 bool ret = true;
4508
4509 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
4510 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
4511 #define IX86_ATTR_ENUM(S,O) { S, sizeof (S)-1, ix86_opt_enum, O, 0 }
4512 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
4513 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
4514
4515 enum ix86_opt_type
4516 {
4517 ix86_opt_unknown,
4518 ix86_opt_yes,
4519 ix86_opt_no,
4520 ix86_opt_str,
4521 ix86_opt_enum,
4522 ix86_opt_isa
4523 };
4524
4525 static const struct
4526 {
4527 const char *string;
4528 size_t len;
4529 enum ix86_opt_type type;
4530 int opt;
4531 int mask;
4532 } attrs[] = {
4533 /* isa options */
4534 IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
4535 IX86_ATTR_ISA ("abm", OPT_mabm),
4536 IX86_ATTR_ISA ("bmi", OPT_mbmi),
4537 IX86_ATTR_ISA ("bmi2", OPT_mbmi2),
4538 IX86_ATTR_ISA ("lzcnt", OPT_mlzcnt),
4539 IX86_ATTR_ISA ("tbm", OPT_mtbm),
4540 IX86_ATTR_ISA ("aes", OPT_maes),
4541 IX86_ATTR_ISA ("sha", OPT_msha),
4542 IX86_ATTR_ISA ("avx", OPT_mavx),
4543 IX86_ATTR_ISA ("avx2", OPT_mavx2),
4544 IX86_ATTR_ISA ("avx512f", OPT_mavx512f),
4545 IX86_ATTR_ISA ("avx512pf", OPT_mavx512pf),
4546 IX86_ATTR_ISA ("avx512er", OPT_mavx512er),
4547 IX86_ATTR_ISA ("avx512cd", OPT_mavx512cd),
4548 IX86_ATTR_ISA ("mmx", OPT_mmmx),
4549 IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
4550 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
4551 IX86_ATTR_ISA ("sse", OPT_msse),
4552 IX86_ATTR_ISA ("sse2", OPT_msse2),
4553 IX86_ATTR_ISA ("sse3", OPT_msse3),
4554 IX86_ATTR_ISA ("sse4", OPT_msse4),
4555 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1),
4556 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2),
4557 IX86_ATTR_ISA ("sse4a", OPT_msse4a),
4558 IX86_ATTR_ISA ("ssse3", OPT_mssse3),
4559 IX86_ATTR_ISA ("fma4", OPT_mfma4),
4560 IX86_ATTR_ISA ("fma", OPT_mfma),
4561 IX86_ATTR_ISA ("xop", OPT_mxop),
4562 IX86_ATTR_ISA ("lwp", OPT_mlwp),
4563 IX86_ATTR_ISA ("fsgsbase", OPT_mfsgsbase),
4564 IX86_ATTR_ISA ("rdrnd", OPT_mrdrnd),
4565 IX86_ATTR_ISA ("f16c", OPT_mf16c),
4566 IX86_ATTR_ISA ("rtm", OPT_mrtm),
4567 IX86_ATTR_ISA ("hle", OPT_mhle),
4568 IX86_ATTR_ISA ("prfchw", OPT_mprfchw),
4569 IX86_ATTR_ISA ("rdseed", OPT_mrdseed),
4570 IX86_ATTR_ISA ("adx", OPT_madx),
4571 IX86_ATTR_ISA ("fxsr", OPT_mfxsr),
4572 IX86_ATTR_ISA ("xsave", OPT_mxsave),
4573 IX86_ATTR_ISA ("xsaveopt", OPT_mxsaveopt),
4574 IX86_ATTR_ISA ("prefetchwt1", OPT_mprefetchwt1),
4575 IX86_ATTR_ISA ("clflushopt", OPT_mclflushopt),
4576 IX86_ATTR_ISA ("xsavec", OPT_mxsavec),
4577 IX86_ATTR_ISA ("xsaves", OPT_mxsaves),
4578
4579 /* enum options */
4580 IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_),
4581
4582 /* string options */
4583 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
4584 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE),
4585
4586 /* flag options */
4587 IX86_ATTR_YES ("cld",
4588 OPT_mcld,
4589 MASK_CLD),
4590
4591 IX86_ATTR_NO ("fancy-math-387",
4592 OPT_mfancy_math_387,
4593 MASK_NO_FANCY_MATH_387),
4594
4595 IX86_ATTR_YES ("ieee-fp",
4596 OPT_mieee_fp,
4597 MASK_IEEE_FP),
4598
4599 IX86_ATTR_YES ("inline-all-stringops",
4600 OPT_minline_all_stringops,
4601 MASK_INLINE_ALL_STRINGOPS),
4602
4603 IX86_ATTR_YES ("inline-stringops-dynamically",
4604 OPT_minline_stringops_dynamically,
4605 MASK_INLINE_STRINGOPS_DYNAMICALLY),
4606
4607 IX86_ATTR_NO ("align-stringops",
4608 OPT_mno_align_stringops,
4609 MASK_NO_ALIGN_STRINGOPS),
4610
4611 IX86_ATTR_YES ("recip",
4612 OPT_mrecip,
4613 MASK_RECIP),
4614
4615 };
4616
4617 /* If this is a list, recurse to get the options. */
4618 if (TREE_CODE (args) == TREE_LIST)
4619 {
4620 bool ret = true;
4621
4622 for (; args; args = TREE_CHAIN (args))
4623 if (TREE_VALUE (args)
4624 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args),
4625 p_strings, opts, opts_set,
4626 enum_opts_set))
4627 ret = false;
4628
4629 return ret;
4630 }
4631
4632 else if (TREE_CODE (args) != STRING_CST)
4633 {
4634 error ("attribute %<target%> argument not a string");
4635 return false;
4636 }
4637
4638 /* Handle multiple arguments separated by commas. */
4639 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
4640
4641 while (next_optstr && *next_optstr != '\0')
4642 {
4643 char *p = next_optstr;
4644 char *orig_p = p;
4645 char *comma = strchr (next_optstr, ',');
4646 const char *opt_string;
4647 size_t len, opt_len;
4648 int opt;
4649 bool opt_set_p;
4650 char ch;
4651 unsigned i;
4652 enum ix86_opt_type type = ix86_opt_unknown;
4653 int mask = 0;
4654
4655 if (comma)
4656 {
4657 *comma = '\0';
4658 len = comma - next_optstr;
4659 next_optstr = comma + 1;
4660 }
4661 else
4662 {
4663 len = strlen (p);
4664 next_optstr = NULL;
4665 }
4666
4667 /* Recognize no-xxx. */
4668 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
4669 {
4670 opt_set_p = false;
4671 p += 3;
4672 len -= 3;
4673 }
4674 else
4675 opt_set_p = true;
4676
4677 /* Find the option. */
4678 ch = *p;
4679 opt = N_OPTS;
4680 for (i = 0; i < ARRAY_SIZE (attrs); i++)
4681 {
4682 type = attrs[i].type;
4683 opt_len = attrs[i].len;
4684 if (ch == attrs[i].string[0]
4685 && ((type != ix86_opt_str && type != ix86_opt_enum)
4686 ? len == opt_len
4687 : len > opt_len)
4688 && memcmp (p, attrs[i].string, opt_len) == 0)
4689 {
4690 opt = attrs[i].opt;
4691 mask = attrs[i].mask;
4692 opt_string = attrs[i].string;
4693 break;
4694 }
4695 }
4696
4697 /* Process the option. */
4698 if (opt == N_OPTS)
4699 {
4700 error ("attribute(target(\"%s\")) is unknown", orig_p);
4701 ret = false;
4702 }
4703
4704 else if (type == ix86_opt_isa)
4705 {
4706 struct cl_decoded_option decoded;
4707
4708 generate_option (opt, NULL, opt_set_p, CL_TARGET, &decoded);
4709 ix86_handle_option (opts, opts_set,
4710 &decoded, input_location);
4711 }
4712
4713 else if (type == ix86_opt_yes || type == ix86_opt_no)
4714 {
4715 if (type == ix86_opt_no)
4716 opt_set_p = !opt_set_p;
4717
4718 if (opt_set_p)
4719 opts->x_target_flags |= mask;
4720 else
4721 opts->x_target_flags &= ~mask;
4722 }
4723
4724 else if (type == ix86_opt_str)
4725 {
4726 if (p_strings[opt])
4727 {
4728 error ("option(\"%s\") was already specified", opt_string);
4729 ret = false;
4730 }
4731 else
4732 p_strings[opt] = xstrdup (p + opt_len);
4733 }
4734
4735 else if (type == ix86_opt_enum)
4736 {
4737 bool arg_ok;
4738 int value;
4739
4740 arg_ok = opt_enum_arg_to_value (opt, p + opt_len, &value, CL_TARGET);
4741 if (arg_ok)
4742 set_option (opts, enum_opts_set, opt, value,
4743 p + opt_len, DK_UNSPECIFIED, input_location,
4744 global_dc);
4745 else
4746 {
4747 error ("attribute(target(\"%s\")) is unknown", orig_p);
4748 ret = false;
4749 }
4750 }
4751
4752 else
4753 gcc_unreachable ();
4754 }
4755
4756 return ret;
4757 }
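/* As an illustrative sketch of what this parser accepts (the function name
   below is purely hypothetical), a declaration such as

     __attribute__((target ("avx2,no-fma,fpmath=sse,arch=core2")))
     int hypothetical_kernel (void);

   is split on commas: "avx2" and "no-fma" are ISA options (the "no-" prefix
   clears the flag), "fpmath=sse" is an enum option, and "arch=core2" is a
   string option stored in p_strings for later processing.  */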
4758
4759 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
4760
4761 tree
4762 ix86_valid_target_attribute_tree (tree args,
4763 struct gcc_options *opts,
4764 struct gcc_options *opts_set)
4765 {
4766 const char *orig_arch_string = opts->x_ix86_arch_string;
4767 const char *orig_tune_string = opts->x_ix86_tune_string;
4768 enum fpmath_unit orig_fpmath_set = opts_set->x_ix86_fpmath;
4769 int orig_tune_defaulted = ix86_tune_defaulted;
4770 int orig_arch_specified = ix86_arch_specified;
4771 char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL };
4772 tree t = NULL_TREE;
4773 int i;
4774 struct cl_target_option *def
4775 = TREE_TARGET_OPTION (target_option_default_node);
4776 struct gcc_options enum_opts_set;
4777
4778 memset (&enum_opts_set, 0, sizeof (enum_opts_set));
4779
4780 /* Process each of the options on the chain. */
4781 if (! ix86_valid_target_attribute_inner_p (args, option_strings, opts,
4782 opts_set, &enum_opts_set))
4783 return error_mark_node;
4784
4785 /* If the changed options are different from the default, rerun
4786 ix86_option_override_internal, and then save the options away.
4787 The string options are attribute options, and will be undone
4788 when we copy the save structure. */
4789 if (opts->x_ix86_isa_flags != def->x_ix86_isa_flags
4790 || opts->x_target_flags != def->x_target_flags
4791 || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
4792 || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
4793 || enum_opts_set.x_ix86_fpmath)
4794 {
4795 /* If we are using the default tune= or arch=, undo the string assigned,
4796 and use the default. */
4797 if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
4798 opts->x_ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
4799 else if (!orig_arch_specified)
4800 opts->x_ix86_arch_string = NULL;
4801
4802 if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
4803 opts->x_ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
4804 else if (orig_tune_defaulted)
4805 opts->x_ix86_tune_string = NULL;
4806
4807 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
4808 if (enum_opts_set.x_ix86_fpmath)
4809 opts_set->x_ix86_fpmath = (enum fpmath_unit) 1;
4810 else if (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
4811 && TARGET_SSE_P (opts->x_ix86_isa_flags))
4812 {
4813 opts->x_ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
4814 opts_set->x_ix86_fpmath = (enum fpmath_unit) 1;
4815 }
4816
4817 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
4818 ix86_option_override_internal (false, opts, opts_set);
4819
4820 /* Add any builtin functions with the new isa if any. */
4821 ix86_add_new_builtins (opts->x_ix86_isa_flags);
4822
4823 /* Save the current options unless we are validating options for
4824 #pragma. */
4825 t = build_target_option_node (opts);
4826
4827 opts->x_ix86_arch_string = orig_arch_string;
4828 opts->x_ix86_tune_string = orig_tune_string;
4829 opts_set->x_ix86_fpmath = orig_fpmath_set;
4830
4831 /* Free up memory allocated to hold the strings */
4832 for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
4833 free (option_strings[i]);
4834 }
4835
4836 return t;
4837 }
4838
4839 /* Hook to validate attribute((target("string"))). */
4840
4841 static bool
4842 ix86_valid_target_attribute_p (tree fndecl,
4843 tree ARG_UNUSED (name),
4844 tree args,
4845 int ARG_UNUSED (flags))
4846 {
4847 struct gcc_options func_options;
4848 tree new_target, new_optimize;
4849 bool ret = true;
4850
4851 /* attribute((target("default"))) does nothing, beyond
4852 affecting multi-versioning. */
4853 if (TREE_VALUE (args)
4854 && TREE_CODE (TREE_VALUE (args)) == STRING_CST
4855 && TREE_CHAIN (args) == NULL_TREE
4856 && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0)
4857 return true;
4858
4859 tree old_optimize = build_optimization_node (&global_options);
4860
4861 /* Get the optimization options of the current function. */
4862 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
4863
4864 if (!func_optimize)
4865 func_optimize = old_optimize;
4866
4867 /* Init func_options. */
4868 memset (&func_options, 0, sizeof (func_options));
4869 init_options_struct (&func_options, NULL);
4870 lang_hooks.init_options_struct (&func_options);
4871
4872 cl_optimization_restore (&func_options,
4873 TREE_OPTIMIZATION (func_optimize));
4874
4875 /* Initialize func_options to the default before its target options can
4876 be set. */
4877 cl_target_option_restore (&func_options,
4878 TREE_TARGET_OPTION (target_option_default_node));
4879
4880 new_target = ix86_valid_target_attribute_tree (args, &func_options,
4881 &global_options_set);
4882
4883 new_optimize = build_optimization_node (&func_options);
4884
4885 if (new_target == error_mark_node)
4886 ret = false;
4887
4888 else if (fndecl && new_target)
4889 {
4890 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
4891
4892 if (old_optimize != new_optimize)
4893 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
4894 }
4895
4896 return ret;
4897 }
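/* For illustration, the "default" special case above matters for function
   multi-versioning, e.g. (hypothetical name):

     __attribute__((target ("default"))) int hypothetical_foo (void);
     __attribute__((target ("avx2")))    int hypothetical_foo (void);

   The "default" version receives no target-specific options here; the
   dispatching between versions is handled elsewhere.  */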
4898
4899 \f
4900 /* Hook to determine if one function can safely inline another. */
4901
4902 static bool
4903 ix86_can_inline_p (tree caller, tree callee)
4904 {
4905 bool ret = false;
4906 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
4907 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
4908
4909 /* If callee has no option attributes, then it is ok to inline. */
4910 if (!callee_tree)
4911 ret = true;
4912
4913 /* If caller has no option attributes, but callee does then it is not ok to
4914 inline. */
4915 else if (!caller_tree)
4916 ret = false;
4917
4918 else
4919 {
4920 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
4921 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
4922
4923 /* Callee's isa options should be a subset of the caller's, i.e. an SSE4 function
4924 can inline an SSE2 function but an SSE2 function can't inline an SSE4
4925 function. */
4926 if ((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
4927 != callee_opts->x_ix86_isa_flags)
4928 ret = false;
4929
4930 /* See if we have the same non-isa options. */
4931 else if (caller_opts->x_target_flags != callee_opts->x_target_flags)
4932 ret = false;
4933
4934 /* See if arch, tune, etc. are the same. */
4935 else if (caller_opts->arch != callee_opts->arch)
4936 ret = false;
4937
4938 else if (caller_opts->tune != callee_opts->tune)
4939 ret = false;
4940
4941 else if (caller_opts->x_ix86_fpmath != callee_opts->x_ix86_fpmath)
4942 ret = false;
4943
4944 else if (caller_opts->branch_cost != callee_opts->branch_cost)
4945 ret = false;
4946
4947 else
4948 ret = true;
4949 }
4950
4951 return ret;
4952 }
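/* Illustrative consequence of the checks above (hypothetical functions):
   if hypothetical_callee is declared with __attribute__((target ("avx2")))
   and hypothetical_caller is compiled without -mavx2, the caller cannot
   inline the callee, because the callee's ISA flags are not a subset of
   the caller's; the reverse direction would be allowed.  */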
4953
4954 \f
4955 /* Remember the last target of ix86_set_current_function. */
4956 static GTY(()) tree ix86_previous_fndecl;
4957
4958 /* Invalidate ix86_previous_fndecl cache. */
4959 void
4960 ix86_reset_previous_fndecl (void)
4961 {
4962 ix86_previous_fndecl = NULL_TREE;
4963 }
4964
4965 /* Establish appropriate back-end context for processing the function
4966 FNDECL. The argument might be NULL to indicate processing at top
4967 level, outside of any function scope. */
4968 static void
4969 ix86_set_current_function (tree fndecl)
4970 {
4971 /* Only change the context if the function changes. This hook is called
4972 several times in the course of compiling a function, and we don't want to
4973 slow things down too much or call target_reinit when it isn't safe. */
4974 if (fndecl && fndecl != ix86_previous_fndecl)
4975 {
4976 tree old_tree = (ix86_previous_fndecl
4977 ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)
4978 : NULL_TREE);
4979
4980 tree new_tree = (fndecl
4981 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
4982 : NULL_TREE);
4983
4984 ix86_previous_fndecl = fndecl;
4985 if (old_tree == new_tree)
4986 ;
4987
4988 else if (new_tree)
4989 {
4990 cl_target_option_restore (&global_options,
4991 TREE_TARGET_OPTION (new_tree));
4992 if (TREE_TARGET_GLOBALS (new_tree))
4993 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
4994 else
4995 TREE_TARGET_GLOBALS (new_tree)
4996 = save_target_globals_default_opts ();
4997 }
4998
4999 else if (old_tree)
5000 {
5001 new_tree = target_option_current_node;
5002 cl_target_option_restore (&global_options,
5003 TREE_TARGET_OPTION (new_tree));
5004 if (TREE_TARGET_GLOBALS (new_tree))
5005 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
5006 else if (new_tree == target_option_default_node)
5007 restore_target_globals (&default_target_globals);
5008 else
5009 TREE_TARGET_GLOBALS (new_tree)
5010 = save_target_globals_default_opts ();
5011 }
5012 }
5013 }
5014
5015 \f
5016 /* Return true if this goes in large data/bss. */
5017
5018 static bool
5019 ix86_in_large_data_p (tree exp)
5020 {
5021 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
5022 return false;
5023
5024 /* Functions are never large data. */
5025 if (TREE_CODE (exp) == FUNCTION_DECL)
5026 return false;
5027
5028 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
5029 {
5030 const char *section = DECL_SECTION_NAME (exp);
5031 if (strcmp (section, ".ldata") == 0
5032 || strcmp (section, ".lbss") == 0)
5033 return true;
5034 return false;
5035 }
5036 else
5037 {
5038 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
5039
5040 /* If this is an incomplete type with size 0, then we can't put it
5041 in data because it might be too big when completed. Also,
5042 int_size_in_bytes returns -1 if the size can vary or is larger than
5043 an integer, in which case it is also safer to assume that it goes in
5044 large data. */
5045 if (size <= 0 || size > ix86_section_threshold)
5046 return true;
5047 }
5048
5049 return false;
5050 }
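/* Rough illustration of the threshold check above: with -mcmodel=medium
   and the default -mlarge-data-threshold=65536, a definition such as

     static char hypothetical_buffer[1 << 20];

   (1 MiB, above the threshold) would be treated as large data and placed
   in .ldata/.lbss, while small scalars stay in the normal .data/.bss
   sections.  The buffer name is only illustrative.  */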
5051
5052 /* Switch to the appropriate section for output of DECL.
5053 DECL is either a `VAR_DECL' node or a constant of some sort.
5054 RELOC indicates whether forming the initial value of DECL requires
5055 link-time relocations. */
5056
5057 ATTRIBUTE_UNUSED static section *
5058 x86_64_elf_select_section (tree decl, int reloc,
5059 unsigned HOST_WIDE_INT align)
5060 {
5061 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5062 && ix86_in_large_data_p (decl))
5063 {
5064 const char *sname = NULL;
5065 unsigned int flags = SECTION_WRITE;
5066 switch (categorize_decl_for_section (decl, reloc))
5067 {
5068 case SECCAT_DATA:
5069 sname = ".ldata";
5070 break;
5071 case SECCAT_DATA_REL:
5072 sname = ".ldata.rel";
5073 break;
5074 case SECCAT_DATA_REL_LOCAL:
5075 sname = ".ldata.rel.local";
5076 break;
5077 case SECCAT_DATA_REL_RO:
5078 sname = ".ldata.rel.ro";
5079 break;
5080 case SECCAT_DATA_REL_RO_LOCAL:
5081 sname = ".ldata.rel.ro.local";
5082 break;
5083 case SECCAT_BSS:
5084 sname = ".lbss";
5085 flags |= SECTION_BSS;
5086 break;
5087 case SECCAT_RODATA:
5088 case SECCAT_RODATA_MERGE_STR:
5089 case SECCAT_RODATA_MERGE_STR_INIT:
5090 case SECCAT_RODATA_MERGE_CONST:
5091 sname = ".lrodata";
5092 flags = 0;
5093 break;
5094 case SECCAT_SRODATA:
5095 case SECCAT_SDATA:
5096 case SECCAT_SBSS:
5097 gcc_unreachable ();
5098 case SECCAT_TEXT:
5099 case SECCAT_TDATA:
5100 case SECCAT_TBSS:
5101 /* We don't split these for the medium model. Place them into
5102 default sections and hope for the best. */
5103 break;
5104 }
5105 if (sname)
5106 {
5107 /* We might get called with string constants, but get_named_section
5108 doesn't like them as they are not DECLs. Also, we need to set
5109 flags in that case. */
5110 if (!DECL_P (decl))
5111 return get_section (sname, flags, NULL);
5112 return get_named_section (decl, sname, reloc);
5113 }
5114 }
5115 return default_elf_select_section (decl, reloc, align);
5116 }
5117
5118 /* Select a set of attributes for section NAME based on the properties
5119 of DECL and whether or not RELOC indicates that DECL's initializer
5120 might contain runtime relocations. */
5121
5122 static unsigned int ATTRIBUTE_UNUSED
5123 x86_64_elf_section_type_flags (tree decl, const char *name, int reloc)
5124 {
5125 unsigned int flags = default_section_type_flags (decl, name, reloc);
5126
5127 if (decl == NULL_TREE
5128 && (strcmp (name, ".ldata.rel.ro") == 0
5129 || strcmp (name, ".ldata.rel.ro.local") == 0))
5130 flags |= SECTION_RELRO;
5131
5132 if (strcmp (name, ".lbss") == 0
5133 || strncmp (name, ".lbss.", 5) == 0
5134 || strncmp (name, ".gnu.linkonce.lb.", 16) == 0)
5135 flags |= SECTION_BSS;
5136
5137 return flags;
5138 }
5139
5140 /* Build up a unique section name, expressed as a
5141 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
5142 RELOC indicates whether the initial value of EXP requires
5143 link-time relocations. */
5144
5145 static void ATTRIBUTE_UNUSED
5146 x86_64_elf_unique_section (tree decl, int reloc)
5147 {
5148 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5149 && ix86_in_large_data_p (decl))
5150 {
5151 const char *prefix = NULL;
5152 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
5153 bool one_only = DECL_COMDAT_GROUP (decl) && !HAVE_COMDAT_GROUP;
5154
5155 switch (categorize_decl_for_section (decl, reloc))
5156 {
5157 case SECCAT_DATA:
5158 case SECCAT_DATA_REL:
5159 case SECCAT_DATA_REL_LOCAL:
5160 case SECCAT_DATA_REL_RO:
5161 case SECCAT_DATA_REL_RO_LOCAL:
5162 prefix = one_only ? ".ld" : ".ldata";
5163 break;
5164 case SECCAT_BSS:
5165 prefix = one_only ? ".lb" : ".lbss";
5166 break;
5167 case SECCAT_RODATA:
5168 case SECCAT_RODATA_MERGE_STR:
5169 case SECCAT_RODATA_MERGE_STR_INIT:
5170 case SECCAT_RODATA_MERGE_CONST:
5171 prefix = one_only ? ".lr" : ".lrodata";
5172 break;
5173 case SECCAT_SRODATA:
5174 case SECCAT_SDATA:
5175 case SECCAT_SBSS:
5176 gcc_unreachable ();
5177 case SECCAT_TEXT:
5178 case SECCAT_TDATA:
5179 case SECCAT_TBSS:
5180 /* We don't split these for the medium model. Place them into
5181 default sections and hope for the best. */
5182 break;
5183 }
5184 if (prefix)
5185 {
5186 const char *name, *linkonce;
5187 char *string;
5188
5189 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
5190 name = targetm.strip_name_encoding (name);
5191
5192 /* If we're using one_only, then there needs to be a .gnu.linkonce
5193 prefix to the section name. */
5194 linkonce = one_only ? ".gnu.linkonce" : "";
5195
5196 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
5197
5198 set_decl_section_name (decl, string);
5199 return;
5200 }
5201 }
5202 default_unique_section (decl, reloc);
5203 }
5204
5205 #ifdef COMMON_ASM_OP
5206 /* This says how to output assembler code to declare an
5207 uninitialized external linkage data object.
5208
5209 For medium-model x86-64 we need to use the .largecomm directive for
5210 large objects. */
5211 void
5212 x86_elf_aligned_common (FILE *file,
5213 const char *name, unsigned HOST_WIDE_INT size,
5214 int align)
5215 {
5216 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5217 && size > (unsigned int)ix86_section_threshold)
5218 fputs (".largecomm\t", file);
5219 else
5220 fputs (COMMON_ASM_OP, file);
5221 assemble_name (file, name);
5222 fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
5223 size, align / BITS_PER_UNIT);
5224 }
5225 #endif
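/* For example, under -mcmodel=medium an uninitialized common object larger
   than ix86_section_threshold is emitted roughly as

     .largecomm  hypothetical_sym,1048576,32

   whereas a small object uses the ordinary COMMON_ASM_OP (".comm") form;
   the symbol name and sizes above are only illustrative.  */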
5226
5227 /* Utility function for targets to use in implementing
5228 ASM_OUTPUT_ALIGNED_BSS. */
5229
5230 void
5231 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
5232 const char *name, unsigned HOST_WIDE_INT size,
5233 int align)
5234 {
5235 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5236 && size > (unsigned int)ix86_section_threshold)
5237 switch_to_section (get_named_section (decl, ".lbss", 0));
5238 else
5239 switch_to_section (bss_section);
5240 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
5241 #ifdef ASM_DECLARE_OBJECT_NAME
5242 last_assemble_variable_decl = decl;
5243 ASM_DECLARE_OBJECT_NAME (file, name, decl);
5244 #else
5245 /* The standard thing is just to output a label for the object. */
5246 ASM_OUTPUT_LABEL (file, name);
5247 #endif /* ASM_DECLARE_OBJECT_NAME */
5248 ASM_OUTPUT_SKIP (file, size ? size : 1);
5249 }
5250 \f
5251 /* Decide whether we must probe the stack before any space allocation
5252 on this target. It's essentially TARGET_STACK_PROBE except when
5253 -fstack-check causes the stack to be already probed differently. */
5254
5255 bool
5256 ix86_target_stack_probe (void)
5257 {
5258 /* Do not probe the stack twice if static stack checking is enabled. */
5259 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
5260 return false;
5261
5262 return TARGET_STACK_PROBE;
5263 }
5264 \f
5265 /* Decide whether we can make a sibling call to a function. DECL is the
5266 declaration of the function being targeted by the call and EXP is the
5267 CALL_EXPR representing the call. */
5268
5269 static bool
5270 ix86_function_ok_for_sibcall (tree decl, tree exp)
5271 {
5272 tree type, decl_or_type;
5273 rtx a, b;
5274
5275 /* If we are generating position-independent code, we cannot sibcall
5276 optimize any indirect call, or a direct call to a global function,
5277 as the PLT requires %ebx be live. (Darwin does not have a PLT.) */
5278 if (!TARGET_MACHO
5279 && !TARGET_64BIT
5280 && flag_pic
5281 && (!decl || !targetm.binds_local_p (decl)))
5282 return false;
5283
5284 /* If we need to align the outgoing stack, then sibcalling would
5285 unalign the stack, which may break the called function. */
5286 if (ix86_minimum_incoming_stack_boundary (true)
5287 < PREFERRED_STACK_BOUNDARY)
5288 return false;
5289
5290 if (decl)
5291 {
5292 decl_or_type = decl;
5293 type = TREE_TYPE (decl);
5294 }
5295 else
5296 {
5297 /* We're looking at the CALL_EXPR, we need the type of the function. */
5298 type = CALL_EXPR_FN (exp); /* pointer expression */
5299 type = TREE_TYPE (type); /* pointer type */
5300 type = TREE_TYPE (type); /* function type */
5301 decl_or_type = type;
5302 }
5303
5304 /* Check that the return value locations are the same. Like
5305 if we are returning floats on the 80387 register stack, we cannot
5306 make a sibcall from a function that doesn't return a float to a
5307 function that does or, conversely, from a function that does return
5308 a float to a function that doesn't; the necessary stack adjustment
5309 would not be executed. This is also the place we notice
5310 differences in the return value ABI. Note that it is ok for one
5311 of the functions to have void return type as long as the return
5312 value of the other is passed in a register. */
5313 a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
5314 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
5315 cfun->decl, false);
5316 if (STACK_REG_P (a) || STACK_REG_P (b))
5317 {
5318 if (!rtx_equal_p (a, b))
5319 return false;
5320 }
5321 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
5322 ;
5323 else if (!rtx_equal_p (a, b))
5324 return false;
5325
5326 if (TARGET_64BIT)
5327 {
5328 /* The SYSV ABI has more call-clobbered registers;
5329 disallow sibcalls from MS to SYSV. */
5330 if (cfun->machine->call_abi == MS_ABI
5331 && ix86_function_type_abi (type) == SYSV_ABI)
5332 return false;
5333 }
5334 else
5335 {
5336 /* If this call is indirect, we'll need to be able to use a
5337 call-clobbered register for the address of the target function.
5338 Make sure that all such registers are not used for passing
5339 parameters. Note that DLLIMPORT functions are indirect. */
5340 if (!decl
5341 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
5342 {
5343 if (ix86_function_regparm (type, NULL) >= 3)
5344 {
5345 /* ??? Need to count the actual number of registers to be used,
5346 not the possible number of registers. Fix later. */
5347 return false;
5348 }
5349 }
5350 }
5351
5352 /* Otherwise okay. That also includes certain types of indirect calls. */
5353 return true;
5354 }
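/* As a sketch of the 80387 return-value check above: on ia32 a callee that
   returns double leaves its result in %st(0), so a caller whose own return
   value lives in %eax (hypothetical example: an int-returning caller
   tail-calling a double-returning callee) fails the STACK_REG_P comparison
   and the call is kept as a normal call.  */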
5355
5356 /* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
5357 and "sseregparm" calling convention attributes;
5358 arguments as in struct attribute_spec.handler. */
5359
5360 static tree
5361 ix86_handle_cconv_attribute (tree *node, tree name,
5362 tree args,
5363 int flags ATTRIBUTE_UNUSED,
5364 bool *no_add_attrs)
5365 {
5366 if (TREE_CODE (*node) != FUNCTION_TYPE
5367 && TREE_CODE (*node) != METHOD_TYPE
5368 && TREE_CODE (*node) != FIELD_DECL
5369 && TREE_CODE (*node) != TYPE_DECL)
5370 {
5371 warning (OPT_Wattributes, "%qE attribute only applies to functions",
5372 name);
5373 *no_add_attrs = true;
5374 return NULL_TREE;
5375 }
5376
5377 /* Can combine regparm with all attributes but fastcall and thiscall. */
5378 if (is_attribute_p ("regparm", name))
5379 {
5380 tree cst;
5381
5382 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5383 {
5384 error ("fastcall and regparm attributes are not compatible");
5385 }
5386
5387 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5388 {
5389 error ("regparm and thiscall attributes are not compatible");
5390 }
5391
5392 cst = TREE_VALUE (args);
5393 if (TREE_CODE (cst) != INTEGER_CST)
5394 {
5395 warning (OPT_Wattributes,
5396 "%qE attribute requires an integer constant argument",
5397 name);
5398 *no_add_attrs = true;
5399 }
5400 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
5401 {
5402 warning (OPT_Wattributes, "argument to %qE attribute larger than %d",
5403 name, REGPARM_MAX);
5404 *no_add_attrs = true;
5405 }
5406
5407 return NULL_TREE;
5408 }
5409
5410 if (TARGET_64BIT)
5411 {
5412 /* Do not warn when emulating the MS ABI. */
5413 if ((TREE_CODE (*node) != FUNCTION_TYPE
5414 && TREE_CODE (*node) != METHOD_TYPE)
5415 || ix86_function_type_abi (*node) != MS_ABI)
5416 warning (OPT_Wattributes, "%qE attribute ignored",
5417 name);
5418 *no_add_attrs = true;
5419 return NULL_TREE;
5420 }
5421
5422 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
5423 if (is_attribute_p ("fastcall", name))
5424 {
5425 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5426 {
5427 error ("fastcall and cdecl attributes are not compatible");
5428 }
5429 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5430 {
5431 error ("fastcall and stdcall attributes are not compatible");
5432 }
5433 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
5434 {
5435 error ("fastcall and regparm attributes are not compatible");
5436 }
5437 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5438 {
5439 error ("fastcall and thiscall attributes are not compatible");
5440 }
5441 }
5442
5443 /* Can combine stdcall with fastcall (redundant), regparm and
5444 sseregparm. */
5445 else if (is_attribute_p ("stdcall", name))
5446 {
5447 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5448 {
5449 error ("stdcall and cdecl attributes are not compatible");
5450 }
5451 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5452 {
5453 error ("stdcall and fastcall attributes are not compatible");
5454 }
5455 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5456 {
5457 error ("stdcall and thiscall attributes are not compatible");
5458 }
5459 }
5460
5461 /* Can combine cdecl with regparm and sseregparm. */
5462 else if (is_attribute_p ("cdecl", name))
5463 {
5464 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5465 {
5466 error ("stdcall and cdecl attributes are not compatible");
5467 }
5468 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5469 {
5470 error ("fastcall and cdecl attributes are not compatible");
5471 }
5472 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5473 {
5474 error ("cdecl and thiscall attributes are not compatible");
5475 }
5476 }
5477 else if (is_attribute_p ("thiscall", name))
5478 {
5479 if (TREE_CODE (*node) != METHOD_TYPE && pedantic)
5480 warning (OPT_Wattributes, "%qE attribute is used for non-class method",
5481 name);
5482 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5483 {
5484 error ("stdcall and thiscall attributes are not compatible");
5485 }
5486 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5487 {
5488 error ("fastcall and thiscall attributes are not compatible");
5489 }
5490 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5491 {
5492 error ("cdecl and thiscall attributes are not compatible");
5493 }
5494 }
5495
5496 /* Can combine sseregparm with all attributes. */
5497
5498 return NULL_TREE;
5499 }
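/* Illustrative use of the attributes validated above (32-bit only, names
   hypothetical):

     int __attribute__((fastcall))    hypothetical_f (int a, int b);
     int __attribute__((regparm (3))) hypothetical_g (int a, int b, int c);

   Combining, e.g., fastcall with regparm on the same declaration is
   rejected by the checks above.  */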
5500
5501 /* The transactional memory builtins are implicitly regparm or fastcall
5502 depending on the ABI. Override the generic do-nothing attribute that
5503 these builtins were declared with, and replace it with one of the two
5504 attributes that we expect elsewhere. */
5505
5506 static tree
5507 ix86_handle_tm_regparm_attribute (tree *node, tree name ATTRIBUTE_UNUSED,
5508 tree args ATTRIBUTE_UNUSED,
5509 int flags, bool *no_add_attrs)
5510 {
5511 tree alt;
5512
5513 /* In no case do we want to add the placeholder attribute. */
5514 *no_add_attrs = true;
5515
5516 /* The 64-bit ABI is unchanged for transactional memory. */
5517 if (TARGET_64BIT)
5518 return NULL_TREE;
5519
5520 /* ??? Is there a better way to validate 32-bit windows? We have
5521 cfun->machine->call_abi, but that seems to be set only for 64-bit. */
5522 if (CHECK_STACK_LIMIT > 0)
5523 alt = tree_cons (get_identifier ("fastcall"), NULL, NULL);
5524 else
5525 {
5526 alt = tree_cons (NULL, build_int_cst (NULL, 2), NULL);
5527 alt = tree_cons (get_identifier ("regparm"), alt, NULL);
5528 }
5529 decl_attributes (node, alt, flags);
5530
5531 return NULL_TREE;
5532 }
5533
5534 /* This function determines from TYPE the calling-convention. */
5535
5536 unsigned int
5537 ix86_get_callcvt (const_tree type)
5538 {
5539 unsigned int ret = 0;
5540 bool is_stdarg;
5541 tree attrs;
5542
5543 if (TARGET_64BIT)
5544 return IX86_CALLCVT_CDECL;
5545
5546 attrs = TYPE_ATTRIBUTES (type);
5547 if (attrs != NULL_TREE)
5548 {
5549 if (lookup_attribute ("cdecl", attrs))
5550 ret |= IX86_CALLCVT_CDECL;
5551 else if (lookup_attribute ("stdcall", attrs))
5552 ret |= IX86_CALLCVT_STDCALL;
5553 else if (lookup_attribute ("fastcall", attrs))
5554 ret |= IX86_CALLCVT_FASTCALL;
5555 else if (lookup_attribute ("thiscall", attrs))
5556 ret |= IX86_CALLCVT_THISCALL;
5557
5558 /* Regparm isn't allowed for thiscall and fastcall. */
5559 if ((ret & (IX86_CALLCVT_THISCALL | IX86_CALLCVT_FASTCALL)) == 0)
5560 {
5561 if (lookup_attribute ("regparm", attrs))
5562 ret |= IX86_CALLCVT_REGPARM;
5563 if (lookup_attribute ("sseregparm", attrs))
5564 ret |= IX86_CALLCVT_SSEREGPARM;
5565 }
5566
5567 if (IX86_BASE_CALLCVT(ret) != 0)
5568 return ret;
5569 }
5570
5571 is_stdarg = stdarg_p (type);
5572 if (TARGET_RTD && !is_stdarg)
5573 return IX86_CALLCVT_STDCALL | ret;
5574
5575 if (ret != 0
5576 || is_stdarg
5577 || TREE_CODE (type) != METHOD_TYPE
5578 || ix86_function_type_abi (type) != MS_ABI)
5579 return IX86_CALLCVT_CDECL | ret;
5580
5581 return IX86_CALLCVT_THISCALL;
5582 }
5583
5584 /* Return 0 if the attributes for two types are incompatible, 1 if they
5585 are compatible, and 2 if they are nearly compatible (which causes a
5586 warning to be generated). */
5587
5588 static int
5589 ix86_comp_type_attributes (const_tree type1, const_tree type2)
5590 {
5591 unsigned int ccvt1, ccvt2;
5592
5593 if (TREE_CODE (type1) != FUNCTION_TYPE
5594 && TREE_CODE (type1) != METHOD_TYPE)
5595 return 1;
5596
5597 ccvt1 = ix86_get_callcvt (type1);
5598 ccvt2 = ix86_get_callcvt (type2);
5599 if (ccvt1 != ccvt2)
5600 return 0;
5601 if (ix86_function_regparm (type1, NULL)
5602 != ix86_function_regparm (type2, NULL))
5603 return 0;
5604
5605 return 1;
5606 }
5607 \f
5608 /* Return the regparm value for a function with the indicated TYPE and DECL.
5609 DECL may be NULL when calling a function indirectly
5610 or considering a libcall. */
5611
5612 static int
5613 ix86_function_regparm (const_tree type, const_tree decl)
5614 {
5615 tree attr;
5616 int regparm;
5617 unsigned int ccvt;
5618
5619 if (TARGET_64BIT)
5620 return (ix86_function_type_abi (type) == SYSV_ABI
5621 ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
5622 ccvt = ix86_get_callcvt (type);
5623 regparm = ix86_regparm;
5624
5625 if ((ccvt & IX86_CALLCVT_REGPARM) != 0)
5626 {
5627 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
5628 if (attr)
5629 {
5630 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
5631 return regparm;
5632 }
5633 }
5634 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
5635 return 2;
5636 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
5637 return 1;
5638
5639 /* Use register calling convention for local functions when possible. */
5640 if (decl
5641 && TREE_CODE (decl) == FUNCTION_DECL
5642 /* Caller and callee must agree on the calling convention, so
5643 checking just the caller's optimize attribute here would mean that with
5644 __attribute__((optimize (...))) the caller could use the regparm convention
5645 and the callee not, or vice versa. Instead look at whether the callee
5646 is optimized or not. */
5647 && opt_for_fn (decl, optimize)
5648 && !(profile_flag && !flag_fentry))
5649 {
5650 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5651 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE (decl));
5652 if (i && i->local && i->can_change_signature)
5653 {
5654 int local_regparm, globals = 0, regno;
5655
5656 /* Make sure no regparm register is taken by a
5657 fixed register variable. */
5658 for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
5659 if (fixed_regs[local_regparm])
5660 break;
5661
5662 /* We don't want to use regparm(3) for nested functions as
5663 these use a static chain pointer in the third argument. */
5664 if (local_regparm == 3 && DECL_STATIC_CHAIN (decl))
5665 local_regparm = 2;
5666
5667 /* In 32-bit mode save a register for the split stack. */
5668 if (!TARGET_64BIT && local_regparm == 3 && flag_split_stack)
5669 local_regparm = 2;
5670
5671 /* Each fixed register usage increases register pressure,
5672 so fewer registers should be used for argument passing.
5673 This functionality can be overridden by an explicit
5674 regparm value. */
5675 for (regno = AX_REG; regno <= DI_REG; regno++)
5676 if (fixed_regs[regno])
5677 globals++;
5678
5679 local_regparm
5680 = globals < local_regparm ? local_regparm - globals : 0;
5681
5682 if (local_regparm > regparm)
5683 regparm = local_regparm;
5684 }
5685 }
5686
5687 return regparm;
5688 }
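/* For illustration: with the hypothetical declaration

     static int __attribute__((regparm (3))) hypothetical_h (int, int, int);

   the three arguments are passed in %eax, %edx and %ecx on ia32; for a
   local, optimized function the same effect may be chosen automatically by
   the code above, minus any registers claimed by fixed register variables,
   the static chain, or -fsplit-stack.  */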
5689
5690 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
5691 DFmode (2) arguments in SSE registers for a function with the
5692 indicated TYPE and DECL. DECL may be NULL when calling a function
5693 indirectly or considering a libcall. Otherwise return 0. */
5694
5695 static int
5696 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
5697 {
5698 gcc_assert (!TARGET_64BIT);
5699
5700 /* Use SSE registers to pass SFmode and DFmode arguments if requested
5701 by the sseregparm attribute. */
5702 if (TARGET_SSEREGPARM
5703 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
5704 {
5705 if (!TARGET_SSE)
5706 {
5707 if (warn)
5708 {
5709 if (decl)
5710 error ("calling %qD with attribute sseregparm without "
5711 "SSE/SSE2 enabled", decl);
5712 else
5713 error ("calling %qT with attribute sseregparm without "
5714 "SSE/SSE2 enabled", type);
5715 }
5716 return 0;
5717 }
5718
5719 return 2;
5720 }
5721
5722 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
5723 (and DFmode for SSE2) arguments in SSE registers. */
5724 if (decl && TARGET_SSE_MATH && optimize
5725 && !(profile_flag && !flag_fentry))
5726 {
5727 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5728 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
5729 if (i && i->local && i->can_change_signature)
5730 return TARGET_SSE2 ? 2 : 1;
5731 }
5732
5733 return 0;
5734 }
5735
5736 /* Return true if EAX is live at the start of the function. Used by
5737 ix86_expand_prologue to determine if we need special help before
5738 calling allocate_stack_worker. */
5739
5740 static bool
5741 ix86_eax_live_at_start_p (void)
5742 {
5743 /* Cheat. Don't bother working forward from ix86_function_regparm
5744 to the function type to whether an actual argument is located in
5745 eax. Instead just look at cfg info, which is still close enough
5746 to correct at this point. This gives false positives for broken
5747 functions that might use uninitialized data that happens to be
5748 allocated in eax, but who cares? */
5749 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 0);
5750 }
5751
5752 static bool
5753 ix86_keep_aggregate_return_pointer (tree fntype)
5754 {
5755 tree attr;
5756
5757 if (!TARGET_64BIT)
5758 {
5759 attr = lookup_attribute ("callee_pop_aggregate_return",
5760 TYPE_ATTRIBUTES (fntype));
5761 if (attr)
5762 return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0);
5763
5764 /* For 32-bit MS-ABI the default is to keep aggregate
5765 return pointer. */
5766 if (ix86_function_type_abi (fntype) == MS_ABI)
5767 return true;
5768 }
5769 return KEEP_AGGREGATE_RETURN_POINTER != 0;
5770 }
5771
5772 /* Value is the number of bytes of arguments automatically
5773 popped when returning from a subroutine call.
5774 FUNDECL is the declaration node of the function (as a tree),
5775 FUNTYPE is the data type of the function (as a tree),
5776 or for a library call it is an identifier node for the subroutine name.
5777 SIZE is the number of bytes of arguments passed on the stack.
5778
5779 On the 80386, the RTD insn may be used to pop them if the number
5780 of args is fixed, but if the number is variable then the caller
5781 must pop them all. RTD can't be used for library calls now
5782 because the library is compiled with the Unix compiler.
5783 Use of RTD is a selectable option, since it is incompatible with
5784 standard Unix calling sequences. If the option is not selected,
5785 the caller must always pop the args.
5786
5787 The attribute stdcall is equivalent to RTD on a per module basis. */
5788
5789 static int
5790 ix86_return_pops_args (tree fundecl, tree funtype, int size)
5791 {
5792 unsigned int ccvt;
5793
5794 /* None of the 64-bit ABIs pop arguments. */
5795 if (TARGET_64BIT)
5796 return 0;
5797
5798 ccvt = ix86_get_callcvt (funtype);
5799
5800 if ((ccvt & (IX86_CALLCVT_STDCALL | IX86_CALLCVT_FASTCALL
5801 | IX86_CALLCVT_THISCALL)) != 0
5802 && ! stdarg_p (funtype))
5803 return size;
5804
5805 /* Lose any fake structure return argument if it is passed on the stack. */
5806 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
5807 && !ix86_keep_aggregate_return_pointer (funtype))
5808 {
5809 int nregs = ix86_function_regparm (funtype, fundecl);
5810 if (nregs == 0)
5811 return GET_MODE_SIZE (Pmode);
5812 }
5813
5814 return 0;
5815 }
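/* Sketch of the effect: a 32-bit stdcall function such as the hypothetical

     int __attribute__((stdcall)) hypothetical_add (int a, int b);

   pops its 8 bytes of stack arguments itself (the callee returns with
   "ret $8"), so this hook reports 8; a plain cdecl function reports 0 and
   the caller does the cleanup.  */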
5816
5817 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
5818
5819 static bool
5820 ix86_legitimate_combined_insn (rtx insn)
5821 {
5822 /* Check operand constraints in case hard registers were propagated
5823 into insn pattern. This check prevents combine pass from
5824 generating insn patterns with invalid hard register operands.
5825 These invalid insns can eventually confuse reload to error out
5826 with a spill failure. See also PRs 46829 and 46843. */
5827 if ((INSN_CODE (insn) = recog (PATTERN (insn), insn, 0)) >= 0)
5828 {
5829 int i;
5830
5831 extract_insn (insn);
5832 preprocess_constraints (insn);
5833
5834 int n_operands = recog_data.n_operands;
5835 int n_alternatives = recog_data.n_alternatives;
5836 for (i = 0; i < n_operands; i++)
5837 {
5838 rtx op = recog_data.operand[i];
5839 enum machine_mode mode = GET_MODE (op);
5840 const operand_alternative *op_alt;
5841 int offset = 0;
5842 bool win;
5843 int j;
5844
5845 /* For pre-AVX disallow unaligned loads/stores where the
5846 instructions don't support it. */
5847 if (!TARGET_AVX
5848 && VECTOR_MODE_P (GET_MODE (op))
5849 && misaligned_operand (op, GET_MODE (op)))
5850 {
5851 int min_align = get_attr_ssememalign (insn);
5852 if (min_align == 0)
5853 return false;
5854 }
5855
5856 /* A unary operator may be accepted by the predicate, but it
5857 is irrelevant for matching constraints. */
5858 if (UNARY_P (op))
5859 op = XEXP (op, 0);
5860
5861 if (GET_CODE (op) == SUBREG)
5862 {
5863 if (REG_P (SUBREG_REG (op))
5864 && REGNO (SUBREG_REG (op)) < FIRST_PSEUDO_REGISTER)
5865 offset = subreg_regno_offset (REGNO (SUBREG_REG (op)),
5866 GET_MODE (SUBREG_REG (op)),
5867 SUBREG_BYTE (op),
5868 GET_MODE (op));
5869 op = SUBREG_REG (op);
5870 }
5871
5872 if (!(REG_P (op) && HARD_REGISTER_P (op)))
5873 continue;
5874
5875 op_alt = recog_op_alt;
5876
5877 /* Operand has no constraints, anything is OK. */
5878 win = !n_alternatives;
5879
5880 alternative_mask enabled = recog_data.enabled_alternatives;
5881 for (j = 0; j < n_alternatives; j++, op_alt += n_operands)
5882 {
5883 if (!TEST_BIT (enabled, j))
5884 continue;
5885 if (op_alt[i].anything_ok
5886 || (op_alt[i].matches != -1
5887 && operands_match_p
5888 (recog_data.operand[i],
5889 recog_data.operand[op_alt[i].matches]))
5890 || reg_fits_class_p (op, op_alt[i].cl, offset, mode))
5891 {
5892 win = true;
5893 break;
5894 }
5895 }
5896
5897 if (!win)
5898 return false;
5899 }
5900 }
5901
5902 return true;
5903 }
5904 \f
5905 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
5906
5907 static unsigned HOST_WIDE_INT
5908 ix86_asan_shadow_offset (void)
5909 {
5910 return TARGET_LP64 ? (TARGET_MACHO ? (HOST_WIDE_INT_1 << 44)
5911 : HOST_WIDE_INT_C (0x7fff8000))
5912 : (HOST_WIDE_INT_1 << 29);
5913 }
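/* For reference, ASan computes the shadow address roughly as

     shadow = (addr >> 3) + ix86_asan_shadow_offset ()

   so on Linux x86_64 (LP64) the offset is 0x7fff8000 and on ia32 it is
   1 << 29; the actual instrumentation is emitted by the sanitizer code,
   not here.  */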
5914 \f
5915 /* Argument support functions. */
5916
5917 /* Return true when register may be used to pass function parameters. */
5918 bool
5919 ix86_function_arg_regno_p (int regno)
5920 {
5921 int i;
5922 const int *parm_regs;
5923
5924 if (!TARGET_64BIT)
5925 {
5926 if (TARGET_MACHO)
5927 return (regno < REGPARM_MAX
5928 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
5929 else
5930 return (regno < REGPARM_MAX
5931 || (TARGET_MMX && MMX_REGNO_P (regno)
5932 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
5933 || (TARGET_SSE && SSE_REGNO_P (regno)
5934 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
5935 }
5936
5937 if (TARGET_SSE && SSE_REGNO_P (regno)
5938 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
5939 return true;
5940
5941 /* TODO: The function should depend on current function ABI but
5942 builtins.c would need updating then. Therefore we use the
5943 default ABI. */
5944
5945 /* RAX is used as hidden argument to va_arg functions. */
5946 if (ix86_abi == SYSV_ABI && regno == AX_REG)
5947 return true;
5948
5949 if (ix86_abi == MS_ABI)
5950 parm_regs = x86_64_ms_abi_int_parameter_registers;
5951 else
5952 parm_regs = x86_64_int_parameter_registers;
5953 for (i = 0; i < (ix86_abi == MS_ABI
5954 ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
5955 if (regno == parm_regs[i])
5956 return true;
5957 return false;
5958 }
5959
5960 /* Return true if we do not know how to pass TYPE solely in registers. */
5961
5962 static bool
5963 ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
5964 {
5965 if (must_pass_in_stack_var_size_or_pad (mode, type))
5966 return true;
5967
5968 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
5969 The layout_type routine is crafty and tries to trick us into passing
5970 currently unsupported vector types on the stack by using TImode. */
5971 return (!TARGET_64BIT && mode == TImode
5972 && type && TREE_CODE (type) != VECTOR_TYPE);
5973 }
5974
5975 /* Return the size, in bytes, of the area reserved for arguments passed
5976 in registers for the function represented by FNDECL, depending on the
5977 ABI format used. */
5978 int
5979 ix86_reg_parm_stack_space (const_tree fndecl)
5980 {
5981 enum calling_abi call_abi = SYSV_ABI;
5982 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
5983 call_abi = ix86_function_abi (fndecl);
5984 else
5985 call_abi = ix86_function_type_abi (fndecl);
5986 if (TARGET_64BIT && call_abi == MS_ABI)
5987 return 32;
5988 return 0;
5989 }
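/* The 32 bytes returned for the 64-bit MS ABI above are the so-called
   "shadow space" (home area) that every caller must reserve on the stack
   for the four register arguments, even when fewer are used.  */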
5990
5991 /* Return SYSV_ABI or MS_ABI, depending on FNTYPE, specifying the
5992 call ABI used. */
5993 enum calling_abi
5994 ix86_function_type_abi (const_tree fntype)
5995 {
5996 if (fntype != NULL_TREE && TYPE_ATTRIBUTES (fntype) != NULL_TREE)
5997 {
5998 enum calling_abi abi = ix86_abi;
5999 if (abi == SYSV_ABI)
6000 {
6001 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
6002 abi = MS_ABI;
6003 }
6004 else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
6005 abi = SYSV_ABI;
6006 return abi;
6007 }
6008 return ix86_abi;
6009 }
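/* Illustrative use of the attributes checked above (hypothetical names):

     void __attribute__((ms_abi))   hypothetical_winapi (int, int);
     void __attribute__((sysv_abi)) hypothetical_posix (int, int);

   On x86_64 these select the MS calling convention (integer args in %rcx,
   %rdx, %r8, %r9) or the SysV one (%rdi, %rsi, %rdx, %rcx, %r8, %r9)
   regardless of the default ix86_abi.  */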
6010
6011 /* We add this as a workaround in order to use libc_has_function
6012 hook in i386.md. */
6013 bool
6014 ix86_libc_has_function (enum function_class fn_class)
6015 {
6016 return targetm.libc_has_function (fn_class);
6017 }
6018
6019 static bool
6020 ix86_function_ms_hook_prologue (const_tree fn)
6021 {
6022 if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
6023 {
6024 if (decl_function_context (fn) != NULL_TREE)
6025 error_at (DECL_SOURCE_LOCATION (fn),
6026 "ms_hook_prologue is not compatible with nested function");
6027 else
6028 return true;
6029 }
6030 return false;
6031 }
6032
6033 static enum calling_abi
6034 ix86_function_abi (const_tree fndecl)
6035 {
6036 if (! fndecl)
6037 return ix86_abi;
6038 return ix86_function_type_abi (TREE_TYPE (fndecl));
6039 }
6040
6041 /* Return SYSV_ABI or MS_ABI, depending on cfun, specifying the
6042 call ABI used. */
6043 enum calling_abi
6044 ix86_cfun_abi (void)
6045 {
6046 if (! cfun)
6047 return ix86_abi;
6048 return cfun->machine->call_abi;
6049 }
6050
6051 /* Write the extra assembler code needed to declare a function properly. */
6052
6053 void
6054 ix86_asm_output_function_label (FILE *asm_out_file, const char *fname,
6055 tree decl)
6056 {
6057 bool is_ms_hook = ix86_function_ms_hook_prologue (decl);
6058
6059 if (is_ms_hook)
6060 {
6061 int i, filler_count = (TARGET_64BIT ? 32 : 16);
6062 unsigned int filler_cc = 0xcccccccc;
6063
6064 for (i = 0; i < filler_count; i += 4)
6065 fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc);
6066 }
6067
6068 #ifdef SUBTARGET_ASM_UNWIND_INIT
6069 SUBTARGET_ASM_UNWIND_INIT (asm_out_file);
6070 #endif
6071
6072 ASM_OUTPUT_LABEL (asm_out_file, fname);
6073
6074 /* Output magic byte marker, if hot-patch attribute is set. */
6075 if (is_ms_hook)
6076 {
6077 if (TARGET_64BIT)
6078 {
6079 /* leaq [%rsp + 0], %rsp */
6080 asm_fprintf (asm_out_file, ASM_BYTE
6081 "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n");
6082 }
6083 else
6084 {
6085 /* movl.s %edi, %edi
6086 push %ebp
6087 movl.s %esp, %ebp */
6088 asm_fprintf (asm_out_file, ASM_BYTE
6089 "0x8b, 0xff, 0x55, 0x8b, 0xec\n");
6090 }
6091 }
6092 }
6093
6094 /* regclass.c */
6095 extern void init_regs (void);
6096
6097 /* Implementation of the call ABI switching target hook. The call register
6098 sets specific to FNDECL are selected. See also
6099 ix86_conditional_register_usage for more details. */
6100 void
6101 ix86_call_abi_override (const_tree fndecl)
6102 {
6103 if (fndecl == NULL_TREE)
6104 cfun->machine->call_abi = ix86_abi;
6105 else
6106 cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
6107 }
6108
6109 /* The 64-bit MS and SYSV ABIs have different sets of call-used registers. Avoid
6110 the expensive re-initialization of init_regs each time we switch function context,
6111 since this is needed only during RTL expansion. */
6112 static void
6113 ix86_maybe_switch_abi (void)
6114 {
6115 if (TARGET_64BIT &&
6116 call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI))
6117 reinit_regs ();
6118 }
6119
6120 /* Initialize a variable CUM of type CUMULATIVE_ARGS
6121 for a call to a function whose data type is FNTYPE.
6122 For a library call, FNTYPE is 0. */
6123
6124 void
6125 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
6126 tree fntype, /* tree ptr for function decl */
6127 rtx libname, /* SYMBOL_REF of library name or 0 */
6128 tree fndecl,
6129 int caller)
6130 {
6131 struct cgraph_local_info *i;
6132
6133 memset (cum, 0, sizeof (*cum));
6134
6135 if (fndecl)
6136 {
6137 i = cgraph_local_info (fndecl);
6138 cum->call_abi = ix86_function_abi (fndecl);
6139 }
6140 else
6141 {
6142 i = NULL;
6143 cum->call_abi = ix86_function_type_abi (fntype);
6144 }
6145
6146 cum->caller = caller;
6147
6148 /* Set up the number of registers to use for passing arguments. */
6149 cum->nregs = ix86_regparm;
6150 if (TARGET_64BIT)
6151 {
6152 cum->nregs = (cum->call_abi == SYSV_ABI
6153 ? X86_64_REGPARM_MAX
6154 : X86_64_MS_REGPARM_MAX);
6155 }
6156 if (TARGET_SSE)
6157 {
6158 cum->sse_nregs = SSE_REGPARM_MAX;
6159 if (TARGET_64BIT)
6160 {
6161 cum->sse_nregs = (cum->call_abi == SYSV_ABI
6162 ? X86_64_SSE_REGPARM_MAX
6163 : X86_64_MS_SSE_REGPARM_MAX);
6164 }
6165 }
6166 if (TARGET_MMX)
6167 cum->mmx_nregs = MMX_REGPARM_MAX;
6168 cum->warn_avx512f = true;
6169 cum->warn_avx = true;
6170 cum->warn_sse = true;
6171 cum->warn_mmx = true;
6172
6173 /* Because the type might mismatch between caller and callee, we need to
6174 use the actual type of the function for local calls.
6175 FIXME: cgraph_analyze can be told to actually record whether a function uses
6176 va_start, so for local functions maybe_vaarg can be made more aggressive,
6177 helping K&R code.
6178 FIXME: once the typesystem is fixed, we won't need this code anymore. */
6179 if (i && i->local && i->can_change_signature)
6180 fntype = TREE_TYPE (fndecl);
6181 cum->maybe_vaarg = (fntype
6182 ? (!prototype_p (fntype) || stdarg_p (fntype))
6183 : !libname);
6184
6185 if (!TARGET_64BIT)
6186 {
6187 /* If there are variable arguments, then we won't pass anything
6188 in registers in 32-bit mode. */
6189 if (stdarg_p (fntype))
6190 {
6191 cum->nregs = 0;
6192 cum->sse_nregs = 0;
6193 cum->mmx_nregs = 0;
6194 cum->warn_avx512f = false;
6195 cum->warn_avx = false;
6196 cum->warn_sse = false;
6197 cum->warn_mmx = false;
6198 return;
6199 }
6200
6201 /* Use ecx and edx registers if function has fastcall attribute,
6202 else look for regparm information. */
6203 if (fntype)
6204 {
6205 unsigned int ccvt = ix86_get_callcvt (fntype);
6206 if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
6207 {
6208 cum->nregs = 1;
6209 cum->fastcall = 1; /* Same first register as in fastcall. */
6210 }
6211 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
6212 {
6213 cum->nregs = 2;
6214 cum->fastcall = 1;
6215 }
6216 else
6217 cum->nregs = ix86_function_regparm (fntype, fndecl);
6218 }
6219
6220 /* Set up the number of SSE registers used for passing SFmode
6221 and DFmode arguments. Warn for mismatching ABI. */
6222 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
6223 }
6224 }
6225
6226 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
6227 But in the case of vector types, it is some vector mode.
6228
6229 When we have only some of our vector isa extensions enabled, then there
6230 are some modes for which vector_mode_supported_p is false. For these
6231 modes, the generic vector support in gcc will choose some non-vector mode
6232 in order to implement the type. By computing the natural mode, we'll
6233 select the proper ABI location for the operand and not depend on whatever
6234 the middle-end decides to do with these vector types.
6235
6236 The middle-end can't deal with vector types larger than 16 bytes. In this
6237 case, we return the original mode and warn about the ABI change if CUM isn't
6238 NULL.
6239
6240 If IN_RETURN is true, warn about the ABI change if the vector mode isn't
6241 available for the function return value. */
6242
6243 static enum machine_mode
6244 type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum,
6245 bool in_return)
6246 {
6247 enum machine_mode mode = TYPE_MODE (type);
6248
6249 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
6250 {
6251 HOST_WIDE_INT size = int_size_in_bytes (type);
6252 if ((size == 8 || size == 16 || size == 32 || size == 64)
6253 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
6254 && TYPE_VECTOR_SUBPARTS (type) > 1)
6255 {
6256 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
6257
6258 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
6259 mode = MIN_MODE_VECTOR_FLOAT;
6260 else
6261 mode = MIN_MODE_VECTOR_INT;
6262
6263 /* Get the mode which has this inner mode and number of units. */
6264 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
6265 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
6266 && GET_MODE_INNER (mode) == innermode)
6267 {
6268 if (size == 64 && !TARGET_AVX512F)
6269 {
6270 static bool warnedavx512f;
6271 static bool warnedavx512f_ret;
6272
6273 if (cum && cum->warn_avx512f && !warnedavx512f)
6274 {
6275 if (warning (OPT_Wpsabi, "AVX512F vector argument "
6276 "without AVX512F enabled changes the ABI"))
6277 warnedavx512f = true;
6278 }
6279 else if (in_return && !warnedavx512f_ret)
6280 {
6281 if (warning (OPT_Wpsabi, "AVX512F vector return "
6282 "without AVX512F enabled changes the ABI"))
6283 warnedavx512f_ret = true;
6284 }
6285
6286 return TYPE_MODE (type);
6287 }
6288 else if (size == 32 && !TARGET_AVX)
6289 {
6290 static bool warnedavx;
6291 static bool warnedavx_ret;
6292
6293 if (cum && cum->warn_avx && !warnedavx)
6294 {
6295 if (warning (OPT_Wpsabi, "AVX vector argument "
6296 "without AVX enabled changes the ABI"))
6297 warnedavx = true;
6298 }
6299 else if (in_return && !warnedavx_ret)
6300 {
6301 if (warning (OPT_Wpsabi, "AVX vector return "
6302 "without AVX enabled changes the ABI"))
6303 warnedavx_ret = true;
6304 }
6305
6306 return TYPE_MODE (type);
6307 }
6308 else if (((size == 8 && TARGET_64BIT) || size == 16)
6309 && !TARGET_SSE)
6310 {
6311 static bool warnedsse;
6312 static bool warnedsse_ret;
6313
6314 if (cum && cum->warn_sse && !warnedsse)
6315 {
6316 if (warning (OPT_Wpsabi, "SSE vector argument "
6317 "without SSE enabled changes the ABI"))
6318 warnedsse = true;
6319 }
6320 else if (!TARGET_64BIT && in_return && !warnedsse_ret)
6321 {
6322 if (warning (OPT_Wpsabi, "SSE vector return "
6323 "without SSE enabled changes the ABI"))
6324 warnedsse_ret = true;
6325 }
6326 }
6327 else if ((size == 8 && !TARGET_64BIT) && !TARGET_MMX)
6328 {
6329 static bool warnedmmx;
6330 static bool warnedmmx_ret;
6331
6332 if (cum && cum->warn_mmx && !warnedmmx)
6333 {
6334 if (warning (OPT_Wpsabi, "MMX vector argument "
6335 "without MMX enabled changes the ABI"))
6336 warnedmmx = true;
6337 }
6338 else if (in_return && !warnedmmx_ret)
6339 {
6340 if (warning (OPT_Wpsabi, "MMX vector return "
6341 "without MMX enabled changes the ABI"))
6342 warnedmmx_ret = true;
6343 }
6344 }
6345 return mode;
6346 }
6347
6348 gcc_unreachable ();
6349 }
6350 }
6351
6352 return mode;
6353 }
6354
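/* Illustrative sketch (not compiled): with only SSE2 enabled,

     typedef int v8si __attribute__ ((vector_size (32)));

   is given a non-vector TYPE_MODE by the middle-end.  type_natural_mode
   finds the matching V8SImode, notices that 32-byte vectors need AVX,
   emits the -Wpsabi warning above and falls back to the original mode so
   the argument slot stays compatible with the chosen ISA level.  */
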
6355 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
6356 this may not agree with the mode that the type system has chosen for the
6357 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
6358 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
6359
6360 static rtx
6361 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
6362 unsigned int regno)
6363 {
6364 rtx tmp;
6365
6366 if (orig_mode != BLKmode)
6367 tmp = gen_rtx_REG (orig_mode, regno);
6368 else
6369 {
6370 tmp = gen_rtx_REG (mode, regno);
6371 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
6372 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
6373 }
6374
6375 return tmp;
6376 }
6377
6378 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
6379 of this code is to classify each 8bytes of incoming argument by the register
6380 class and assign registers accordingly. */
6381
6382 /* Return the union class of CLASS1 and CLASS2.
6383 See the x86-64 PS ABI for details. */
6384
6385 static enum x86_64_reg_class
6386 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
6387 {
6388 /* Rule #1: If both classes are equal, this is the resulting class. */
6389 if (class1 == class2)
6390 return class1;
6391
6392 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
6393 the other class. */
6394 if (class1 == X86_64_NO_CLASS)
6395 return class2;
6396 if (class2 == X86_64_NO_CLASS)
6397 return class1;
6398
6399 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
6400 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
6401 return X86_64_MEMORY_CLASS;
6402
6403 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
6404 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
6405 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
6406 return X86_64_INTEGERSI_CLASS;
6407 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
6408 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
6409 return X86_64_INTEGER_CLASS;
6410
6411 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
6412 MEMORY is used. */
6413 if (class1 == X86_64_X87_CLASS
6414 || class1 == X86_64_X87UP_CLASS
6415 || class1 == X86_64_COMPLEX_X87_CLASS
6416 || class2 == X86_64_X87_CLASS
6417 || class2 == X86_64_X87UP_CLASS
6418 || class2 == X86_64_COMPLEX_X87_CLASS)
6419 return X86_64_MEMORY_CLASS;
6420
6421 /* Rule #6: Otherwise class SSE is used. */
6422 return X86_64_SSE_CLASS;
6423 }
6424
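/* Worked example (a sketch, not taken from the psABI text): the low
   eightbyte of

     struct s { float f; int i; };

   contains a float (X86_64_SSESF_CLASS) and an int (X86_64_INTEGERSI_CLASS).
   merge_classes (X86_64_INTEGERSI_CLASS, X86_64_SSESF_CLASS) returns
   X86_64_INTEGERSI_CLASS by rule #4, so the whole eightbyte is passed in a
   general-purpose register.  */
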
6425 /* Classify the argument of type TYPE and mode MODE.
6426 CLASSES will be filled by the register class used to pass each word
6427 of the operand. The number of words is returned. In case the parameter
6428 should be passed in memory, 0 is returned. As a special case for zero
6429 sized containers, classes[0] will be NO_CLASS and 1 is returned.
6430
6431 BIT_OFFSET is used internally for handling records and specifies the
6432 offset of the field in bits modulo 512 to avoid overflow cases.
6433
6434 See the x86-64 PS ABI for details.
6435 */
6436
6437 static int
6438 classify_argument (enum machine_mode mode, const_tree type,
6439 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
6440 {
6441 HOST_WIDE_INT bytes =
6442 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
6443 int words
6444 = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6445
6446 /* Variable sized entities are always passed/returned in memory. */
6447 if (bytes < 0)
6448 return 0;
6449
6450 if (mode != VOIDmode
6451 && targetm.calls.must_pass_in_stack (mode, type))
6452 return 0;
6453
6454 if (type && AGGREGATE_TYPE_P (type))
6455 {
6456 int i;
6457 tree field;
6458 enum x86_64_reg_class subclasses[MAX_CLASSES];
6459
6460 /* On x86-64 we pass structures larger than 64 bytes on the stack. */
6461 if (bytes > 64)
6462 return 0;
6463
6464 for (i = 0; i < words; i++)
6465 classes[i] = X86_64_NO_CLASS;
6466
6467 /* Zero-sized arrays or structures are NO_CLASS. We return 0 to
6468 signal the memory class, so handle it as a special case. */
6469 if (!words)
6470 {
6471 classes[0] = X86_64_NO_CLASS;
6472 return 1;
6473 }
6474
6475 /* Classify each field of record and merge classes. */
6476 switch (TREE_CODE (type))
6477 {
6478 case RECORD_TYPE:
6479 /* And now merge the fields of structure. */
6480 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6481 {
6482 if (TREE_CODE (field) == FIELD_DECL)
6483 {
6484 int num;
6485
6486 if (TREE_TYPE (field) == error_mark_node)
6487 continue;
6488
6489 /* Bitfields are always classified as integer. Handle them
6490 early, since later code would consider them to be
6491 misaligned integers. */
6492 if (DECL_BIT_FIELD (field))
6493 {
6494 for (i = (int_bit_position (field)
6495 + (bit_offset % 64)) / 8 / 8;
6496 i < ((int_bit_position (field) + (bit_offset % 64))
6497 + tree_to_shwi (DECL_SIZE (field))
6498 + 63) / 8 / 8; i++)
6499 classes[i] =
6500 merge_classes (X86_64_INTEGER_CLASS,
6501 classes[i]);
6502 }
6503 else
6504 {
6505 int pos;
6506
6507 type = TREE_TYPE (field);
6508
6509 /* Flexible array member is ignored. */
6510 if (TYPE_MODE (type) == BLKmode
6511 && TREE_CODE (type) == ARRAY_TYPE
6512 && TYPE_SIZE (type) == NULL_TREE
6513 && TYPE_DOMAIN (type) != NULL_TREE
6514 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
6515 == NULL_TREE))
6516 {
6517 static bool warned;
6518
6519 if (!warned && warn_psabi)
6520 {
6521 warned = true;
6522 inform (input_location,
6523 "the ABI of passing struct with"
6524 " a flexible array member has"
6525 " changed in GCC 4.4");
6526 }
6527 continue;
6528 }
6529 num = classify_argument (TYPE_MODE (type), type,
6530 subclasses,
6531 (int_bit_position (field)
6532 + bit_offset) % 512);
6533 if (!num)
6534 return 0;
6535 pos = (int_bit_position (field)
6536 + (bit_offset % 64)) / 8 / 8;
6537 for (i = 0; i < num && (i + pos) < words; i++)
6538 classes[i + pos] =
6539 merge_classes (subclasses[i], classes[i + pos]);
6540 }
6541 }
6542 }
6543 break;
6544
6545 case ARRAY_TYPE:
6546 /* Arrays are handled as small records. */
6547 {
6548 int num;
6549 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
6550 TREE_TYPE (type), subclasses, bit_offset);
6551 if (!num)
6552 return 0;
6553
6554 /* The partial classes are now full classes. */
6555 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
6556 subclasses[0] = X86_64_SSE_CLASS;
6557 if (subclasses[0] == X86_64_INTEGERSI_CLASS
6558 && !((bit_offset % 64) == 0 && bytes == 4))
6559 subclasses[0] = X86_64_INTEGER_CLASS;
6560
6561 for (i = 0; i < words; i++)
6562 classes[i] = subclasses[i % num];
6563
6564 break;
6565 }
6566 case UNION_TYPE:
6567 case QUAL_UNION_TYPE:
6568 /* Unions are similar to RECORD_TYPE but offset is always 0.
6569 */
6570 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6571 {
6572 if (TREE_CODE (field) == FIELD_DECL)
6573 {
6574 int num;
6575
6576 if (TREE_TYPE (field) == error_mark_node)
6577 continue;
6578
6579 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
6580 TREE_TYPE (field), subclasses,
6581 bit_offset);
6582 if (!num)
6583 return 0;
6584 for (i = 0; i < num; i++)
6585 classes[i] = merge_classes (subclasses[i], classes[i]);
6586 }
6587 }
6588 break;
6589
6590 default:
6591 gcc_unreachable ();
6592 }
6593
6594 if (words > 2)
6595 {
6596 /* When size > 16 bytes, if the first class isn't
6597 X86_64_SSE_CLASS or any of the other classes isn't
6598 X86_64_SSEUP_CLASS, everything should be passed in
6599 memory. */
6600 if (classes[0] != X86_64_SSE_CLASS)
6601 return 0;
6602
6603 for (i = 1; i < words; i++)
6604 if (classes[i] != X86_64_SSEUP_CLASS)
6605 return 0;
6606 }
6607
6608 /* Final merger cleanup. */
6609 for (i = 0; i < words; i++)
6610 {
6611 /* If one class is MEMORY, everything should be passed in
6612 memory. */
6613 if (classes[i] == X86_64_MEMORY_CLASS)
6614 return 0;
6615
6616 /* The X86_64_SSEUP_CLASS should be always preceded by
6617 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
6618 if (classes[i] == X86_64_SSEUP_CLASS
6619 && classes[i - 1] != X86_64_SSE_CLASS
6620 && classes[i - 1] != X86_64_SSEUP_CLASS)
6621 {
6622 /* The first one should never be X86_64_SSEUP_CLASS. */
6623 gcc_assert (i != 0);
6624 classes[i] = X86_64_SSE_CLASS;
6625 }
6626
6627 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
6628 everything should be passed in memory. */
6629 if (classes[i] == X86_64_X87UP_CLASS
6630 && (classes[i - 1] != X86_64_X87_CLASS))
6631 {
6632 static bool warned;
6633
6634 /* The first one should never be X86_64_X87UP_CLASS. */
6635 gcc_assert (i != 0);
6636 if (!warned && warn_psabi)
6637 {
6638 warned = true;
6639 inform (input_location,
6640 "the ABI of passing union with long double"
6641 " has changed in GCC 4.4");
6642 }
6643 return 0;
6644 }
6645 }
6646 return words;
6647 }
6648
6649 /* Compute the alignment needed. We align all types to their natural
6650 boundaries, with the exception of XFmode, which is aligned to 64 bits. */
6651 if (mode != VOIDmode && mode != BLKmode)
6652 {
6653 int mode_alignment = GET_MODE_BITSIZE (mode);
6654
6655 if (mode == XFmode)
6656 mode_alignment = 128;
6657 else if (mode == XCmode)
6658 mode_alignment = 256;
6659 if (COMPLEX_MODE_P (mode))
6660 mode_alignment /= 2;
6661 /* Misaligned fields are always returned in memory. */
6662 if (bit_offset % mode_alignment)
6663 return 0;
6664 }
6665
6666 /* for V1xx modes, just use the base mode */
6667 if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
6668 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
6669 mode = GET_MODE_INNER (mode);
6670
6671 /* Classification of atomic types. */
6672 switch (mode)
6673 {
6674 case SDmode:
6675 case DDmode:
6676 classes[0] = X86_64_SSE_CLASS;
6677 return 1;
6678 case TDmode:
6679 classes[0] = X86_64_SSE_CLASS;
6680 classes[1] = X86_64_SSEUP_CLASS;
6681 return 2;
6682 case DImode:
6683 case SImode:
6684 case HImode:
6685 case QImode:
6686 case CSImode:
6687 case CHImode:
6688 case CQImode:
6689 {
6690 int size = bit_offset + (int) GET_MODE_BITSIZE (mode);
6691
6692 /* Analyze last 128 bits only. */
6693 size = (size - 1) & 0x7f;
6694
6695 if (size < 32)
6696 {
6697 classes[0] = X86_64_INTEGERSI_CLASS;
6698 return 1;
6699 }
6700 else if (size < 64)
6701 {
6702 classes[0] = X86_64_INTEGER_CLASS;
6703 return 1;
6704 }
6705 else if (size < 64+32)
6706 {
6707 classes[0] = X86_64_INTEGER_CLASS;
6708 classes[1] = X86_64_INTEGERSI_CLASS;
6709 return 2;
6710 }
6711 else if (size < 64+64)
6712 {
6713 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
6714 return 2;
6715 }
6716 else
6717 gcc_unreachable ();
6718 }
6719 case CDImode:
6720 case TImode:
6721 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
6722 return 2;
6723 case COImode:
6724 case OImode:
6725 /* OImode shouldn't be used directly. */
6726 gcc_unreachable ();
6727 case CTImode:
6728 return 0;
6729 case SFmode:
6730 if (!(bit_offset % 64))
6731 classes[0] = X86_64_SSESF_CLASS;
6732 else
6733 classes[0] = X86_64_SSE_CLASS;
6734 return 1;
6735 case DFmode:
6736 classes[0] = X86_64_SSEDF_CLASS;
6737 return 1;
6738 case XFmode:
6739 classes[0] = X86_64_X87_CLASS;
6740 classes[1] = X86_64_X87UP_CLASS;
6741 return 2;
6742 case TFmode:
6743 classes[0] = X86_64_SSE_CLASS;
6744 classes[1] = X86_64_SSEUP_CLASS;
6745 return 2;
6746 case SCmode:
6747 classes[0] = X86_64_SSE_CLASS;
6748 if (!(bit_offset % 64))
6749 return 1;
6750 else
6751 {
6752 static bool warned;
6753
6754 if (!warned && warn_psabi)
6755 {
6756 warned = true;
6757 inform (input_location,
6758 "the ABI of passing structure with complex float"
6759 " member has changed in GCC 4.4");
6760 }
6761 classes[1] = X86_64_SSESF_CLASS;
6762 return 2;
6763 }
6764 case DCmode:
6765 classes[0] = X86_64_SSEDF_CLASS;
6766 classes[1] = X86_64_SSEDF_CLASS;
6767 return 2;
6768 case XCmode:
6769 classes[0] = X86_64_COMPLEX_X87_CLASS;
6770 return 1;
6771 case TCmode:
6772 /* This mode is larger than 16 bytes. */
6773 return 0;
6774 case V8SFmode:
6775 case V8SImode:
6776 case V32QImode:
6777 case V16HImode:
6778 case V4DFmode:
6779 case V4DImode:
6780 classes[0] = X86_64_SSE_CLASS;
6781 classes[1] = X86_64_SSEUP_CLASS;
6782 classes[2] = X86_64_SSEUP_CLASS;
6783 classes[3] = X86_64_SSEUP_CLASS;
6784 return 4;
6785 case V8DFmode:
6786 case V16SFmode:
6787 case V8DImode:
6788 case V16SImode:
6789 case V32HImode:
6790 case V64QImode:
6791 classes[0] = X86_64_SSE_CLASS;
6792 classes[1] = X86_64_SSEUP_CLASS;
6793 classes[2] = X86_64_SSEUP_CLASS;
6794 classes[3] = X86_64_SSEUP_CLASS;
6795 classes[4] = X86_64_SSEUP_CLASS;
6796 classes[5] = X86_64_SSEUP_CLASS;
6797 classes[6] = X86_64_SSEUP_CLASS;
6798 classes[7] = X86_64_SSEUP_CLASS;
6799 return 8;
6800 case V4SFmode:
6801 case V4SImode:
6802 case V16QImode:
6803 case V8HImode:
6804 case V2DFmode:
6805 case V2DImode:
6806 classes[0] = X86_64_SSE_CLASS;
6807 classes[1] = X86_64_SSEUP_CLASS;
6808 return 2;
6809 case V1TImode:
6810 case V1DImode:
6811 case V2SFmode:
6812 case V2SImode:
6813 case V4HImode:
6814 case V8QImode:
6815 classes[0] = X86_64_SSE_CLASS;
6816 return 1;
6817 case BLKmode:
6818 case VOIDmode:
6819 return 0;
6820 default:
6821 gcc_assert (VECTOR_MODE_P (mode));
6822
6823 if (bytes > 16)
6824 return 0;
6825
6826 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
6827
6828 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
6829 classes[0] = X86_64_INTEGERSI_CLASS;
6830 else
6831 classes[0] = X86_64_INTEGER_CLASS;
6832 classes[1] = X86_64_INTEGER_CLASS;
6833 return 1 + (bytes > 8);
6834 }
6835 }
6836
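/* Worked example (illustrative only): for

     struct s { double d; long l; };

   classify_argument returns 2 and fills CLASSES with
   { X86_64_SSEDF_CLASS, X86_64_INTEGER_CLASS }: the first eightbyte holds
   only the double, the second only the long, so no merging is needed and
   the struct is passed in one SSE register and one integer register.  */
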
6837 /* Examine the argument and set the number of registers required in each
6838 class. Return true iff the parameter should be passed in memory. */
6839
6840 static bool
6841 examine_argument (enum machine_mode mode, const_tree type, int in_return,
6842 int *int_nregs, int *sse_nregs)
6843 {
6844 enum x86_64_reg_class regclass[MAX_CLASSES];
6845 int n = classify_argument (mode, type, regclass, 0);
6846
6847 *int_nregs = 0;
6848 *sse_nregs = 0;
6849
6850 if (!n)
6851 return true;
6852 for (n--; n >= 0; n--)
6853 switch (regclass[n])
6854 {
6855 case X86_64_INTEGER_CLASS:
6856 case X86_64_INTEGERSI_CLASS:
6857 (*int_nregs)++;
6858 break;
6859 case X86_64_SSE_CLASS:
6860 case X86_64_SSESF_CLASS:
6861 case X86_64_SSEDF_CLASS:
6862 (*sse_nregs)++;
6863 break;
6864 case X86_64_NO_CLASS:
6865 case X86_64_SSEUP_CLASS:
6866 break;
6867 case X86_64_X87_CLASS:
6868 case X86_64_X87UP_CLASS:
6869 case X86_64_COMPLEX_X87_CLASS:
6870 if (!in_return)
6871 return true;
6872 break;
6873 case X86_64_MEMORY_CLASS:
6874 gcc_unreachable ();
6875 }
6876
6877 return false;
6878 }
6879
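/* Usage sketch (hypothetical caller, not one of this file's interfaces):

     int int_nregs, sse_nregs;

     if (examine_argument (TYPE_MODE (type), type, 0,
                           &int_nregs, &sse_nregs))
       ...the value lives in memory...
     else
       ...it needs int_nregs integer and sse_nregs SSE registers...  */
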
6880 /* Construct container for the argument used by GCC interface. See
6881 FUNCTION_ARG for the detailed description. */
6882
6883 static rtx
6884 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
6885 const_tree type, int in_return, int nintregs, int nsseregs,
6886 const int *intreg, int sse_regno)
6887 {
6888 /* The following variables hold the static issued_error state. */
6889 static bool issued_sse_arg_error;
6890 static bool issued_sse_ret_error;
6891 static bool issued_x87_ret_error;
6892
6893 enum machine_mode tmpmode;
6894 int bytes =
6895 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
6896 enum x86_64_reg_class regclass[MAX_CLASSES];
6897 int n;
6898 int i;
6899 int nexps = 0;
6900 int needed_sseregs, needed_intregs;
6901 rtx exp[MAX_CLASSES];
6902 rtx ret;
6903
6904 n = classify_argument (mode, type, regclass, 0);
6905 if (!n)
6906 return NULL;
6907 if (examine_argument (mode, type, in_return, &needed_intregs,
6908 &needed_sseregs))
6909 return NULL;
6910 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
6911 return NULL;
6912
6913 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
6914 some less clueful developer tries to use floating-point anyway. */
6915 if (needed_sseregs && !TARGET_SSE)
6916 {
6917 if (in_return)
6918 {
6919 if (!issued_sse_ret_error)
6920 {
6921 error ("SSE register return with SSE disabled");
6922 issued_sse_ret_error = true;
6923 }
6924 }
6925 else if (!issued_sse_arg_error)
6926 {
6927 error ("SSE register argument with SSE disabled");
6928 issued_sse_arg_error = true;
6929 }
6930 return NULL;
6931 }
6932
6933 /* Likewise, error if the ABI requires us to return values in the
6934 x87 registers and the user specified -mno-80387. */
6935 if (!TARGET_FLOAT_RETURNS_IN_80387 && in_return)
6936 for (i = 0; i < n; i++)
6937 if (regclass[i] == X86_64_X87_CLASS
6938 || regclass[i] == X86_64_X87UP_CLASS
6939 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
6940 {
6941 if (!issued_x87_ret_error)
6942 {
6943 error ("x87 register return with x87 disabled");
6944 issued_x87_ret_error = true;
6945 }
6946 return NULL;
6947 }
6948
6949 /* First construct simple cases. Avoid SCmode, since we want to use
6950 single register to pass this type. */
6951 if (n == 1 && mode != SCmode)
6952 switch (regclass[0])
6953 {
6954 case X86_64_INTEGER_CLASS:
6955 case X86_64_INTEGERSI_CLASS:
6956 return gen_rtx_REG (mode, intreg[0]);
6957 case X86_64_SSE_CLASS:
6958 case X86_64_SSESF_CLASS:
6959 case X86_64_SSEDF_CLASS:
6960 if (mode != BLKmode)
6961 return gen_reg_or_parallel (mode, orig_mode,
6962 SSE_REGNO (sse_regno));
6963 break;
6964 case X86_64_X87_CLASS:
6965 case X86_64_COMPLEX_X87_CLASS:
6966 return gen_rtx_REG (mode, FIRST_STACK_REG);
6967 case X86_64_NO_CLASS:
6968 /* Zero sized array, struct or class. */
6969 return NULL;
6970 default:
6971 gcc_unreachable ();
6972 }
6973 if (n == 2
6974 && regclass[0] == X86_64_SSE_CLASS
6975 && regclass[1] == X86_64_SSEUP_CLASS
6976 && mode != BLKmode)
6977 return gen_reg_or_parallel (mode, orig_mode,
6978 SSE_REGNO (sse_regno));
6979 if (n == 4
6980 && regclass[0] == X86_64_SSE_CLASS
6981 && regclass[1] == X86_64_SSEUP_CLASS
6982 && regclass[2] == X86_64_SSEUP_CLASS
6983 && regclass[3] == X86_64_SSEUP_CLASS
6984 && mode != BLKmode)
6985 return gen_reg_or_parallel (mode, orig_mode,
6986 SSE_REGNO (sse_regno));
6987 if (n == 8
6988 && regclass[0] == X86_64_SSE_CLASS
6989 && regclass[1] == X86_64_SSEUP_CLASS
6990 && regclass[2] == X86_64_SSEUP_CLASS
6991 && regclass[3] == X86_64_SSEUP_CLASS
6992 && regclass[4] == X86_64_SSEUP_CLASS
6993 && regclass[5] == X86_64_SSEUP_CLASS
6994 && regclass[6] == X86_64_SSEUP_CLASS
6995 && regclass[7] == X86_64_SSEUP_CLASS
6996 && mode != BLKmode)
6997 return gen_reg_or_parallel (mode, orig_mode,
6998 SSE_REGNO (sse_regno));
6999 if (n == 2
7000 && regclass[0] == X86_64_X87_CLASS
7001 && regclass[1] == X86_64_X87UP_CLASS)
7002 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
7003
7004 if (n == 2
7005 && regclass[0] == X86_64_INTEGER_CLASS
7006 && regclass[1] == X86_64_INTEGER_CLASS
7007 && (mode == CDImode || mode == TImode)
7008 && intreg[0] + 1 == intreg[1])
7009 return gen_rtx_REG (mode, intreg[0]);
7010
7011 /* Otherwise figure out the entries of the PARALLEL. */
7012 for (i = 0; i < n; i++)
7013 {
7014 int pos;
7015
7016 switch (regclass[i])
7017 {
7018 case X86_64_NO_CLASS:
7019 break;
7020 case X86_64_INTEGER_CLASS:
7021 case X86_64_INTEGERSI_CLASS:
7022 /* Merge TImodes on aligned occasions here too. */
7023 if (i * 8 + 8 > bytes)
7024 tmpmode
7025 = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
7026 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
7027 tmpmode = SImode;
7028 else
7029 tmpmode = DImode;
7030 /* We've requested a width (e.g. 24 bits) for which
7031 there is no integer mode. Use DImode. */
7032 if (tmpmode == BLKmode)
7033 tmpmode = DImode;
7034 exp [nexps++]
7035 = gen_rtx_EXPR_LIST (VOIDmode,
7036 gen_rtx_REG (tmpmode, *intreg),
7037 GEN_INT (i*8));
7038 intreg++;
7039 break;
7040 case X86_64_SSESF_CLASS:
7041 exp [nexps++]
7042 = gen_rtx_EXPR_LIST (VOIDmode,
7043 gen_rtx_REG (SFmode,
7044 SSE_REGNO (sse_regno)),
7045 GEN_INT (i*8));
7046 sse_regno++;
7047 break;
7048 case X86_64_SSEDF_CLASS:
7049 exp [nexps++]
7050 = gen_rtx_EXPR_LIST (VOIDmode,
7051 gen_rtx_REG (DFmode,
7052 SSE_REGNO (sse_regno)),
7053 GEN_INT (i*8));
7054 sse_regno++;
7055 break;
7056 case X86_64_SSE_CLASS:
7057 pos = i;
7058 switch (n)
7059 {
7060 case 1:
7061 tmpmode = DImode;
7062 break;
7063 case 2:
7064 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
7065 {
7066 tmpmode = TImode;
7067 i++;
7068 }
7069 else
7070 tmpmode = DImode;
7071 break;
7072 case 4:
7073 gcc_assert (i == 0
7074 && regclass[1] == X86_64_SSEUP_CLASS
7075 && regclass[2] == X86_64_SSEUP_CLASS
7076 && regclass[3] == X86_64_SSEUP_CLASS);
7077 tmpmode = OImode;
7078 i += 3;
7079 break;
7080 case 8:
7081 gcc_assert (i == 0
7082 && regclass[1] == X86_64_SSEUP_CLASS
7083 && regclass[2] == X86_64_SSEUP_CLASS
7084 && regclass[3] == X86_64_SSEUP_CLASS
7085 && regclass[4] == X86_64_SSEUP_CLASS
7086 && regclass[5] == X86_64_SSEUP_CLASS
7087 && regclass[6] == X86_64_SSEUP_CLASS
7088 && regclass[7] == X86_64_SSEUP_CLASS);
7089 tmpmode = XImode;
7090 i += 7;
7091 break;
7092 default:
7093 gcc_unreachable ();
7094 }
7095 exp [nexps++]
7096 = gen_rtx_EXPR_LIST (VOIDmode,
7097 gen_rtx_REG (tmpmode,
7098 SSE_REGNO (sse_regno)),
7099 GEN_INT (pos*8));
7100 sse_regno++;
7101 break;
7102 default:
7103 gcc_unreachable ();
7104 }
7105 }
7106
7107 /* Empty aligned struct, union or class. */
7108 if (nexps == 0)
7109 return NULL;
7110
7111 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
7112 for (i = 0; i < nexps; i++)
7113 XVECEXP (ret, 0, i) = exp [i];
7114 return ret;
7115 }
7116
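/* Illustrative result (a sketch): for the struct classified as
   { X86_64_SSEDF_CLASS, X86_64_INTEGER_CLASS } in the example above,
   construct_container builds a PARALLEL roughly of the form

     (parallel [(expr_list (reg:DF xmm0) (const_int 0))
                (expr_list (reg:DI di)   (const_int 8))])

   i.e. the double travels in the first available SSE register and the
   long in the first remaining integer argument register.  */
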
7117 /* Update the data in CUM to advance over an argument of mode MODE
7118 and data type TYPE. (TYPE is null for libcalls where that information
7119 may not be available.) */
7120
7121 static void
7122 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7123 const_tree type, HOST_WIDE_INT bytes,
7124 HOST_WIDE_INT words)
7125 {
7126 switch (mode)
7127 {
7128 default:
7129 break;
7130
7131 case BLKmode:
7132 if (bytes < 0)
7133 break;
7134 /* FALLTHRU */
7135
7136 case DImode:
7137 case SImode:
7138 case HImode:
7139 case QImode:
7140 cum->words += words;
7141 cum->nregs -= words;
7142 cum->regno += words;
7143
7144 if (cum->nregs <= 0)
7145 {
7146 cum->nregs = 0;
7147 cum->regno = 0;
7148 }
7149 break;
7150
7151 case OImode:
7152 /* OImode shouldn't be used directly. */
7153 gcc_unreachable ();
7154
7155 case DFmode:
7156 if (cum->float_in_sse < 2)
7157 break;
7158 case SFmode:
7159 if (cum->float_in_sse < 1)
7160 break;
7161 /* FALLTHRU */
7162
7163 case V8SFmode:
7164 case V8SImode:
7165 case V64QImode:
7166 case V32HImode:
7167 case V16SImode:
7168 case V8DImode:
7169 case V16SFmode:
7170 case V8DFmode:
7171 case V32QImode:
7172 case V16HImode:
7173 case V4DFmode:
7174 case V4DImode:
7175 case TImode:
7176 case V16QImode:
7177 case V8HImode:
7178 case V4SImode:
7179 case V2DImode:
7180 case V4SFmode:
7181 case V2DFmode:
7182 if (!type || !AGGREGATE_TYPE_P (type))
7183 {
7184 cum->sse_words += words;
7185 cum->sse_nregs -= 1;
7186 cum->sse_regno += 1;
7187 if (cum->sse_nregs <= 0)
7188 {
7189 cum->sse_nregs = 0;
7190 cum->sse_regno = 0;
7191 }
7192 }
7193 break;
7194
7195 case V8QImode:
7196 case V4HImode:
7197 case V2SImode:
7198 case V2SFmode:
7199 case V1TImode:
7200 case V1DImode:
7201 if (!type || !AGGREGATE_TYPE_P (type))
7202 {
7203 cum->mmx_words += words;
7204 cum->mmx_nregs -= 1;
7205 cum->mmx_regno += 1;
7206 if (cum->mmx_nregs <= 0)
7207 {
7208 cum->mmx_nregs = 0;
7209 cum->mmx_regno = 0;
7210 }
7211 }
7212 break;
7213 }
7214 }
7215
7216 static void
7217 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7218 const_tree type, HOST_WIDE_INT words, bool named)
7219 {
7220 int int_nregs, sse_nregs;
7221
7222 /* Unnamed 256-bit and 512-bit vector mode parameters are passed on the stack. */
7223 if (!named && (VALID_AVX512F_REG_MODE (mode)
7224 || VALID_AVX256_REG_MODE (mode)))
7225 return;
7226
7227 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
7228 && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
7229 {
7230 cum->nregs -= int_nregs;
7231 cum->sse_nregs -= sse_nregs;
7232 cum->regno += int_nregs;
7233 cum->sse_regno += sse_nregs;
7234 }
7235 else
7236 {
7237 int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
7238 cum->words = (cum->words + align - 1) & ~(align - 1);
7239 cum->words += words;
7240 }
7241 }
7242
7243 static void
7244 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
7245 HOST_WIDE_INT words)
7246 {
7247 /* Otherwise, this should be passed indirect. */
7248 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
7249
7250 cum->words += words;
7251 if (cum->nregs > 0)
7252 {
7253 cum->nregs -= 1;
7254 cum->regno += 1;
7255 }
7256 }
7257
7258 /* Update the data in CUM to advance over an argument of mode MODE and
7259 data type TYPE. (TYPE is null for libcalls where that information
7260 may not be available.) */
7261
7262 static void
7263 ix86_function_arg_advance (cumulative_args_t cum_v, enum machine_mode mode,
7264 const_tree type, bool named)
7265 {
7266 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7267 HOST_WIDE_INT bytes, words;
7268
7269 if (mode == BLKmode)
7270 bytes = int_size_in_bytes (type);
7271 else
7272 bytes = GET_MODE_SIZE (mode);
7273 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7274
7275 if (type)
7276 mode = type_natural_mode (type, NULL, false);
7277
7278 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7279 function_arg_advance_ms_64 (cum, bytes, words);
7280 else if (TARGET_64BIT)
7281 function_arg_advance_64 (cum, mode, type, words, named);
7282 else
7283 function_arg_advance_32 (cum, mode, type, bytes, words);
7284 }
7285
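/* Illustrative sketch: for a 32-bit function declared with
   __attribute__ ((regparm (3))) taking three ints, this hook walks
   cum->regno through 0, 1, 2 (EAX, EDX, ECX) while cum->nregs drops
   from 3 to 0; a fourth int argument would then be pushed on the
   stack.  */
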
7286 /* Define where to put the arguments to a function.
7287 Value is zero to push the argument on the stack,
7288 or a hard register in which to store the argument.
7289
7290 MODE is the argument's machine mode.
7291 TYPE is the data type of the argument (as a tree).
7292 This is null for libcalls where that information may
7293 not be available.
7294 CUM is a variable of type CUMULATIVE_ARGS which gives info about
7295 the preceding args and about the function being called.
7296 NAMED is nonzero if this argument is a named parameter
7297 (otherwise it is an extra parameter matching an ellipsis). */
7298
7299 static rtx
7300 function_arg_32 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
7301 enum machine_mode orig_mode, const_tree type,
7302 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
7303 {
7304 /* Avoid the AL settings for the Unix64 ABI. */
7305 if (mode == VOIDmode)
7306 return constm1_rtx;
7307
7308 switch (mode)
7309 {
7310 default:
7311 break;
7312
7313 case BLKmode:
7314 if (bytes < 0)
7315 break;
7316 /* FALLTHRU */
7317 case DImode:
7318 case SImode:
7319 case HImode:
7320 case QImode:
7321 if (words <= cum->nregs)
7322 {
7323 int regno = cum->regno;
7324
7325 /* Fastcall allocates the first two DWORD-sized (SImode) or
7326 smaller arguments to ECX and EDX unless the argument is an
7327 aggregate type. */
7328 if (cum->fastcall)
7329 {
7330 if (mode == BLKmode
7331 || mode == DImode
7332 || (type && AGGREGATE_TYPE_P (type)))
7333 break;
7334
7335 /* ECX not EAX is the first allocated register. */
7336 if (regno == AX_REG)
7337 regno = CX_REG;
7338 }
7339 return gen_rtx_REG (mode, regno);
7340 }
7341 break;
7342
7343 case DFmode:
7344 if (cum->float_in_sse < 2)
7345 break;
7346 case SFmode:
7347 if (cum->float_in_sse < 1)
7348 break;
7349 /* FALLTHRU */
7350 case TImode:
7351 /* In 32bit, we pass TImode in xmm registers. */
7352 case V16QImode:
7353 case V8HImode:
7354 case V4SImode:
7355 case V2DImode:
7356 case V4SFmode:
7357 case V2DFmode:
7358 if (!type || !AGGREGATE_TYPE_P (type))
7359 {
7360 if (cum->sse_nregs)
7361 return gen_reg_or_parallel (mode, orig_mode,
7362 cum->sse_regno + FIRST_SSE_REG);
7363 }
7364 break;
7365
7366 case OImode:
7367 case XImode:
7368 /* OImode and XImode shouldn't be used directly. */
7369 gcc_unreachable ();
7370
7371 case V64QImode:
7372 case V32HImode:
7373 case V16SImode:
7374 case V8DImode:
7375 case V16SFmode:
7376 case V8DFmode:
7377 case V8SFmode:
7378 case V8SImode:
7379 case V32QImode:
7380 case V16HImode:
7381 case V4DFmode:
7382 case V4DImode:
7383 if (!type || !AGGREGATE_TYPE_P (type))
7384 {
7385 if (cum->sse_nregs)
7386 return gen_reg_or_parallel (mode, orig_mode,
7387 cum->sse_regno + FIRST_SSE_REG);
7388 }
7389 break;
7390
7391 case V8QImode:
7392 case V4HImode:
7393 case V2SImode:
7394 case V2SFmode:
7395 case V1TImode:
7396 case V1DImode:
7397 if (!type || !AGGREGATE_TYPE_P (type))
7398 {
7399 if (cum->mmx_nregs)
7400 return gen_reg_or_parallel (mode, orig_mode,
7401 cum->mmx_regno + FIRST_MMX_REG);
7402 }
7403 break;
7404 }
7405
7406 return NULL_RTX;
7407 }
7408
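/* Illustrative sketch: with the 32-bit fastcall convention,

     void __attribute__ ((fastcall)) f (int a, int b, int c);

   the code above hands A to ECX and B to EDX (cum->fastcall redirects the
   first register from EAX to ECX), while C, being the third integer
   argument, goes on the stack.  */
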
7409 static rtx
7410 function_arg_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
7411 enum machine_mode orig_mode, const_tree type, bool named)
7412 {
7413 /* Handle a hidden AL argument containing number of registers
7414 for varargs x86-64 functions. */
7415 if (mode == VOIDmode)
7416 return GEN_INT (cum->maybe_vaarg
7417 ? (cum->sse_nregs < 0
7418 ? X86_64_SSE_REGPARM_MAX
7419 : cum->sse_regno)
7420 : -1);
7421
7422 switch (mode)
7423 {
7424 default:
7425 break;
7426
7427 case V8SFmode:
7428 case V8SImode:
7429 case V32QImode:
7430 case V16HImode:
7431 case V4DFmode:
7432 case V4DImode:
7433 case V16SFmode:
7434 case V16SImode:
7435 case V64QImode:
7436 case V32HImode:
7437 case V8DFmode:
7438 case V8DImode:
7439 /* Unnamed 256-bit and 512-bit vector mode parameters are passed on the stack. */
7440 if (!named)
7441 return NULL;
7442 break;
7443 }
7444
7445 return construct_container (mode, orig_mode, type, 0, cum->nregs,
7446 cum->sse_nregs,
7447 &x86_64_int_parameter_registers [cum->regno],
7448 cum->sse_regno);
7449 }
7450
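/* Illustrative sketch: for a SYSV x86-64 varargs call such as

     printf ("%f\n", 3.14);

   the VOIDmode case above yields the hidden AL value -- here 1, the
   number of SSE registers used -- which the callee's prologue tests
   before dumping the XMM argument registers into the register save
   area (see setup_incoming_varargs_64 below).  */
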
7451 static rtx
7452 function_arg_ms_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
7453 enum machine_mode orig_mode, bool named,
7454 HOST_WIDE_INT bytes)
7455 {
7456 unsigned int regno;
7457
7458 /* We need to add a clobber for MS_ABI->SYSV ABI calls in expand_call.
7459 We use a value of -2 to specify that the current function call is MSABI. */
7460 if (mode == VOIDmode)
7461 return GEN_INT (-2);
7462
7463 /* If we've run out of registers, it goes on the stack. */
7464 if (cum->nregs == 0)
7465 return NULL_RTX;
7466
7467 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
7468
7469 /* Only floating point modes are passed in anything but integer regs. */
7470 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
7471 {
7472 if (named)
7473 regno = cum->regno + FIRST_SSE_REG;
7474 else
7475 {
7476 rtx t1, t2;
7477
7478 /* Unnamed floating parameters are passed in both the
7479 SSE and integer registers. */
7480 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
7481 t2 = gen_rtx_REG (mode, regno);
7482 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
7483 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
7484 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
7485 }
7486 }
7487 /* Handle aggregated types passed in register. */
7488 if (orig_mode == BLKmode)
7489 {
7490 if (bytes > 0 && bytes <= 8)
7491 mode = (bytes > 4 ? DImode : SImode);
7492 if (mode == BLKmode)
7493 mode = DImode;
7494 }
7495
7496 return gen_reg_or_parallel (mode, orig_mode, regno);
7497 }
7498
7499 /* Return where to put the arguments to a function.
7500 Return zero to push the argument on the stack, or a hard register in which to store the argument.
7501
7502 MODE is the argument's machine mode. TYPE is the data type of the
7503 argument. It is null for libcalls where that information may not be
7504 available. CUM gives information about the preceding args and about
7505 the function being called. NAMED is nonzero if this argument is a
7506 named parameter (otherwise it is an extra parameter matching an
7507 ellipsis). */
7508
7509 static rtx
7510 ix86_function_arg (cumulative_args_t cum_v, enum machine_mode omode,
7511 const_tree type, bool named)
7512 {
7513 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7514 enum machine_mode mode = omode;
7515 HOST_WIDE_INT bytes, words;
7516 rtx arg;
7517
7518 if (mode == BLKmode)
7519 bytes = int_size_in_bytes (type);
7520 else
7521 bytes = GET_MODE_SIZE (mode);
7522 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7523
7524 /* To simplify the code below, represent vector types with a vector mode
7525 even if MMX/SSE are not active. */
7526 if (type && TREE_CODE (type) == VECTOR_TYPE)
7527 mode = type_natural_mode (type, cum, false);
7528
7529 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7530 arg = function_arg_ms_64 (cum, mode, omode, named, bytes);
7531 else if (TARGET_64BIT)
7532 arg = function_arg_64 (cum, mode, omode, type, named);
7533 else
7534 arg = function_arg_32 (cum, mode, omode, type, bytes, words);
7535
7536 return arg;
7537 }
7538
7539 /* A C expression that indicates when an argument must be passed by
7540 reference. If nonzero for an argument, a copy of that argument is
7541 made in memory and a pointer to the argument is passed instead of
7542 the argument itself. The pointer is passed in whatever way is
7543 appropriate for passing a pointer to that type. */
7544
7545 static bool
7546 ix86_pass_by_reference (cumulative_args_t cum_v, enum machine_mode mode,
7547 const_tree type, bool named ATTRIBUTE_UNUSED)
7548 {
7549 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7550
7551 /* See Windows x64 Software Convention. */
7552 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7553 {
7554 int msize = (int) GET_MODE_SIZE (mode);
7555 if (type)
7556 {
7557 /* Arrays are passed by reference. */
7558 if (TREE_CODE (type) == ARRAY_TYPE)
7559 return true;
7560
7561 if (AGGREGATE_TYPE_P (type))
7562 {
7563 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
7564 are passed by reference. */
7565 msize = int_size_in_bytes (type);
7566 }
7567 }
7568
7569 /* __m128 is passed by reference. */
7570 switch (msize) {
7571 case 1: case 2: case 4: case 8:
7572 break;
7573 default:
7574 return true;
7575 }
7576 }
7577 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
7578 return 1;
7579
7580 return 0;
7581 }
7582
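/* Illustrative sketch of the Win64 rule above: a value of 1, 2, 4 or 8
   bytes is passed by value in a register or stack slot, while

     struct s { char c[12]; };      -> passed by reference
     __m128 v;                      -> passed by reference

   since 12 and 16 are not among the permitted sizes.  */
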
7583 /* Return true when TYPE should be 128bit aligned for 32bit argument
7584 passing ABI. XXX: This function is obsolete and is only used for
7585 checking psABI compatibility with previous versions of GCC. */
7586
7587 static bool
7588 ix86_compat_aligned_value_p (const_tree type)
7589 {
7590 enum machine_mode mode = TYPE_MODE (type);
7591 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
7592 || mode == TDmode
7593 || mode == TFmode
7594 || mode == TCmode)
7595 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
7596 return true;
7597 if (TYPE_ALIGN (type) < 128)
7598 return false;
7599
7600 if (AGGREGATE_TYPE_P (type))
7601 {
7602 /* Walk the aggregates recursively. */
7603 switch (TREE_CODE (type))
7604 {
7605 case RECORD_TYPE:
7606 case UNION_TYPE:
7607 case QUAL_UNION_TYPE:
7608 {
7609 tree field;
7610
7611 /* Walk all the structure fields. */
7612 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
7613 {
7614 if (TREE_CODE (field) == FIELD_DECL
7615 && ix86_compat_aligned_value_p (TREE_TYPE (field)))
7616 return true;
7617 }
7618 break;
7619 }
7620
7621 case ARRAY_TYPE:
7622 /* Just in case some language passes arrays by value. */
7623 if (ix86_compat_aligned_value_p (TREE_TYPE (type)))
7624 return true;
7625 break;
7626
7627 default:
7628 gcc_unreachable ();
7629 }
7630 }
7631 return false;
7632 }
7633
7634 /* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
7635 XXX: This function is obsolete and is only used for checking psABI
7636 compatibility with previous versions of GCC. */
7637
7638 static unsigned int
7639 ix86_compat_function_arg_boundary (enum machine_mode mode,
7640 const_tree type, unsigned int align)
7641 {
7642 /* In 32bit, only _Decimal128 and __float128 are aligned to their
7643 natural boundaries. */
7644 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
7645 {
7646 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
7647 make an exception for SSE modes since these require 128bit
7648 alignment.
7649
7650 The handling here differs from field_alignment. ICC aligns MMX
7651 arguments to 4 byte boundaries, while structure fields are aligned
7652 to 8 byte boundaries. */
7653 if (!type)
7654 {
7655 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
7656 align = PARM_BOUNDARY;
7657 }
7658 else
7659 {
7660 if (!ix86_compat_aligned_value_p (type))
7661 align = PARM_BOUNDARY;
7662 }
7663 }
7664 if (align > BIGGEST_ALIGNMENT)
7665 align = BIGGEST_ALIGNMENT;
7666 return align;
7667 }
7668
7669 /* Return true when TYPE should be 128bit aligned for 32bit argument
7670 passing ABI. */
7671
7672 static bool
7673 ix86_contains_aligned_value_p (const_tree type)
7674 {
7675 enum machine_mode mode = TYPE_MODE (type);
7676
7677 if (mode == XFmode || mode == XCmode)
7678 return false;
7679
7680 if (TYPE_ALIGN (type) < 128)
7681 return false;
7682
7683 if (AGGREGATE_TYPE_P (type))
7684 {
7685 /* Walk the aggregates recursively. */
7686 switch (TREE_CODE (type))
7687 {
7688 case RECORD_TYPE:
7689 case UNION_TYPE:
7690 case QUAL_UNION_TYPE:
7691 {
7692 tree field;
7693
7694 /* Walk all the structure fields. */
7695 for (field = TYPE_FIELDS (type);
7696 field;
7697 field = DECL_CHAIN (field))
7698 {
7699 if (TREE_CODE (field) == FIELD_DECL
7700 && ix86_contains_aligned_value_p (TREE_TYPE (field)))
7701 return true;
7702 }
7703 break;
7704 }
7705
7706 case ARRAY_TYPE:
7707 /* Just in case some language passes arrays by value. */
7708 if (ix86_contains_aligned_value_p (TREE_TYPE (type)))
7709 return true;
7710 break;
7711
7712 default:
7713 gcc_unreachable ();
7714 }
7715 }
7716 else
7717 return TYPE_ALIGN (type) >= 128;
7718
7719 return false;
7720 }
7721
7722 /* Gives the alignment boundary, in bits, of an argument with the
7723 specified mode and type. */
7724
7725 static unsigned int
7726 ix86_function_arg_boundary (enum machine_mode mode, const_tree type)
7727 {
7728 unsigned int align;
7729 if (type)
7730 {
7731 /* Since the main variant type is used for the call, convert the
7732 type to its main variant. */
7733 type = TYPE_MAIN_VARIANT (type);
7734 align = TYPE_ALIGN (type);
7735 }
7736 else
7737 align = GET_MODE_ALIGNMENT (mode);
7738 if (align < PARM_BOUNDARY)
7739 align = PARM_BOUNDARY;
7740 else
7741 {
7742 static bool warned;
7743 unsigned int saved_align = align;
7744
7745 if (!TARGET_64BIT)
7746 {
7747 /* i386 ABI defines XFmode arguments to be 4 byte aligned. */
7748 if (!type)
7749 {
7750 if (mode == XFmode || mode == XCmode)
7751 align = PARM_BOUNDARY;
7752 }
7753 else if (!ix86_contains_aligned_value_p (type))
7754 align = PARM_BOUNDARY;
7755
7756 if (align < 128)
7757 align = PARM_BOUNDARY;
7758 }
7759
7760 if (warn_psabi
7761 && !warned
7762 && align != ix86_compat_function_arg_boundary (mode, type,
7763 saved_align))
7764 {
7765 warned = true;
7766 inform (input_location,
7767 "The ABI for passing parameters with %d-byte"
7768 " alignment has changed in GCC 4.6",
7769 align / BITS_PER_UNIT);
7770 }
7771 }
7772
7773 return align;
7774 }
7775
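/* Illustrative sketch: in 32-bit code an int argument keeps the
   PARM_BOUNDARY alignment of 32 bits, whereas an __m128 argument (or a
   struct containing one) reports 128 bits here, and the -Wpsabi note
   above fires if that answer differs from the pre-GCC-4.6 one.  */
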
7776 /* Return true if N is a possible register number of function value. */
7777
7778 static bool
7779 ix86_function_value_regno_p (const unsigned int regno)
7780 {
7781 switch (regno)
7782 {
7783 case AX_REG:
7784 return true;
7785 case DX_REG:
7786 return (!TARGET_64BIT || ix86_abi != MS_ABI);
7787 case DI_REG:
7788 case SI_REG:
7789 return TARGET_64BIT && ix86_abi != MS_ABI;
7790
7791 /* Complex values are returned in %st(0)/%st(1) pair. */
7792 case ST0_REG:
7793 case ST1_REG:
7794 /* TODO: The function should depend on current function ABI but
7795 builtins.c would need updating then. Therefore we use the
7796 default ABI. */
7797 if (TARGET_64BIT && ix86_abi == MS_ABI)
7798 return false;
7799 return TARGET_FLOAT_RETURNS_IN_80387;
7800
7801 /* Complex values are returned in %xmm0/%xmm1 pair. */
7802 case XMM0_REG:
7803 case XMM1_REG:
7804 return TARGET_SSE;
7805
7806 case MM0_REG:
7807 if (TARGET_MACHO || TARGET_64BIT)
7808 return false;
7809 return TARGET_MMX;
7810 }
7811
7812 return false;
7813 }
7814
7815 /* Define how to find the value returned by a function.
7816 VALTYPE is the data type of the value (as a tree).
7817 If the precise function being called is known, FUNC is its FUNCTION_DECL;
7818 otherwise, FUNC is 0. */
7819
7820 static rtx
7821 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
7822 const_tree fntype, const_tree fn)
7823 {
7824 unsigned int regno;
7825
7826 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
7827 we normally prevent this case when mmx is not available. However
7828 some ABIs may require the result to be returned like DImode. */
7829 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
7830 regno = FIRST_MMX_REG;
7831
7832 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
7833 we prevent this case when sse is not available. However some ABIs
7834 may require the result to be returned like integer TImode. */
7835 else if (mode == TImode
7836 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
7837 regno = FIRST_SSE_REG;
7838
7839 /* 32-byte vector modes in %ymm0. */
7840 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
7841 regno = FIRST_SSE_REG;
7842
7843 /* 64-byte vector modes in %zmm0. */
7844 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
7845 regno = FIRST_SSE_REG;
7846
7847 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
7848 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
7849 regno = FIRST_FLOAT_REG;
7850 else
7851 /* Most things go in %eax. */
7852 regno = AX_REG;
7853
7854 /* Override FP return register with %xmm0 for local functions when
7855 SSE math is enabled or for functions with sseregparm attribute. */
7856 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
7857 {
7858 int sse_level = ix86_function_sseregparm (fntype, fn, false);
7859 if ((sse_level >= 1 && mode == SFmode)
7860 || (sse_level == 2 && mode == DFmode))
7861 regno = FIRST_SSE_REG;
7862 }
7863
7864 /* OImode shouldn't be used directly. */
7865 gcc_assert (mode != OImode);
7866
7867 return gen_rtx_REG (orig_mode, regno);
7868 }
7869
7870 static rtx
7871 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
7872 const_tree valtype)
7873 {
7874 rtx ret;
7875
7876 /* Handle libcalls, which don't provide a type node. */
7877 if (valtype == NULL)
7878 {
7879 unsigned int regno;
7880
7881 switch (mode)
7882 {
7883 case SFmode:
7884 case SCmode:
7885 case DFmode:
7886 case DCmode:
7887 case TFmode:
7888 case SDmode:
7889 case DDmode:
7890 case TDmode:
7891 regno = FIRST_SSE_REG;
7892 break;
7893 case XFmode:
7894 case XCmode:
7895 regno = FIRST_FLOAT_REG;
7896 break;
7897 case TCmode:
7898 return NULL;
7899 default:
7900 regno = AX_REG;
7901 }
7902
7903 return gen_rtx_REG (mode, regno);
7904 }
7905 else if (POINTER_TYPE_P (valtype))
7906 {
7907 /* Pointers are always returned in word_mode. */
7908 mode = word_mode;
7909 }
7910
7911 ret = construct_container (mode, orig_mode, valtype, 1,
7912 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
7913 x86_64_int_return_registers, 0);
7914
7915 /* For zero sized structures, construct_container returns NULL, but we
7916 need to keep rest of compiler happy by returning meaningful value. */
7917 if (!ret)
7918 ret = gen_rtx_REG (orig_mode, AX_REG);
7919
7920 return ret;
7921 }
7922
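/* Illustrative sketch: returning

     struct s { double d; long l; };

   from a SYSV x86-64 function goes through construct_container with the
   return registers, yielding a PARALLEL that places the double in %xmm0
   and the long in %rax; a plain pointer return instead comes back in
   %rax promoted to word_mode.  */
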
7923 static rtx
7924 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode,
7925 const_tree valtype)
7926 {
7927 unsigned int regno = AX_REG;
7928
7929 if (TARGET_SSE)
7930 {
7931 switch (GET_MODE_SIZE (mode))
7932 {
7933 case 16:
7934 if (valtype != NULL_TREE
7935 && !VECTOR_INTEGER_TYPE_P (valtype)
7937 && !INTEGRAL_TYPE_P (valtype)
7938 && !VECTOR_FLOAT_TYPE_P (valtype))
7939 break;
7940 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
7941 && !COMPLEX_MODE_P (mode))
7942 regno = FIRST_SSE_REG;
7943 break;
7944 case 8:
7945 case 4:
7946 if (mode == SFmode || mode == DFmode)
7947 regno = FIRST_SSE_REG;
7948 break;
7949 default:
7950 break;
7951 }
7952 }
7953 return gen_rtx_REG (orig_mode, regno);
7954 }
7955
7956 static rtx
7957 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
7958 enum machine_mode orig_mode, enum machine_mode mode)
7959 {
7960 const_tree fn, fntype;
7961
7962 fn = NULL_TREE;
7963 if (fntype_or_decl && DECL_P (fntype_or_decl))
7964 fn = fntype_or_decl;
7965 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
7966
7967 if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
7968 return function_value_ms_64 (orig_mode, mode, valtype);
7969 else if (TARGET_64BIT)
7970 return function_value_64 (orig_mode, mode, valtype);
7971 else
7972 return function_value_32 (orig_mode, mode, fntype, fn);
7973 }
7974
7975 static rtx
7976 ix86_function_value (const_tree valtype, const_tree fntype_or_decl,
7977 bool outgoing ATTRIBUTE_UNUSED)
7978 {
7979 enum machine_mode mode, orig_mode;
7980
7981 orig_mode = TYPE_MODE (valtype);
7982 mode = type_natural_mode (valtype, NULL, true);
7983 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
7984 }
7985
7986 /* Pointer function arguments and return values are promoted to
7987 word_mode. */
7988
7989 static enum machine_mode
7990 ix86_promote_function_mode (const_tree type, enum machine_mode mode,
7991 int *punsignedp, const_tree fntype,
7992 int for_return)
7993 {
7994 if (type != NULL_TREE && POINTER_TYPE_P (type))
7995 {
7996 *punsignedp = POINTERS_EXTEND_UNSIGNED;
7997 return word_mode;
7998 }
7999 return default_promote_function_mode (type, mode, punsignedp, fntype,
8000 for_return);
8001 }
8002
8003 /* Return true if a structure, union or array with MODE containing FIELD
8004 should be accessed using BLKmode. */
8005
8006 static bool
8007 ix86_member_type_forces_blk (const_tree field, enum machine_mode mode)
8008 {
8009 /* Union with XFmode must be in BLKmode. */
8010 return (mode == XFmode
8011 && (TREE_CODE (DECL_FIELD_CONTEXT (field)) == UNION_TYPE
8012 || TREE_CODE (DECL_FIELD_CONTEXT (field)) == QUAL_UNION_TYPE));
8013 }
8014
8015 rtx
8016 ix86_libcall_value (enum machine_mode mode)
8017 {
8018 return ix86_function_value_1 (NULL, NULL, mode, mode);
8019 }
8020
8021 /* Return true iff type is returned in memory. */
8022
8023 static bool
8024 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
8025 {
8026 #ifdef SUBTARGET_RETURN_IN_MEMORY
8027 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
8028 #else
8029 const enum machine_mode mode = type_natural_mode (type, NULL, true);
8030 HOST_WIDE_INT size;
8031
8032 if (TARGET_64BIT)
8033 {
8034 if (ix86_function_type_abi (fntype) == MS_ABI)
8035 {
8036 size = int_size_in_bytes (type);
8037
8038 /* __m128 is returned in xmm0. */
8039 if ((!type || VECTOR_INTEGER_TYPE_P (type)
8040 || INTEGRAL_TYPE_P (type)
8041 || VECTOR_FLOAT_TYPE_P (type))
8042 && (SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
8043 && !COMPLEX_MODE_P (mode)
8044 && (GET_MODE_SIZE (mode) == 16 || size == 16))
8045 return false;
8046
8047 /* Otherwise, the size must be exactly 1, 2, 4 or 8 bytes. */
8048 return size != 1 && size != 2 && size != 4 && size != 8;
8049 }
8050 else
8051 {
8052 int needed_intregs, needed_sseregs;
8053
8054 return examine_argument (mode, type, 1,
8055 &needed_intregs, &needed_sseregs);
8056 }
8057 }
8058 else
8059 {
8060 if (mode == BLKmode)
8061 return true;
8062
8063 size = int_size_in_bytes (type);
8064
8065 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
8066 return false;
8067
8068 if (VECTOR_MODE_P (mode) || mode == TImode)
8069 {
8070 /* User-created vectors small enough to fit in EAX. */
8071 if (size < 8)
8072 return false;
8073
8074 /* Unless the ABI prescribes otherwise,
8075 MMX/3dNow values are returned in MM0 if available. */
8076
8077 if (size == 8)
8078 return TARGET_VECT8_RETURNS || !TARGET_MMX;
8079
8080 /* SSE values are returned in XMM0 if available. */
8081 if (size == 16)
8082 return !TARGET_SSE;
8083
8084 /* AVX values are returned in YMM0 if available. */
8085 if (size == 32)
8086 return !TARGET_AVX;
8087
8088 /* AVX512F values are returned in ZMM0 if available. */
8089 if (size == 64)
8090 return !TARGET_AVX512F;
8091 }
8092
8093 if (mode == XFmode)
8094 return false;
8095
8096 if (size > 12)
8097 return true;
8098
8099 /* OImode shouldn't be used directly. */
8100 gcc_assert (mode != OImode);
8101
8102 return false;
8103 }
8104 #endif
8105 }
8106
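/* Illustrative sketch of the 64-bit SYSV branch above:

     struct small { long a, b; };        -> registers (RAX:RDX)
     struct big   { long a, b, c; };     -> memory (hidden pointer)
     __m256 v;                           -> %ymm0 when AVX is enabled

   examine_argument reports the big struct as MEMORY because it needs
   three eightbytes that do not form an SSE/SSEUP run.  */
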
8107 \f
8108 /* Create the va_list data type. */
8109
8110 /* Returns the calling-convention-specific va_list data type.
8111 The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI. */
8112
8113 static tree
8114 ix86_build_builtin_va_list_abi (enum calling_abi abi)
8115 {
8116 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
8117
8118 /* For i386 we use plain pointer to argument area. */
8119 if (!TARGET_64BIT || abi == MS_ABI)
8120 return build_pointer_type (char_type_node);
8121
8122 record = lang_hooks.types.make_type (RECORD_TYPE);
8123 type_decl = build_decl (BUILTINS_LOCATION,
8124 TYPE_DECL, get_identifier ("__va_list_tag"), record);
8125
8126 f_gpr = build_decl (BUILTINS_LOCATION,
8127 FIELD_DECL, get_identifier ("gp_offset"),
8128 unsigned_type_node);
8129 f_fpr = build_decl (BUILTINS_LOCATION,
8130 FIELD_DECL, get_identifier ("fp_offset"),
8131 unsigned_type_node);
8132 f_ovf = build_decl (BUILTINS_LOCATION,
8133 FIELD_DECL, get_identifier ("overflow_arg_area"),
8134 ptr_type_node);
8135 f_sav = build_decl (BUILTINS_LOCATION,
8136 FIELD_DECL, get_identifier ("reg_save_area"),
8137 ptr_type_node);
8138
8139 va_list_gpr_counter_field = f_gpr;
8140 va_list_fpr_counter_field = f_fpr;
8141
8142 DECL_FIELD_CONTEXT (f_gpr) = record;
8143 DECL_FIELD_CONTEXT (f_fpr) = record;
8144 DECL_FIELD_CONTEXT (f_ovf) = record;
8145 DECL_FIELD_CONTEXT (f_sav) = record;
8146
8147 TYPE_STUB_DECL (record) = type_decl;
8148 TYPE_NAME (record) = type_decl;
8149 TYPE_FIELDS (record) = f_gpr;
8150 DECL_CHAIN (f_gpr) = f_fpr;
8151 DECL_CHAIN (f_fpr) = f_ovf;
8152 DECL_CHAIN (f_ovf) = f_sav;
8153
8154 layout_type (record);
8155
8156 /* The correct type is an array type of one element. */
8157 return build_array_type (record, build_index_type (size_zero_node));
8158 }
8159
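/* For reference (a sketch of what the record built above amounts to),
   the 64-bit SYSV va_list is equivalent to

     typedef struct __va_list_tag {
       unsigned int gp_offset;
       unsigned int fp_offset;
       void *overflow_arg_area;
       void *reg_save_area;
     } __builtin_va_list[1];

   while the 32-bit and MS ABIs use the plain char pointer returned
   early in this function.  */
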
8160 /* Setup the builtin va_list data type and for 64-bit the additional
8161 calling convention specific va_list data types. */
8162
8163 static tree
8164 ix86_build_builtin_va_list (void)
8165 {
8166 tree ret = ix86_build_builtin_va_list_abi (ix86_abi);
8167
8168 /* Initialize abi specific va_list builtin types. */
8169 if (TARGET_64BIT)
8170 {
8171 tree t;
8172 if (ix86_abi == MS_ABI)
8173 {
8174 t = ix86_build_builtin_va_list_abi (SYSV_ABI);
8175 if (TREE_CODE (t) != RECORD_TYPE)
8176 t = build_variant_type_copy (t);
8177 sysv_va_list_type_node = t;
8178 }
8179 else
8180 {
8181 t = ret;
8182 if (TREE_CODE (t) != RECORD_TYPE)
8183 t = build_variant_type_copy (t);
8184 sysv_va_list_type_node = t;
8185 }
8186 if (ix86_abi != MS_ABI)
8187 {
8188 t = ix86_build_builtin_va_list_abi (MS_ABI);
8189 if (TREE_CODE (t) != RECORD_TYPE)
8190 t = build_variant_type_copy (t);
8191 ms_va_list_type_node = t;
8192 }
8193 else
8194 {
8195 t = ret;
8196 if (TREE_CODE (t) != RECORD_TYPE)
8197 t = build_variant_type_copy (t);
8198 ms_va_list_type_node = t;
8199 }
8200 }
8201
8202 return ret;
8203 }
8204
8205 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
8206
8207 static void
8208 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
8209 {
8210 rtx save_area, mem;
8211 alias_set_type set;
8212 int i, max;
8213
8214 /* GPR size of varargs save area. */
8215 if (cfun->va_list_gpr_size)
8216 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
8217 else
8218 ix86_varargs_gpr_size = 0;
8219
8220 /* FPR size of varargs save area. We don't need it if we don't pass
8221 anything in SSE registers. */
8222 if (TARGET_SSE && cfun->va_list_fpr_size)
8223 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
8224 else
8225 ix86_varargs_fpr_size = 0;
8226
8227 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
8228 return;
8229
8230 save_area = frame_pointer_rtx;
8231 set = get_varargs_alias_set ();
8232
8233 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
8234 if (max > X86_64_REGPARM_MAX)
8235 max = X86_64_REGPARM_MAX;
8236
8237 for (i = cum->regno; i < max; i++)
8238 {
8239 mem = gen_rtx_MEM (word_mode,
8240 plus_constant (Pmode, save_area, i * UNITS_PER_WORD));
8241 MEM_NOTRAP_P (mem) = 1;
8242 set_mem_alias_set (mem, set);
8243 emit_move_insn (mem,
8244 gen_rtx_REG (word_mode,
8245 x86_64_int_parameter_registers[i]));
8246 }
8247
8248 if (ix86_varargs_fpr_size)
8249 {
8250 enum machine_mode smode;
8251 rtx label, test;
8252
8253 /* Now emit code to save SSE registers. The AX parameter contains the
8254 number of SSE parameter registers used to call this function, though all
8255 we actually check here is its zero/non-zero status. */
8256
8257 label = gen_label_rtx ();
8258 test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
8259 emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
8260 label));
8261
8262 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
8263 we used movdqa (i.e. TImode) instead? Perhaps even better would
8264 be if we could determine the real mode of the data, via a hook
8265 into pass_stdarg. Ignore all that for now. */
8266 smode = V4SFmode;
8267 if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
8268 crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);
8269
8270 max = cum->sse_regno + cfun->va_list_fpr_size / 16;
8271 if (max > X86_64_SSE_REGPARM_MAX)
8272 max = X86_64_SSE_REGPARM_MAX;
8273
8274 for (i = cum->sse_regno; i < max; ++i)
8275 {
8276 mem = plus_constant (Pmode, save_area,
8277 i * 16 + ix86_varargs_gpr_size);
8278 mem = gen_rtx_MEM (smode, mem);
8279 MEM_NOTRAP_P (mem) = 1;
8280 set_mem_alias_set (mem, set);
8281 set_mem_align (mem, GET_MODE_ALIGNMENT (smode));
8282
8283 emit_move_insn (mem, gen_rtx_REG (smode, SSE_REGNO (i)));
8284 }
8285
8286 emit_label (label);
8287 }
8288 }
8289
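/* Illustrative sketch only (not used by the compiler): the varargs register
   save area filled in above, assuming the usual X86_64_REGPARM_MAX == 6 and
   X86_64_SSE_REGPARM_MAX == 8.  gp_offset/fp_offset in the va_list index
   into this block, and the SSE half is skipped when the AL value passed by
   the caller is zero (the branch emitted above):

       struct sse_varargs_save_area_sketch
       {
         unsigned long gp[6];        -- RDI, RSI, RDX, RCX, R8, R9
         unsigned char xmm[8][16];   -- XMM0 .. XMM7, one 16-byte slot each
       };
*/
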
8290 static void
8291 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
8292 {
8293 alias_set_type set = get_varargs_alias_set ();
8294 int i;
8295
8296 /* Reset to zero, as there might have been a SysV va_arg used
8297 before. */
8298 ix86_varargs_gpr_size = 0;
8299 ix86_varargs_fpr_size = 0;
8300
8301 for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
8302 {
8303 rtx reg, mem;
8304
8305 mem = gen_rtx_MEM (Pmode,
8306 plus_constant (Pmode, virtual_incoming_args_rtx,
8307 i * UNITS_PER_WORD));
8308 MEM_NOTRAP_P (mem) = 1;
8309 set_mem_alias_set (mem, set);
8310
8311 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
8312 emit_move_insn (mem, reg);
8313 }
8314 }
8315
8316 static void
8317 ix86_setup_incoming_varargs (cumulative_args_t cum_v, enum machine_mode mode,
8318 tree type, int *pretend_size ATTRIBUTE_UNUSED,
8319 int no_rtl)
8320 {
8321 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8322 CUMULATIVE_ARGS next_cum;
8323 tree fntype;
8324
8325 /* This argument doesn't appear to be used anymore, which is good,
8326 because the old code here didn't suppress rtl generation. */
8327 gcc_assert (!no_rtl);
8328
8329 if (!TARGET_64BIT)
8330 return;
8331
8332 fntype = TREE_TYPE (current_function_decl);
8333
8334 /* For varargs, we do not want to skip the dummy va_dcl argument.
8335 For stdargs, we do want to skip the last named argument. */
8336 next_cum = *cum;
8337 if (stdarg_p (fntype))
8338 ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
8339 true);
8340
8341 if (cum->call_abi == MS_ABI)
8342 setup_incoming_varargs_ms_64 (&next_cum);
8343 else
8344 setup_incoming_varargs_64 (&next_cum);
8345 }
8346
8347 /* Check whether TYPE is a va_list of the plain 'char *' kind. */
8348
8349 static bool
8350 is_va_list_char_pointer (tree type)
8351 {
8352 tree canonic;
8353
8354 /* For 32-bit it is always true. */
8355 if (!TARGET_64BIT)
8356 return true;
8357 canonic = ix86_canonical_va_list_type (type);
8358 return (canonic == ms_va_list_type_node
8359 || (ix86_abi == MS_ABI && canonic == va_list_type_node));
8360 }
8361
8362 /* Implement va_start. */
8363
8364 static void
8365 ix86_va_start (tree valist, rtx nextarg)
8366 {
8367 HOST_WIDE_INT words, n_gpr, n_fpr;
8368 tree f_gpr, f_fpr, f_ovf, f_sav;
8369 tree gpr, fpr, ovf, sav, t;
8370 tree type;
8371 rtx ovf_rtx;
8372
8373 if (flag_split_stack
8374 && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8375 {
8376 unsigned int scratch_regno;
8377
8378 /* When we are splitting the stack, we can't refer to the stack
8379 arguments using internal_arg_pointer, because they may be on
8380 the old stack. The split stack prologue will arrange to
8381 leave a pointer to the old stack arguments in a scratch
8382 register, which we here copy to a pseudo-register. The split
8383 stack prologue can't set the pseudo-register directly because
8384 it (the prologue) runs before any registers have been saved. */
8385
8386 scratch_regno = split_stack_prologue_scratch_regno ();
8387 if (scratch_regno != INVALID_REGNUM)
8388 {
8389 rtx reg, seq;
8390
8391 reg = gen_reg_rtx (Pmode);
8392 cfun->machine->split_stack_varargs_pointer = reg;
8393
8394 start_sequence ();
8395 emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
8396 seq = get_insns ();
8397 end_sequence ();
8398
8399 push_topmost_sequence ();
8400 emit_insn_after (seq, entry_of_function ());
8401 pop_topmost_sequence ();
8402 }
8403 }
8404
8405 /* Only 64bit target needs something special. */
8406 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
8407 {
8408 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8409 std_expand_builtin_va_start (valist, nextarg);
8410 else
8411 {
8412 rtx va_r, next;
8413
8414 va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
8415 next = expand_binop (ptr_mode, add_optab,
8416 cfun->machine->split_stack_varargs_pointer,
8417 crtl->args.arg_offset_rtx,
8418 NULL_RTX, 0, OPTAB_LIB_WIDEN);
8419 convert_move (va_r, next, 0);
8420 }
8421 return;
8422 }
8423
8424 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
8425 f_fpr = DECL_CHAIN (f_gpr);
8426 f_ovf = DECL_CHAIN (f_fpr);
8427 f_sav = DECL_CHAIN (f_ovf);
8428
8429 valist = build_simple_mem_ref (valist);
8430 TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
8431 /* The following should be folded into the MEM_REF offset. */
8432 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
8433 f_gpr, NULL_TREE);
8434 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
8435 f_fpr, NULL_TREE);
8436 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
8437 f_ovf, NULL_TREE);
8438 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
8439 f_sav, NULL_TREE);
8440
8441 /* Count number of gp and fp argument registers used. */
8442 words = crtl->args.info.words;
8443 n_gpr = crtl->args.info.regno;
8444 n_fpr = crtl->args.info.sse_regno;
8445
8446 if (cfun->va_list_gpr_size)
8447 {
8448 type = TREE_TYPE (gpr);
8449 t = build2 (MODIFY_EXPR, type,
8450 gpr, build_int_cst (type, n_gpr * 8));
8451 TREE_SIDE_EFFECTS (t) = 1;
8452 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8453 }
8454
8455 if (TARGET_SSE && cfun->va_list_fpr_size)
8456 {
8457 type = TREE_TYPE (fpr);
8458 t = build2 (MODIFY_EXPR, type, fpr,
8459 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
8460 TREE_SIDE_EFFECTS (t) = 1;
8461 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8462 }
8463
8464 /* Find the overflow area. */
8465 type = TREE_TYPE (ovf);
8466 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8467 ovf_rtx = crtl->args.internal_arg_pointer;
8468 else
8469 ovf_rtx = cfun->machine->split_stack_varargs_pointer;
8470 t = make_tree (type, ovf_rtx);
8471 if (words != 0)
8472 t = fold_build_pointer_plus_hwi (t, words * UNITS_PER_WORD);
8473 t = build2 (MODIFY_EXPR, type, ovf, t);
8474 TREE_SIDE_EFFECTS (t) = 1;
8475 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8476
8477 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
8478 {
8479 /* Find the register save area.
8480 The function prologue saves it right above the stack frame. */
8481 type = TREE_TYPE (sav);
8482 t = make_tree (type, frame_pointer_rtx);
8483 if (!ix86_varargs_gpr_size)
8484 t = fold_build_pointer_plus_hwi (t, -8 * X86_64_REGPARM_MAX);
8485 t = build2 (MODIFY_EXPR, type, sav, t);
8486 TREE_SIDE_EFFECTS (t) = 1;
8487 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8488 }
8489 }
8490
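/* Illustrative sketch only of the SysV expansion above, written as C over
   the __va_list_tag fields (n_gpr, n_fpr and words come from
   crtl->args.info; 48 assumes the usual X86_64_REGPARM_MAX == 6):

       ap->gp_offset = n_gpr * 8;
       ap->fp_offset = 48 + n_fpr * 16;
       ap->overflow_arg_area = incoming_arg_pointer + words * 8;
       ap->reg_save_area = register_save_area;

   i386 and MS-ABI va_list values are plain pointers, so those cases are
   handled by std_expand_builtin_va_start instead.  */
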
8491 /* Implement va_arg. */
8492
8493 static tree
8494 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
8495 gimple_seq *post_p)
8496 {
8497 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
8498 tree f_gpr, f_fpr, f_ovf, f_sav;
8499 tree gpr, fpr, ovf, sav, t;
8500 int size, rsize;
8501 tree lab_false, lab_over = NULL_TREE;
8502 tree addr, t2;
8503 rtx container;
8504 int indirect_p = 0;
8505 tree ptrtype;
8506 enum machine_mode nat_mode;
8507 unsigned int arg_boundary;
8508
8509 /* Only 64bit target needs something special. */
8510 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
8511 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
8512
8513 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
8514 f_fpr = DECL_CHAIN (f_gpr);
8515 f_ovf = DECL_CHAIN (f_fpr);
8516 f_sav = DECL_CHAIN (f_ovf);
8517
8518 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
8519 build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE);
8520 valist = build_va_arg_indirect_ref (valist);
8521 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
8522 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
8523 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
8524
8525 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
8526 if (indirect_p)
8527 type = build_pointer_type (type);
8528 size = int_size_in_bytes (type);
8529 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
8530
8531 nat_mode = type_natural_mode (type, NULL, false);
8532 switch (nat_mode)
8533 {
8534 case V8SFmode:
8535 case V8SImode:
8536 case V32QImode:
8537 case V16HImode:
8538 case V4DFmode:
8539 case V4DImode:
8540 case V16SFmode:
8541 case V16SImode:
8542 case V64QImode:
8543 case V32HImode:
8544 case V8DFmode:
8545 case V8DImode:
8546 /* Unnamed 256 and 512bit vector mode parameters are passed on stack. */
8547 if (!TARGET_64BIT_MS_ABI)
8548 {
8549 container = NULL;
8550 break;
8551 }
8552
8553 default:
8554 container = construct_container (nat_mode, TYPE_MODE (type),
8555 type, 0, X86_64_REGPARM_MAX,
8556 X86_64_SSE_REGPARM_MAX, intreg,
8557 0);
8558 break;
8559 }
8560
8561 /* Pull the value out of the saved registers. */
8562
8563 addr = create_tmp_var (ptr_type_node, "addr");
8564
8565 if (container)
8566 {
8567 int needed_intregs, needed_sseregs;
8568 bool need_temp;
8569 tree int_addr, sse_addr;
8570
8571 lab_false = create_artificial_label (UNKNOWN_LOCATION);
8572 lab_over = create_artificial_label (UNKNOWN_LOCATION);
8573
8574 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
8575
8576 need_temp = (!REG_P (container)
8577 && ((needed_intregs && TYPE_ALIGN (type) > 64)
8578 || TYPE_ALIGN (type) > 128));
8579
8580 /* In case we are passing a structure, verify that it is a consecutive
8581 block in the register save area. If not, we need to do moves. */
8582 if (!need_temp && !REG_P (container))
8583 {
8584 /* Verify that all registers are strictly consecutive */
8585 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
8586 {
8587 int i;
8588
8589 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
8590 {
8591 rtx slot = XVECEXP (container, 0, i);
8592 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
8593 || INTVAL (XEXP (slot, 1)) != i * 16)
8594 need_temp = 1;
8595 }
8596 }
8597 else
8598 {
8599 int i;
8600
8601 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
8602 {
8603 rtx slot = XVECEXP (container, 0, i);
8604 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
8605 || INTVAL (XEXP (slot, 1)) != i * 8)
8606 need_temp = 1;
8607 }
8608 }
8609 }
8610 if (!need_temp)
8611 {
8612 int_addr = addr;
8613 sse_addr = addr;
8614 }
8615 else
8616 {
8617 int_addr = create_tmp_var (ptr_type_node, "int_addr");
8618 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
8619 }
8620
8621 /* First ensure that we fit completely in registers. */
8622 if (needed_intregs)
8623 {
8624 t = build_int_cst (TREE_TYPE (gpr),
8625 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
8626 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
8627 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
8628 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
8629 gimplify_and_add (t, pre_p);
8630 }
8631 if (needed_sseregs)
8632 {
8633 t = build_int_cst (TREE_TYPE (fpr),
8634 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
8635 + X86_64_REGPARM_MAX * 8);
8636 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
8637 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
8638 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
8639 gimplify_and_add (t, pre_p);
8640 }
8641
8642 /* Compute index to start of area used for integer regs. */
8643 if (needed_intregs)
8644 {
8645 /* int_addr = gpr + sav; */
8646 t = fold_build_pointer_plus (sav, gpr);
8647 gimplify_assign (int_addr, t, pre_p);
8648 }
8649 if (needed_sseregs)
8650 {
8651 /* sse_addr = fpr + sav; */
8652 t = fold_build_pointer_plus (sav, fpr);
8653 gimplify_assign (sse_addr, t, pre_p);
8654 }
8655 if (need_temp)
8656 {
8657 int i, prev_size = 0;
8658 tree temp = create_tmp_var (type, "va_arg_tmp");
8659
8660 /* addr = &temp; */
8661 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
8662 gimplify_assign (addr, t, pre_p);
8663
8664 for (i = 0; i < XVECLEN (container, 0); i++)
8665 {
8666 rtx slot = XVECEXP (container, 0, i);
8667 rtx reg = XEXP (slot, 0);
8668 enum machine_mode mode = GET_MODE (reg);
8669 tree piece_type;
8670 tree addr_type;
8671 tree daddr_type;
8672 tree src_addr, src;
8673 int src_offset;
8674 tree dest_addr, dest;
8675 int cur_size = GET_MODE_SIZE (mode);
8676
8677 gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
8678 prev_size = INTVAL (XEXP (slot, 1));
8679 if (prev_size + cur_size > size)
8680 {
8681 cur_size = size - prev_size;
8682 mode = mode_for_size (cur_size * BITS_PER_UNIT, MODE_INT, 1);
8683 if (mode == BLKmode)
8684 mode = QImode;
8685 }
8686 piece_type = lang_hooks.types.type_for_mode (mode, 1);
8687 if (mode == GET_MODE (reg))
8688 addr_type = build_pointer_type (piece_type);
8689 else
8690 addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
8691 true);
8692 daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
8693 true);
8694
8695 if (SSE_REGNO_P (REGNO (reg)))
8696 {
8697 src_addr = sse_addr;
8698 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
8699 }
8700 else
8701 {
8702 src_addr = int_addr;
8703 src_offset = REGNO (reg) * 8;
8704 }
8705 src_addr = fold_convert (addr_type, src_addr);
8706 src_addr = fold_build_pointer_plus_hwi (src_addr, src_offset);
8707
8708 dest_addr = fold_convert (daddr_type, addr);
8709 dest_addr = fold_build_pointer_plus_hwi (dest_addr, prev_size);
8710 if (cur_size == GET_MODE_SIZE (mode))
8711 {
8712 src = build_va_arg_indirect_ref (src_addr);
8713 dest = build_va_arg_indirect_ref (dest_addr);
8714
8715 gimplify_assign (dest, src, pre_p);
8716 }
8717 else
8718 {
8719 tree copy
8720 = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
8721 3, dest_addr, src_addr,
8722 size_int (cur_size));
8723 gimplify_and_add (copy, pre_p);
8724 }
8725 prev_size += cur_size;
8726 }
8727 }
8728
8729 if (needed_intregs)
8730 {
8731 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
8732 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
8733 gimplify_assign (gpr, t, pre_p);
8734 }
8735
8736 if (needed_sseregs)
8737 {
8738 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
8739 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
8740 gimplify_assign (fpr, t, pre_p);
8741 }
8742
8743 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
8744
8745 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
8746 }
8747
8748 /* ... otherwise out of the overflow area. */
8749
8750 /* When we align a parameter on the stack for the caller, if its
8751 alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be
8752 aligned at MAX_SUPPORTED_STACK_ALIGNMENT. Match the callee
8753 here with the caller. */
8754 arg_boundary = ix86_function_arg_boundary (VOIDmode, type);
8755 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
8756 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
8757
8758 /* Care for on-stack alignment if needed. */
8759 if (arg_boundary <= 64 || size == 0)
8760 t = ovf;
8761 else
8762 {
8763 HOST_WIDE_INT align = arg_boundary / 8;
8764 t = fold_build_pointer_plus_hwi (ovf, align - 1);
8765 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
8766 build_int_cst (TREE_TYPE (t), -align));
8767 }
8768
8769 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
8770 gimplify_assign (addr, t, pre_p);
8771
8772 t = fold_build_pointer_plus_hwi (t, rsize * UNITS_PER_WORD);
8773 gimplify_assign (unshare_expr (ovf), t, pre_p);
8774
8775 if (container)
8776 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
8777
8778 ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
8779 addr = fold_convert (ptrtype, addr);
8780
8781 if (indirect_p)
8782 addr = build_va_arg_indirect_ref (addr);
8783 return build_va_arg_indirect_ref (addr);
8784 }
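
/* Illustrative sketch only: for a plain 'int' argument the GIMPLE emitted
   above behaves like the following C, assuming the usual
   X86_64_REGPARM_MAX == 6 (so the GP register area is 48 bytes):

       if (ap->gp_offset >= 48)
         goto overflow;
       addr = ap->reg_save_area + ap->gp_offset;
       ap->gp_offset += 8;
       goto done;
     overflow:
       addr = ap->overflow_arg_area;        -- aligned first if required
       ap->overflow_arg_area = addr + 8;
     done:
       result = *(int *) addr;

   Aggregates that are split between GP and SSE registers take the
   need_temp path above and are first copied piecewise into a temporary.  */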
8785 \f
8786 /* Return true if OPNUM's MEM should be matched
8787 in movabs* patterns. */
8788
8789 bool
8790 ix86_check_movabs (rtx insn, int opnum)
8791 {
8792 rtx set, mem;
8793
8794 set = PATTERN (insn);
8795 if (GET_CODE (set) == PARALLEL)
8796 set = XVECEXP (set, 0, 0);
8797 gcc_assert (GET_CODE (set) == SET);
8798 mem = XEXP (set, opnum);
8799 while (GET_CODE (mem) == SUBREG)
8800 mem = SUBREG_REG (mem);
8801 gcc_assert (MEM_P (mem));
8802 return volatile_ok || !MEM_VOLATILE_P (mem);
8803 }
8804 \f
8805 /* Initialize the table of extra 80387 mathematical constants. */
8806
8807 static void
8808 init_ext_80387_constants (void)
8809 {
8810 static const char * cst[5] =
8811 {
8812 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
8813 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
8814 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
8815 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
8816 "3.1415926535897932385128089594061862044", /* 4: fldpi */
8817 };
8818 int i;
8819
8820 for (i = 0; i < 5; i++)
8821 {
8822 real_from_string (&ext_80387_constants_table[i], cst[i]);
8823 /* Ensure each constant is rounded to XFmode precision. */
8824 real_convert (&ext_80387_constants_table[i],
8825 XFmode, &ext_80387_constants_table[i]);
8826 }
8827
8828 ext_80387_constants_init = 1;
8829 }
8830
8831 /* Return non-zero if the constant is something that
8832 can be loaded with a special instruction. */
8833
8834 int
8835 standard_80387_constant_p (rtx x)
8836 {
8837 enum machine_mode mode = GET_MODE (x);
8838
8839 REAL_VALUE_TYPE r;
8840
8841 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
8842 return -1;
8843
8844 if (x == CONST0_RTX (mode))
8845 return 1;
8846 if (x == CONST1_RTX (mode))
8847 return 2;
8848
8849 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8850
8851 /* For XFmode constants, try to find a special 80387 instruction when
8852 optimizing for size or on those CPUs that benefit from them. */
8853 if (mode == XFmode
8854 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
8855 {
8856 int i;
8857
8858 if (! ext_80387_constants_init)
8859 init_ext_80387_constants ();
8860
8861 for (i = 0; i < 5; i++)
8862 if (real_identical (&r, &ext_80387_constants_table[i]))
8863 return i + 3;
8864 }
8865
8866 /* Load of the constant -0.0 or -1.0 will be split as
8867 fldz;fchs or fld1;fchs sequence. */
8868 if (real_isnegzero (&r))
8869 return 8;
8870 if (real_identical (&r, &dconstm1))
8871 return 9;
8872
8873 return 0;
8874 }
8875
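/* Illustrative examples only of the classification above (the mapping to
   opcodes is in standard_80387_constant_opcode below):

       standard_80387_constant_p (CONST0_RTX (XFmode)) == 1   -- fldz
       standard_80387_constant_p (CONST1_RTX (XFmode)) == 2   -- fld1
       an XFmode log10(2) constant                     == 3   -- fldlg2
       an XFmode pi constant                           == 7   -- fldpi
       an XFmode -0.0 constant                         == 8   -- fldz; fchs
       an XFmode -1.0 constant                         == 9   -- fld1; fchs
       anything else                                   == 0 or -1           */
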
8876 /* Return the opcode of the special instruction to be used to load
8877 the constant X. */
8878
8879 const char *
8880 standard_80387_constant_opcode (rtx x)
8881 {
8882 switch (standard_80387_constant_p (x))
8883 {
8884 case 1:
8885 return "fldz";
8886 case 2:
8887 return "fld1";
8888 case 3:
8889 return "fldlg2";
8890 case 4:
8891 return "fldln2";
8892 case 5:
8893 return "fldl2e";
8894 case 6:
8895 return "fldl2t";
8896 case 7:
8897 return "fldpi";
8898 case 8:
8899 case 9:
8900 return "#";
8901 default:
8902 gcc_unreachable ();
8903 }
8904 }
8905
8906 /* Return the CONST_DOUBLE representing the 80387 constant that is
8907 loaded by the specified special instruction. The argument IDX
8908 matches the return value from standard_80387_constant_p. */
8909
8910 rtx
8911 standard_80387_constant_rtx (int idx)
8912 {
8913 int i;
8914
8915 if (! ext_80387_constants_init)
8916 init_ext_80387_constants ();
8917
8918 switch (idx)
8919 {
8920 case 3:
8921 case 4:
8922 case 5:
8923 case 6:
8924 case 7:
8925 i = idx - 3;
8926 break;
8927
8928 default:
8929 gcc_unreachable ();
8930 }
8931
8932 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
8933 XFmode);
8934 }
8935
8936 /* Return 1 if X is all 0s and 2 if X is all 1s
8937 in a supported SSE/AVX vector mode. */
8938
8939 int
8940 standard_sse_constant_p (rtx x)
8941 {
8942 enum machine_mode mode = GET_MODE (x);
8943
8944 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
8945 return 1;
8946 if (vector_all_ones_operand (x, mode))
8947 switch (mode)
8948 {
8949 case V16QImode:
8950 case V8HImode:
8951 case V4SImode:
8952 case V2DImode:
8953 if (TARGET_SSE2)
8954 return 2;
8955 case V32QImode:
8956 case V16HImode:
8957 case V8SImode:
8958 case V4DImode:
8959 if (TARGET_AVX2)
8960 return 2;
8961 case V64QImode:
8962 case V32HImode:
8963 case V16SImode:
8964 case V8DImode:
8965 if (TARGET_AVX512F)
8966 return 2;
8967 default:
8968 break;
8969 }
8970
8971 return 0;
8972 }
8973
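/* Illustrative examples only (the instruction selection is in
   standard_sse_constant_opcode below):

       CONST0_RTX (V4SFmode)                         -> 1   -- xorps/pxor zeroing
       an all-ones V4SImode vector, with TARGET_SSE2 -> 2   -- pcmpeqd idiom
       any other constant                            -> 0                        */
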
8974 /* Return the opcode of the special instruction to be used to load
8975 the constant X. */
8976
8977 const char *
8978 standard_sse_constant_opcode (rtx insn, rtx x)
8979 {
8980 switch (standard_sse_constant_p (x))
8981 {
8982 case 1:
8983 switch (get_attr_mode (insn))
8984 {
8985 case MODE_XI:
8986 case MODE_V16SF:
8987 return "vpxord\t%g0, %g0, %g0";
8988 case MODE_V8DF:
8989 return "vpxorq\t%g0, %g0, %g0";
8990 case MODE_TI:
8991 return "%vpxor\t%0, %d0";
8992 case MODE_V2DF:
8993 return "%vxorpd\t%0, %d0";
8994 case MODE_V4SF:
8995 return "%vxorps\t%0, %d0";
8996
8997 case MODE_OI:
8998 return "vpxor\t%x0, %x0, %x0";
8999 case MODE_V4DF:
9000 return "vxorpd\t%x0, %x0, %x0";
9001 case MODE_V8SF:
9002 return "vxorps\t%x0, %x0, %x0";
9003
9004 default:
9005 break;
9006 }
9007
9008 case 2:
9009 if (get_attr_mode (insn) == MODE_XI
9010 || get_attr_mode (insn) == MODE_V8DF
9011 || get_attr_mode (insn) == MODE_V16SF)
9012 return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
9013 if (TARGET_AVX)
9014 return "vpcmpeqd\t%0, %0, %0";
9015 else
9016 return "pcmpeqd\t%0, %0";
9017
9018 default:
9019 break;
9020 }
9021 gcc_unreachable ();
9022 }
9023
9024 /* Returns true if OP contains a symbol reference */
9025
9026 bool
9027 symbolic_reference_mentioned_p (rtx op)
9028 {
9029 const char *fmt;
9030 int i;
9031
9032 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
9033 return true;
9034
9035 fmt = GET_RTX_FORMAT (GET_CODE (op));
9036 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
9037 {
9038 if (fmt[i] == 'E')
9039 {
9040 int j;
9041
9042 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
9043 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
9044 return true;
9045 }
9046
9047 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
9048 return true;
9049 }
9050
9051 return false;
9052 }
9053
9054 /* Return true if it is appropriate to emit `ret' instructions in the
9055 body of a function. Do this only if the epilogue is simple, needing a
9056 couple of insns. Prior to reloading, we can't tell how many registers
9057 must be saved, so return false then. Return false if there is no frame
9058 marker to de-allocate. */
9059
9060 bool
9061 ix86_can_use_return_insn_p (void)
9062 {
9063 struct ix86_frame frame;
9064
9065 if (! reload_completed || frame_pointer_needed)
9066 return 0;
9067
9068 /* Don't allow more than a 32k pop, since that's all we can do
9069 with one instruction. */
9070 if (crtl->args.pops_args && crtl->args.size >= 32768)
9071 return 0;
9072
9073 ix86_compute_frame_layout (&frame);
9074 return (frame.stack_pointer_offset == UNITS_PER_WORD
9075 && (frame.nregs + frame.nsseregs) == 0);
9076 }
9077 \f
9078 /* Value should be nonzero if functions must have frame pointers.
9079 Zero means the frame pointer need not be set up (and parms may
9080 be accessed via the stack pointer) in functions that seem suitable. */
9081
9082 static bool
9083 ix86_frame_pointer_required (void)
9084 {
9085 /* If we accessed previous frames, then the generated code expects
9086 to be able to access the saved ebp value in our frame. */
9087 if (cfun->machine->accesses_prev_frame)
9088 return true;
9089
9090 /* Several x86 OSes need a frame pointer for other reasons,
9091 usually pertaining to setjmp. */
9092 if (SUBTARGET_FRAME_POINTER_REQUIRED)
9093 return true;
9094
9095 /* For older 32-bit runtimes setjmp requires a valid frame pointer. */
9096 if (TARGET_32BIT_MS_ABI && cfun->calls_setjmp)
9097 return true;
9098
9099 /* With Win64 SEH, very large frames need a frame pointer, as the
9100 maximum stack allocation is 4GB. */
9101 if (TARGET_64BIT_MS_ABI && get_frame_size () > SEH_MAX_FRAME_SIZE)
9102 return true;
9103
9104 /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
9105 turns off the frame pointer by default. Turn it back on now if
9106 we've not got a leaf function. */
9107 if (TARGET_OMIT_LEAF_FRAME_POINTER
9108 && (!crtl->is_leaf
9109 || ix86_current_function_calls_tls_descriptor))
9110 return true;
9111
9112 if (crtl->profile && !flag_fentry)
9113 return true;
9114
9115 return false;
9116 }
9117
9118 /* Record that the current function accesses previous call frames. */
9119
9120 void
9121 ix86_setup_frame_addresses (void)
9122 {
9123 cfun->machine->accesses_prev_frame = 1;
9124 }
9125 \f
9126 #ifndef USE_HIDDEN_LINKONCE
9127 # if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
9128 # define USE_HIDDEN_LINKONCE 1
9129 # else
9130 # define USE_HIDDEN_LINKONCE 0
9131 # endif
9132 #endif
9133
9134 static int pic_labels_used;
9135
9136 /* Fills in the label name that should be used for a pc thunk for
9137 the given register. */
9138
9139 static void
9140 get_pc_thunk_name (char name[32], unsigned int regno)
9141 {
9142 gcc_assert (!TARGET_64BIT);
9143
9144 if (USE_HIDDEN_LINKONCE)
9145 sprintf (name, "__x86.get_pc_thunk.%s", reg_names[regno]);
9146 else
9147 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
9148 }
9149
9150
9151 /* This function generates the pc thunks used by -fpic code; each thunk
9152 loads its register with the return address of the caller and then returns. */
9153
9154 static void
9155 ix86_code_end (void)
9156 {
9157 rtx xops[2];
9158 int regno;
9159
9160 for (regno = AX_REG; regno <= SP_REG; regno++)
9161 {
9162 char name[32];
9163 tree decl;
9164
9165 if (!(pic_labels_used & (1 << regno)))
9166 continue;
9167
9168 get_pc_thunk_name (name, regno);
9169
9170 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
9171 get_identifier (name),
9172 build_function_type_list (void_type_node, NULL_TREE));
9173 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
9174 NULL_TREE, void_type_node);
9175 TREE_PUBLIC (decl) = 1;
9176 TREE_STATIC (decl) = 1;
9177 DECL_IGNORED_P (decl) = 1;
9178
9179 #if TARGET_MACHO
9180 if (TARGET_MACHO)
9181 {
9182 switch_to_section (darwin_sections[text_coal_section]);
9183 fputs ("\t.weak_definition\t", asm_out_file);
9184 assemble_name (asm_out_file, name);
9185 fputs ("\n\t.private_extern\t", asm_out_file);
9186 assemble_name (asm_out_file, name);
9187 putc ('\n', asm_out_file);
9188 ASM_OUTPUT_LABEL (asm_out_file, name);
9189 DECL_WEAK (decl) = 1;
9190 }
9191 else
9192 #endif
9193 if (USE_HIDDEN_LINKONCE)
9194 {
9195 cgraph_create_node (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
9196
9197 targetm.asm_out.unique_section (decl, 0);
9198 switch_to_section (get_named_section (decl, NULL, 0));
9199
9200 targetm.asm_out.globalize_label (asm_out_file, name);
9201 fputs ("\t.hidden\t", asm_out_file);
9202 assemble_name (asm_out_file, name);
9203 putc ('\n', asm_out_file);
9204 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
9205 }
9206 else
9207 {
9208 switch_to_section (text_section);
9209 ASM_OUTPUT_LABEL (asm_out_file, name);
9210 }
9211
9212 DECL_INITIAL (decl) = make_node (BLOCK);
9213 current_function_decl = decl;
9214 init_function_start (decl);
9215 first_function_block_is_cold = false;
9216 /* Make sure unwind info is emitted for the thunk if needed. */
9217 final_start_function (emit_barrier (), asm_out_file, 1);
9218
9219 /* Pad stack IP move with 4 instructions (two NOPs count
9220 as one instruction). */
9221 if (TARGET_PAD_SHORT_FUNCTION)
9222 {
9223 int i = 8;
9224
9225 while (i--)
9226 fputs ("\tnop\n", asm_out_file);
9227 }
9228
9229 xops[0] = gen_rtx_REG (Pmode, regno);
9230 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
9231 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
9232 fputs ("\tret\n", asm_out_file);
9233 final_end_function ();
9234 init_insn_lengths ();
9235 free_after_compilation (cfun);
9236 set_cfun (NULL);
9237 current_function_decl = NULL;
9238 }
9239
9240 if (flag_split_stack)
9241 file_end_indicate_split_stack ();
9242 }
9243
9244 /* Emit code for the SET_GOT patterns. */
9245
9246 const char *
9247 output_set_got (rtx dest, rtx label)
9248 {
9249 rtx xops[3];
9250
9251 xops[0] = dest;
9252
9253 if (TARGET_VXWORKS_RTP && flag_pic)
9254 {
9255 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
9256 xops[2] = gen_rtx_MEM (Pmode,
9257 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
9258 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
9259
9260 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
9261 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
9262 an unadorned address. */
9263 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
9264 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
9265 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
9266 return "";
9267 }
9268
9269 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
9270
9271 if (!flag_pic)
9272 {
9273 if (TARGET_MACHO)
9274 /* We don't need a pic base, we're not producing pic. */
9275 gcc_unreachable ();
9276
9277 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
9278 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
9279 targetm.asm_out.internal_label (asm_out_file, "L",
9280 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
9281 }
9282 else
9283 {
9284 char name[32];
9285 get_pc_thunk_name (name, REGNO (dest));
9286 pic_labels_used |= 1 << REGNO (dest);
9287
9288 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
9289 xops[2] = gen_rtx_MEM (QImode, xops[2]);
9290 output_asm_insn ("call\t%X2", xops);
9291
9292 #if TARGET_MACHO
9293 /* Output the Mach-O "canonical" pic base label name ("Lxx$pb") here.
9294 This is what will be referenced by the Mach-O PIC subsystem. */
9295 if (machopic_should_output_picbase_label () || !label)
9296 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
9297
9298 /* When we are restoring the pic base at the site of a nonlocal label,
9299 and we decided to emit the pic base above, we will still output a
9300 local label used for calculating the correction offset (even though
9301 the offset will be 0 in that case). */
9302 if (label)
9303 targetm.asm_out.internal_label (asm_out_file, "L",
9304 CODE_LABEL_NUMBER (label));
9305 #endif
9306 }
9307
9308 if (!TARGET_MACHO)
9309 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
9310
9311 return "";
9312 }
9313
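/* Illustrative sketch only: for 32-bit -fpic with %ebx as DEST, the
   sequence emitted by output_set_got together with the pc thunk produced
   in ix86_code_end above is, in AT&T syntax (GOT_SYMBOL_NAME is normally
   _GLOBAL_OFFSET_TABLE_):

       call  __x86.get_pc_thunk.bx
       addl  $_GLOBAL_OFFSET_TABLE_, %ebx

   __x86.get_pc_thunk.bx:
       movl  (%esp), %ebx
       ret
*/
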
9314 /* Generate a "push" pattern for input ARG. */
9315
9316 static rtx
9317 gen_push (rtx arg)
9318 {
9319 struct machine_function *m = cfun->machine;
9320
9321 if (m->fs.cfa_reg == stack_pointer_rtx)
9322 m->fs.cfa_offset += UNITS_PER_WORD;
9323 m->fs.sp_offset += UNITS_PER_WORD;
9324
9325 if (REG_P (arg) && GET_MODE (arg) != word_mode)
9326 arg = gen_rtx_REG (word_mode, REGNO (arg));
9327
9328 return gen_rtx_SET (VOIDmode,
9329 gen_rtx_MEM (word_mode,
9330 gen_rtx_PRE_DEC (Pmode,
9331 stack_pointer_rtx)),
9332 arg);
9333 }
9334
9335 /* Generate a "pop" pattern for input ARG. */
9336
9337 static rtx
9338 gen_pop (rtx arg)
9339 {
9340 if (REG_P (arg) && GET_MODE (arg) != word_mode)
9341 arg = gen_rtx_REG (word_mode, REGNO (arg));
9342
9343 return gen_rtx_SET (VOIDmode,
9344 arg,
9345 gen_rtx_MEM (word_mode,
9346 gen_rtx_POST_INC (Pmode,
9347 stack_pointer_rtx)));
9348 }
9349
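/* Illustrative sketch only: on x86-64, gen_push applied to the DI register
   above yields the RTL

       (set (mem:DI (pre_dec:DI (reg:DI sp))) (reg:DI di))

   and gen_pop the mirror image

       (set (reg:DI di) (mem:DI (post_inc:DI (reg:DI sp))))

   with the frame-state bookkeeping (cfa_offset, sp_offset) updated as a
   side effect of gen_push.  */
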
9350 /* Return >= 0 if there is an unused call-clobbered register available
9351 for the entire function. */
9352
9353 static unsigned int
9354 ix86_select_alt_pic_regnum (void)
9355 {
9356 if (crtl->is_leaf
9357 && !crtl->profile
9358 && !ix86_current_function_calls_tls_descriptor)
9359 {
9360 int i, drap;
9361 /* Can't use the same register for both PIC and DRAP. */
9362 if (crtl->drap_reg)
9363 drap = REGNO (crtl->drap_reg);
9364 else
9365 drap = -1;
9366 for (i = 2; i >= 0; --i)
9367 if (i != drap && !df_regs_ever_live_p (i))
9368 return i;
9369 }
9370
9371 return INVALID_REGNUM;
9372 }
9373
9374 /* Return TRUE if we need to save REGNO. */
9375
9376 static bool
9377 ix86_save_reg (unsigned int regno, bool maybe_eh_return)
9378 {
9379 if (pic_offset_table_rtx
9380 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
9381 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
9382 || crtl->profile
9383 || crtl->calls_eh_return
9384 || crtl->uses_const_pool
9385 || cfun->has_nonlocal_label))
9386 return ix86_select_alt_pic_regnum () == INVALID_REGNUM;
9387
9388 if (crtl->calls_eh_return && maybe_eh_return)
9389 {
9390 unsigned i;
9391 for (i = 0; ; i++)
9392 {
9393 unsigned test = EH_RETURN_DATA_REGNO (i);
9394 if (test == INVALID_REGNUM)
9395 break;
9396 if (test == regno)
9397 return true;
9398 }
9399 }
9400
9401 if (crtl->drap_reg
9402 && regno == REGNO (crtl->drap_reg)
9403 && !cfun->machine->no_drap_save_restore)
9404 return true;
9405
9406 return (df_regs_ever_live_p (regno)
9407 && !call_used_regs[regno]
9408 && !fixed_regs[regno]
9409 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
9410 }
9411
9412 /* Return the number of saved general purpose registers. */
9413
9414 static int
9415 ix86_nsaved_regs (void)
9416 {
9417 int nregs = 0;
9418 int regno;
9419
9420 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9421 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9422 nregs ++;
9423 return nregs;
9424 }
9425
9426 /* Return the number of saved SSE registers. */
9427
9428 static int
9429 ix86_nsaved_sseregs (void)
9430 {
9431 int nregs = 0;
9432 int regno;
9433
9434 if (!TARGET_64BIT_MS_ABI)
9435 return 0;
9436 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9437 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9438 nregs ++;
9439 return nregs;
9440 }
9441
9442 /* Given FROM and TO register numbers, say whether this elimination is
9443 allowed. If stack alignment is needed, we can only replace argument
9444 pointer with hard frame pointer, or replace frame pointer with stack
9445 pointer. Otherwise, frame pointer elimination is automatically
9446 handled and all other eliminations are valid. */
9447
9448 static bool
9449 ix86_can_eliminate (const int from, const int to)
9450 {
9451 if (stack_realign_fp)
9452 return ((from == ARG_POINTER_REGNUM
9453 && to == HARD_FRAME_POINTER_REGNUM)
9454 || (from == FRAME_POINTER_REGNUM
9455 && to == STACK_POINTER_REGNUM));
9456 else
9457 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
9458 }
9459
9460 /* Return the offset between two registers, one to be eliminated, and the other
9461 its replacement, at the start of a routine. */
9462
9463 HOST_WIDE_INT
9464 ix86_initial_elimination_offset (int from, int to)
9465 {
9466 struct ix86_frame frame;
9467 ix86_compute_frame_layout (&frame);
9468
9469 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
9470 return frame.hard_frame_pointer_offset;
9471 else if (from == FRAME_POINTER_REGNUM
9472 && to == HARD_FRAME_POINTER_REGNUM)
9473 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
9474 else
9475 {
9476 gcc_assert (to == STACK_POINTER_REGNUM);
9477
9478 if (from == ARG_POINTER_REGNUM)
9479 return frame.stack_pointer_offset;
9480
9481 gcc_assert (from == FRAME_POINTER_REGNUM);
9482 return frame.stack_pointer_offset - frame.frame_pointer_offset;
9483 }
9484 }
9485
9486 /* In a dynamically-aligned function, we can't know the offset from
9487 stack pointer to frame pointer, so we must ensure that setjmp
9488 eliminates fp against the hard fp (%ebp) rather than trying to
9489 index from %esp up to the top of the frame across a gap that is
9490 of unknown (at compile-time) size. */
9491 static rtx
9492 ix86_builtin_setjmp_frame_value (void)
9493 {
9494 return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
9495 }
9496
9497 /* When using -fsplit-stack, the allocation routines set a field in
9498 the TCB to the bottom of the stack plus this much space, measured
9499 in bytes. */
9500
9501 #define SPLIT_STACK_AVAILABLE 256
9502
9503 /* Fill the ix86_frame structure for the function currently being compiled. */
9504
9505 static void
9506 ix86_compute_frame_layout (struct ix86_frame *frame)
9507 {
9508 unsigned HOST_WIDE_INT stack_alignment_needed;
9509 HOST_WIDE_INT offset;
9510 unsigned HOST_WIDE_INT preferred_alignment;
9511 HOST_WIDE_INT size = get_frame_size ();
9512 HOST_WIDE_INT to_allocate;
9513
9514 frame->nregs = ix86_nsaved_regs ();
9515 frame->nsseregs = ix86_nsaved_sseregs ();
9516
9517 /* The 64-bit MS ABI seems to require stack alignment to always be 16, except
9518 for function prologues and leaf functions. */
9519 if ((TARGET_64BIT_MS_ABI && crtl->preferred_stack_boundary < 128)
9520 && (!crtl->is_leaf || cfun->calls_alloca != 0
9521 || ix86_current_function_calls_tls_descriptor))
9522 {
9523 crtl->preferred_stack_boundary = 128;
9524 crtl->stack_alignment_needed = 128;
9525 }
9526 /* preferred_stack_boundary is never updated for calls
9527 expanded from a TLS descriptor. Update it here. We don't update it in
9528 the expand stage because, according to the comments before
9529 ix86_current_function_calls_tls_descriptor, TLS calls may be optimized
9530 away. */
9531 else if (ix86_current_function_calls_tls_descriptor
9532 && crtl->preferred_stack_boundary < PREFERRED_STACK_BOUNDARY)
9533 {
9534 crtl->preferred_stack_boundary = PREFERRED_STACK_BOUNDARY;
9535 if (crtl->stack_alignment_needed < PREFERRED_STACK_BOUNDARY)
9536 crtl->stack_alignment_needed = PREFERRED_STACK_BOUNDARY;
9537 }
9538
9539 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
9540 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
9541
9542 gcc_assert (!size || stack_alignment_needed);
9543 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
9544 gcc_assert (preferred_alignment <= stack_alignment_needed);
9545
9546 /* For SEH we have to limit the amount of code movement into the prologue.
9547 At present we do this via a BLOCKAGE, at which point there's very little
9548 scheduling that can be done, which means that there's very little point
9549 in doing anything except PUSHs. */
9550 if (TARGET_SEH)
9551 cfun->machine->use_fast_prologue_epilogue = false;
9552
9553 /* During the reload iteration the number of registers saved can change.
9554 Recompute the value as needed. Do not recompute when the number of registers
9555 didn't change, as reload makes multiple calls to the function and does not
9556 expect the decision to change within a single iteration. */
9557 else if (!optimize_bb_for_size_p (ENTRY_BLOCK_PTR_FOR_FN (cfun))
9558 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
9559 {
9560 int count = frame->nregs;
9561 struct cgraph_node *node = cgraph_get_node (current_function_decl);
9562
9563 cfun->machine->use_fast_prologue_epilogue_nregs = count;
9564
9565 /* The fast prologue uses move instead of push to save registers. This
9566 is significantly longer, but also executes faster, as modern hardware
9567 can execute the moves in parallel, but cannot do so for push/pop.
9568 
9569 Be careful about choosing which prologue to emit: when a function takes
9570 many instructions to execute we may use the slow version, as well as when
9571 the function is known to be outside a hot spot (this is known with
9572 feedback only). Weight the size of the function by the number of registers
9573 to save, as it is cheap to use one or two push instructions but very
9574 slow to use many of them. */
9575 if (count)
9576 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
9577 if (node->frequency < NODE_FREQUENCY_NORMAL
9578 || (flag_branch_probabilities
9579 && node->frequency < NODE_FREQUENCY_HOT))
9580 cfun->machine->use_fast_prologue_epilogue = false;
9581 else
9582 cfun->machine->use_fast_prologue_epilogue
9583 = !expensive_function_p (count);
9584 }
9585
9586 frame->save_regs_using_mov
9587 = (TARGET_PROLOGUE_USING_MOVE && cfun->machine->use_fast_prologue_epilogue
9588 /* If static stack checking is enabled and done with probes,
9589 the registers need to be saved before allocating the frame. */
9590 && flag_stack_check != STATIC_BUILTIN_STACK_CHECK);
9591
9592 /* Skip return address. */
9593 offset = UNITS_PER_WORD;
9594
9595 /* Skip pushed static chain. */
9596 if (ix86_static_chain_on_stack)
9597 offset += UNITS_PER_WORD;
9598
9599 /* Skip saved base pointer. */
9600 if (frame_pointer_needed)
9601 offset += UNITS_PER_WORD;
9602 frame->hfp_save_offset = offset;
9603
9604 /* The traditional frame pointer location is at the top of the frame. */
9605 frame->hard_frame_pointer_offset = offset;
9606
9607 /* Register save area */
9608 offset += frame->nregs * UNITS_PER_WORD;
9609 frame->reg_save_offset = offset;
9610
9611 /* On SEH target, registers are pushed just before the frame pointer
9612 location. */
9613 if (TARGET_SEH)
9614 frame->hard_frame_pointer_offset = offset;
9615
9616 /* Align and set SSE register save area. */
9617 if (frame->nsseregs)
9618 {
9619 /* The only ABI that has saved SSE registers (Win64) also has a
9620 16-byte aligned default stack, and thus we don't need to be
9621 within the re-aligned local stack frame to save them. */
9622 gcc_assert (INCOMING_STACK_BOUNDARY >= 128);
9623 offset = (offset + 16 - 1) & -16;
9624 offset += frame->nsseregs * 16;
9625 }
9626 frame->sse_reg_save_offset = offset;
9627
9628 /* The re-aligned stack starts here. Values before this point are not
9629 directly comparable with values below this point. In order to make
9630 sure that no value happens to be the same before and after, force
9631 the alignment computation below to add a non-zero value. */
9632 if (stack_realign_fp)
9633 offset = (offset + stack_alignment_needed) & -stack_alignment_needed;
9634
9635 /* Va-arg area */
9636 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
9637 offset += frame->va_arg_size;
9638
9639 /* Align start of frame for local function. */
9640 if (stack_realign_fp
9641 || offset != frame->sse_reg_save_offset
9642 || size != 0
9643 || !crtl->is_leaf
9644 || cfun->calls_alloca
9645 || ix86_current_function_calls_tls_descriptor)
9646 offset = (offset + stack_alignment_needed - 1) & -stack_alignment_needed;
9647
9648 /* Frame pointer points here. */
9649 frame->frame_pointer_offset = offset;
9650
9651 offset += size;
9652
9653 /* Add the outgoing arguments area. It can be skipped if we eliminated
9654 all the function calls as dead code.
9655 Skipping is however impossible when the function calls alloca. The alloca
9656 expander assumes that the last crtl->outgoing_args_size bytes
9657 of the stack frame are unused. */
9658 if (ACCUMULATE_OUTGOING_ARGS
9659 && (!crtl->is_leaf || cfun->calls_alloca
9660 || ix86_current_function_calls_tls_descriptor))
9661 {
9662 offset += crtl->outgoing_args_size;
9663 frame->outgoing_arguments_size = crtl->outgoing_args_size;
9664 }
9665 else
9666 frame->outgoing_arguments_size = 0;
9667
9668 /* Align stack boundary. Only needed if we're calling another function
9669 or using alloca. */
9670 if (!crtl->is_leaf || cfun->calls_alloca
9671 || ix86_current_function_calls_tls_descriptor)
9672 offset = (offset + preferred_alignment - 1) & -preferred_alignment;
9673
9674 /* We've reached end of stack frame. */
9675 frame->stack_pointer_offset = offset;
9676
9677 /* Size prologue needs to allocate. */
9678 to_allocate = offset - frame->sse_reg_save_offset;
9679
9680 if ((!to_allocate && frame->nregs <= 1)
9681 || (TARGET_64BIT && to_allocate >= (HOST_WIDE_INT) 0x80000000))
9682 frame->save_regs_using_mov = false;
9683
9684 if (ix86_using_red_zone ()
9685 && crtl->sp_is_unchanging
9686 && crtl->is_leaf
9687 && !ix86_current_function_calls_tls_descriptor)
9688 {
9689 frame->red_zone_size = to_allocate;
9690 if (frame->save_regs_using_mov)
9691 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
9692 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
9693 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
9694 }
9695 else
9696 frame->red_zone_size = 0;
9697 frame->stack_pointer_offset -= frame->red_zone_size;
9698
9699 /* The SEH frame pointer location is near the bottom of the frame.
9700 This is enforced by the fact that the difference between the
9701 stack pointer and the frame pointer is limited to 240 bytes in
9702 the unwind data structure. */
9703 if (TARGET_SEH)
9704 {
9705 HOST_WIDE_INT diff;
9706
9707 /* If we can leave the frame pointer where it is, do so. Also, returns
9708 the establisher frame for __builtin_frame_address (0). */
9709 diff = frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
9710 if (diff <= SEH_MAX_FRAME_SIZE
9711 && (diff > 240 || (diff & 15) != 0)
9712 && !crtl->accesses_prior_frames)
9713 {
9714 /* Ideally we'd determine what portion of the local stack frame
9715 (within the constraint of the lowest 240) is most heavily used.
9716 But without that complication, simply bias the frame pointer
9717 by 128 bytes so as to maximize the amount of the local stack
9718 frame that is addressable with 8-bit offsets. */
9719 frame->hard_frame_pointer_offset = frame->stack_pointer_offset - 128;
9720 }
9721 }
9722 }
9723
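/* Illustrative sketch only: the offsets computed above describe a frame
   laid out, from the top of the frame toward lower addresses, roughly as

       return address                          offset = UNITS_PER_WORD
       [pushed static chain]
       [saved frame pointer]                   hard_frame_pointer_offset
       GP register save area                   reg_save_offset
       [16-byte padding] SSE register saves    sse_reg_save_offset
       va_arg register save area
       [realignment padding]
       local variables                         frame_pointer_offset + size
       outgoing argument area
       [alignment padding]                     stack_pointer_offset
       red zone (subtracted from stack_pointer_offset, never allocated)

   On SEH targets hard_frame_pointer_offset is instead moved near the
   bottom of the frame, as handled at the end of this function.  */
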
9724 /* This is semi-inlined memory_address_length, but simplified
9725 since we know that we're always dealing with reg+offset, and
9726 to avoid having to create and discard all that rtl. */
9727
9728 static inline int
9729 choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
9730 {
9731 int len = 4;
9732
9733 if (offset == 0)
9734 {
9735 /* EBP and R13 cannot be encoded without an offset. */
9736 len = (regno == BP_REG || regno == R13_REG);
9737 }
9738 else if (IN_RANGE (offset, -128, 127))
9739 len = 1;
9740
9741 /* ESP and R12 must be encoded with a SIB byte. */
9742 if (regno == SP_REG || regno == R12_REG)
9743 len++;
9744
9745 return len;
9746 }
9747
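/* Illustrative examples only of the length computation above (the value is
   the number of displacement/SIB bytes for the address, not the whole insn
   length):

       choose_baseaddr_len (AX_REG, 0)    == 0   -- (%rax)
       choose_baseaddr_len (BP_REG, 0)    == 1   -- 0(%rbp), disp8 forced
       choose_baseaddr_len (SP_REG, 0)    == 1   -- (%rsp), SIB byte
       choose_baseaddr_len (AX_REG, 64)   == 1   -- disp8
       choose_baseaddr_len (AX_REG, 1024) == 4   -- disp32
       choose_baseaddr_len (SP_REG, 1024) == 5   -- disp32 plus SIB byte
*/
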
9748 /* Return an RTX that points to CFA_OFFSET within the stack frame.
9749 The valid base registers are taken from CFUN->MACHINE->FS. */
9750
9751 static rtx
9752 choose_baseaddr (HOST_WIDE_INT cfa_offset)
9753 {
9754 const struct machine_function *m = cfun->machine;
9755 rtx base_reg = NULL;
9756 HOST_WIDE_INT base_offset = 0;
9757
9758 if (m->use_fast_prologue_epilogue)
9759 {
9760 /* Choose the base register most likely to allow the most scheduling
9761 opportunities. Generally FP is valid throughout the function,
9762 while DRAP must be reloaded within the epilogue. But choose either
9763 over the SP due to increased encoding size. */
9764
9765 if (m->fs.fp_valid)
9766 {
9767 base_reg = hard_frame_pointer_rtx;
9768 base_offset = m->fs.fp_offset - cfa_offset;
9769 }
9770 else if (m->fs.drap_valid)
9771 {
9772 base_reg = crtl->drap_reg;
9773 base_offset = 0 - cfa_offset;
9774 }
9775 else if (m->fs.sp_valid)
9776 {
9777 base_reg = stack_pointer_rtx;
9778 base_offset = m->fs.sp_offset - cfa_offset;
9779 }
9780 }
9781 else
9782 {
9783 HOST_WIDE_INT toffset;
9784 int len = 16, tlen;
9785
9786 /* Choose the base register with the smallest address encoding.
9787 With a tie, choose FP > DRAP > SP. */
9788 if (m->fs.sp_valid)
9789 {
9790 base_reg = stack_pointer_rtx;
9791 base_offset = m->fs.sp_offset - cfa_offset;
9792 len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
9793 }
9794 if (m->fs.drap_valid)
9795 {
9796 toffset = 0 - cfa_offset;
9797 tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
9798 if (tlen <= len)
9799 {
9800 base_reg = crtl->drap_reg;
9801 base_offset = toffset;
9802 len = tlen;
9803 }
9804 }
9805 if (m->fs.fp_valid)
9806 {
9807 toffset = m->fs.fp_offset - cfa_offset;
9808 tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
9809 if (tlen <= len)
9810 {
9811 base_reg = hard_frame_pointer_rtx;
9812 base_offset = toffset;
9813 len = tlen;
9814 }
9815 }
9816 }
9817 gcc_assert (base_reg != NULL);
9818
9819 return plus_constant (Pmode, base_reg, base_offset);
9820 }
9821
9822 /* Emit code to save registers in the prologue. */
9823
9824 static void
9825 ix86_emit_save_regs (void)
9826 {
9827 unsigned int regno;
9828 rtx insn;
9829
9830 for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
9831 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9832 {
9833 insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno)));
9834 RTX_FRAME_RELATED_P (insn) = 1;
9835 }
9836 }
9837
9838 /* Emit a single register save at CFA - CFA_OFFSET. */
9839
9840 static void
9841 ix86_emit_save_reg_using_mov (enum machine_mode mode, unsigned int regno,
9842 HOST_WIDE_INT cfa_offset)
9843 {
9844 struct machine_function *m = cfun->machine;
9845 rtx reg = gen_rtx_REG (mode, regno);
9846 rtx mem, addr, base, insn;
9847
9848 addr = choose_baseaddr (cfa_offset);
9849 mem = gen_frame_mem (mode, addr);
9850
9851 /* For SSE saves, we need to indicate the 128-bit alignment. */
9852 set_mem_align (mem, GET_MODE_ALIGNMENT (mode));
9853
9854 insn = emit_move_insn (mem, reg);
9855 RTX_FRAME_RELATED_P (insn) = 1;
9856
9857 base = addr;
9858 if (GET_CODE (base) == PLUS)
9859 base = XEXP (base, 0);
9860 gcc_checking_assert (REG_P (base));
9861
9862 /* When saving registers into a re-aligned local stack frame, avoid
9863 any tricky guessing by dwarf2out. */
9864 if (m->fs.realigned)
9865 {
9866 gcc_checking_assert (stack_realign_drap);
9867
9868 if (regno == REGNO (crtl->drap_reg))
9869 {
9870 /* A bit of a hack. We force the DRAP register to be saved in
9871 the re-aligned stack frame, which provides us with a copy
9872 of the CFA that will last past the prologue. Install it. */
9873 gcc_checking_assert (cfun->machine->fs.fp_valid);
9874 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
9875 cfun->machine->fs.fp_offset - cfa_offset);
9876 mem = gen_rtx_MEM (mode, addr);
9877 add_reg_note (insn, REG_CFA_DEF_CFA, mem);
9878 }
9879 else
9880 {
9881 /* The frame pointer is a stable reference within the
9882 aligned frame. Use it. */
9883 gcc_checking_assert (cfun->machine->fs.fp_valid);
9884 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
9885 cfun->machine->fs.fp_offset - cfa_offset);
9886 mem = gen_rtx_MEM (mode, addr);
9887 add_reg_note (insn, REG_CFA_EXPRESSION,
9888 gen_rtx_SET (VOIDmode, mem, reg));
9889 }
9890 }
9891
9892 /* The memory may not be relative to the current CFA register,
9893 which means that we may need to generate a new pattern for
9894 use by the unwind info. */
9895 else if (base != m->fs.cfa_reg)
9896 {
9897 addr = plus_constant (Pmode, m->fs.cfa_reg,
9898 m->fs.cfa_offset - cfa_offset);
9899 mem = gen_rtx_MEM (mode, addr);
9900 add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (VOIDmode, mem, reg));
9901 }
9902 }
9903
9904 /* Emit code to save registers using MOV insns.
9905 First register is stored at CFA - CFA_OFFSET. */
9906 static void
9907 ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
9908 {
9909 unsigned int regno;
9910
9911 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9912 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9913 {
9914 ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset);
9915 cfa_offset -= UNITS_PER_WORD;
9916 }
9917 }
9918
9919 /* Emit code to save SSE registers using MOV insns.
9920 First register is stored at CFA - CFA_OFFSET. */
9921 static void
9922 ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
9923 {
9924 unsigned int regno;
9925
9926 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9927 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9928 {
9929 ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
9930 cfa_offset -= 16;
9931 }
9932 }
9933
9934 static GTY(()) rtx queued_cfa_restores;
9935
9936 /* Add a REG_CFA_RESTORE REG note to INSN or queue them until next stack
9937 manipulation insn. The value is on the stack at CFA - CFA_OFFSET.
9938 Don't add the note if the previously saved value will be left untouched
9939 within stack red-zone till return, as unwinders can find the same value
9940 in the register and on the stack. */
9941
9942 static void
9943 ix86_add_cfa_restore_note (rtx insn, rtx reg, HOST_WIDE_INT cfa_offset)
9944 {
9945 if (!crtl->shrink_wrapped
9946 && cfa_offset <= cfun->machine->fs.red_zone_offset)
9947 return;
9948
9949 if (insn)
9950 {
9951 add_reg_note (insn, REG_CFA_RESTORE, reg);
9952 RTX_FRAME_RELATED_P (insn) = 1;
9953 }
9954 else
9955 queued_cfa_restores
9956 = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
9957 }
9958
9959 /* Add queued REG_CFA_RESTORE notes if any to INSN. */
9960
9961 static void
9962 ix86_add_queued_cfa_restore_notes (rtx insn)
9963 {
9964 rtx last;
9965 if (!queued_cfa_restores)
9966 return;
9967 for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
9968 ;
9969 XEXP (last, 1) = REG_NOTES (insn);
9970 REG_NOTES (insn) = queued_cfa_restores;
9971 queued_cfa_restores = NULL_RTX;
9972 RTX_FRAME_RELATED_P (insn) = 1;
9973 }
9974
9975 /* Expand a prologue or epilogue stack adjustment.
9976 The pattern exists to put a dependency on all ebp-based memory accesses.
9977 STYLE should be negative if instructions should be marked as frame related,
9978 zero if the %r11 register is live and cannot be freely used, and positive
9979 otherwise. */
9980
9981 static void
9982 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
9983 int style, bool set_cfa)
9984 {
9985 struct machine_function *m = cfun->machine;
9986 rtx insn;
9987 bool add_frame_related_expr = false;
9988
9989 if (Pmode == SImode)
9990 insn = gen_pro_epilogue_adjust_stack_si_add (dest, src, offset);
9991 else if (x86_64_immediate_operand (offset, DImode))
9992 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, offset);
9993 else
9994 {
9995 rtx tmp;
9996 /* r11 is used by indirect sibcall return as well, set before the
9997 epilogue and used after the epilogue. */
9998 if (style)
9999 tmp = gen_rtx_REG (DImode, R11_REG);
10000 else
10001 {
10002 gcc_assert (src != hard_frame_pointer_rtx
10003 && dest != hard_frame_pointer_rtx);
10004 tmp = hard_frame_pointer_rtx;
10005 }
10006 insn = emit_insn (gen_rtx_SET (DImode, tmp, offset));
10007 if (style < 0)
10008 add_frame_related_expr = true;
10009
10010 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, tmp);
10011 }
10012
10013 insn = emit_insn (insn);
10014 if (style >= 0)
10015 ix86_add_queued_cfa_restore_notes (insn);
10016
10017 if (set_cfa)
10018 {
10019 rtx r;
10020
10021 gcc_assert (m->fs.cfa_reg == src);
10022 m->fs.cfa_offset += INTVAL (offset);
10023 m->fs.cfa_reg = dest;
10024
10025 r = gen_rtx_PLUS (Pmode, src, offset);
10026 r = gen_rtx_SET (VOIDmode, dest, r);
10027 add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
10028 RTX_FRAME_RELATED_P (insn) = 1;
10029 }
10030 else if (style < 0)
10031 {
10032 RTX_FRAME_RELATED_P (insn) = 1;
10033 if (add_frame_related_expr)
10034 {
10035 rtx r = gen_rtx_PLUS (Pmode, src, offset);
10036 r = gen_rtx_SET (VOIDmode, dest, r);
10037 add_reg_note (insn, REG_FRAME_RELATED_EXPR, r);
10038 }
10039 }
10040
10041 if (dest == stack_pointer_rtx)
10042 {
10043 HOST_WIDE_INT ooffset = m->fs.sp_offset;
10044 bool valid = m->fs.sp_valid;
10045
10046 if (src == hard_frame_pointer_rtx)
10047 {
10048 valid = m->fs.fp_valid;
10049 ooffset = m->fs.fp_offset;
10050 }
10051 else if (src == crtl->drap_reg)
10052 {
10053 valid = m->fs.drap_valid;
10054 ooffset = 0;
10055 }
10056 else
10057 {
10058 /* Else there are two possibilities: SP itself, which we set
10059 up as the default above, or EH_RETURN_STACKADJ_RTX, which is
10060 taken care of by hand along the eh_return path. */
10061 gcc_checking_assert (src == stack_pointer_rtx
10062 || offset == const0_rtx);
10063 }
10064
10065 m->fs.sp_offset = ooffset - INTVAL (offset);
10066 m->fs.sp_valid = valid;
10067 }
10068 }
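/* For illustration: a typical prologue call of the helper above is

	pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				   GEN_INT (-allocate), -1,
				   m->fs.cfa_reg == stack_pointer_rtx);

   which is expected to expand to a single add/sub (or lea) on the stack
   pointer and, while the stack pointer is still the CFA register, to attach
   the matching REG_CFA_ADJUST_CFA note.  */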
10069
10070 /* Find an available register to be used as the dynamic realign argument
10071 pointer register. Such a register will be written in the prologue and
10072 used at the beginning of the body, so it must not be
10073 1. a parameter passing register.
10074 2. the GOT pointer.
10075 We reuse the static-chain register if it is available; otherwise we
10076 use DI for i386 and R13 for x86-64. We chose R13 since it has a
10077 shorter encoding.
10078
10079 Return: the regno of the chosen register. */
10080
10081 static unsigned int
10082 find_drap_reg (void)
10083 {
10084 tree decl = cfun->decl;
10085
10086 if (TARGET_64BIT)
10087 {
10088 /* Use R13 for a nested function or a function that needs a static chain.
10089 Since a function with a tail call may use any caller-saved
10090 register in the epilogue, DRAP must not use a caller-saved
10091 register in that case. */
10092 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
10093 return R13_REG;
10094
10095 return R10_REG;
10096 }
10097 else
10098 {
10099 /* Use DI for a nested function or a function that needs a static chain.
10100 Since a function with a tail call may use any caller-saved
10101 register in the epilogue, DRAP must not use a caller-saved
10102 register in that case. */
10103 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
10104 return DI_REG;
10105
10106 /* Reuse static chain register if it isn't used for parameter
10107 passing. */
10108 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2)
10109 {
10110 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (decl));
10111 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) == 0)
10112 return CX_REG;
10113 }
10114 return DI_REG;
10115 }
10116 }
10117
10118 /* Return minimum incoming stack alignment. */
10119
10120 static unsigned int
10121 ix86_minimum_incoming_stack_boundary (bool sibcall)
10122 {
10123 unsigned int incoming_stack_boundary;
10124
10125 /* Prefer the one specified at command line. */
10126 if (ix86_user_incoming_stack_boundary)
10127 incoming_stack_boundary = ix86_user_incoming_stack_boundary;
10128 /* In 32-bit mode, use MIN_STACK_BOUNDARY for the incoming stack boundary
10129 if -mstackrealign is used, this isn't a sibcall check, and the
10130 estimated stack alignment is 128 bits. */
10131 else if (!sibcall
10132 && !TARGET_64BIT
10133 && ix86_force_align_arg_pointer
10134 && crtl->stack_alignment_estimated == 128)
10135 incoming_stack_boundary = MIN_STACK_BOUNDARY;
10136 else
10137 incoming_stack_boundary = ix86_default_incoming_stack_boundary;
10138
10139 /* Incoming stack alignment can be changed on individual functions
10140 via force_align_arg_pointer attribute. We use the smallest
10141 incoming stack boundary. */
10142 if (incoming_stack_boundary > MIN_STACK_BOUNDARY
10143 && lookup_attribute (ix86_force_align_arg_pointer_string,
10144 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
10145 incoming_stack_boundary = MIN_STACK_BOUNDARY;
10146
10147 /* The incoming stack frame has to be aligned at least at
10148 parm_stack_boundary. */
10149 if (incoming_stack_boundary < crtl->parm_stack_boundary)
10150 incoming_stack_boundary = crtl->parm_stack_boundary;
10151
10152 /* The stack at the entry of main is aligned by the runtime. We use
10153 the smallest incoming stack boundary. */
10154 if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
10155 && DECL_NAME (current_function_decl)
10156 && MAIN_NAME_P (DECL_NAME (current_function_decl))
10157 && DECL_FILE_SCOPE_P (current_function_decl))
10158 incoming_stack_boundary = MAIN_STACK_BOUNDARY;
10159
10160 return incoming_stack_boundary;
10161 }
10162
10163 /* Update incoming stack boundary and estimated stack alignment. */
10164
10165 static void
10166 ix86_update_stack_boundary (void)
10167 {
10168 ix86_incoming_stack_boundary
10169 = ix86_minimum_incoming_stack_boundary (false);
10170
10171 /* x86_64 vararg needs 16byte stack alignment for register save
10172 area. */
10173 if (TARGET_64BIT
10174 && cfun->stdarg
10175 && crtl->stack_alignment_estimated < 128)
10176 crtl->stack_alignment_estimated = 128;
10177 }
10178
10179 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
10180 needed or an rtx for DRAP otherwise. */
10181
10182 static rtx
10183 ix86_get_drap_rtx (void)
10184 {
10185 if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
10186 crtl->need_drap = true;
10187
10188 if (stack_realign_drap)
10189 {
10190 /* Assign DRAP to vDRAP and return vDRAP. */
10191 unsigned int regno = find_drap_reg ();
10192 rtx drap_vreg;
10193 rtx arg_ptr;
10194 rtx seq, insn;
10195
10196 arg_ptr = gen_rtx_REG (Pmode, regno);
10197 crtl->drap_reg = arg_ptr;
10198
10199 start_sequence ();
10200 drap_vreg = copy_to_reg (arg_ptr);
10201 seq = get_insns ();
10202 end_sequence ();
10203
10204 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
10205 if (!optimize)
10206 {
10207 add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
10208 RTX_FRAME_RELATED_P (insn) = 1;
10209 }
10210 return drap_vreg;
10211 }
10212 else
10213 return NULL;
10214 }
10215
10216 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
10217
10218 static rtx
10219 ix86_internal_arg_pointer (void)
10220 {
10221 return virtual_incoming_args_rtx;
10222 }
10223
10224 struct scratch_reg {
10225 rtx reg;
10226 bool saved;
10227 };
10228
10229 /* Return a short-lived scratch register for use on function entry.
10230 In 32-bit mode, it is valid only after the registers are saved
10231 in the prologue. This register must be released by means of
10232 release_scratch_register_on_entry once it is dead. */
10233
10234 static void
10235 get_scratch_register_on_entry (struct scratch_reg *sr)
10236 {
10237 int regno;
10238
10239 sr->saved = false;
10240
10241 if (TARGET_64BIT)
10242 {
10243 /* We always use R11 in 64-bit mode. */
10244 regno = R11_REG;
10245 }
10246 else
10247 {
10248 tree decl = current_function_decl, fntype = TREE_TYPE (decl);
10249 bool fastcall_p
10250 = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
10251 bool thiscall_p
10252 = lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
10253 bool static_chain_p = DECL_STATIC_CHAIN (decl);
10254 int regparm = ix86_function_regparm (fntype, decl);
10255 int drap_regno
10256 = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;
10257
10258 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
10259 for the static chain register. */
10260 if ((regparm < 1 || (fastcall_p && !static_chain_p))
10261 && drap_regno != AX_REG)
10262 regno = AX_REG;
10263 /* 'thiscall' sets regparm to 1, uses ecx for arguments and edx
10264 for the static chain register. */
10265 else if (thiscall_p && !static_chain_p && drap_regno != AX_REG)
10266 regno = AX_REG;
10267 else if (regparm < 2 && !thiscall_p && drap_regno != DX_REG)
10268 regno = DX_REG;
10269 /* ecx is the static chain register. */
10270 else if (regparm < 3 && !fastcall_p && !thiscall_p
10271 && !static_chain_p
10272 && drap_regno != CX_REG)
10273 regno = CX_REG;
10274 else if (ix86_save_reg (BX_REG, true))
10275 regno = BX_REG;
10276 /* esi is the static chain register. */
10277 else if (!(regparm == 3 && static_chain_p)
10278 && ix86_save_reg (SI_REG, true))
10279 regno = SI_REG;
10280 else if (ix86_save_reg (DI_REG, true))
10281 regno = DI_REG;
10282 else
10283 {
10284 regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
10285 sr->saved = true;
10286 }
10287 }
10288
10289 sr->reg = gen_rtx_REG (Pmode, regno);
10290 if (sr->saved)
10291 {
10292 rtx insn = emit_insn (gen_push (sr->reg));
10293 RTX_FRAME_RELATED_P (insn) = 1;
10294 }
10295 }
10296
10297 /* Release a scratch register obtained from the preceding function. */
10298
10299 static void
10300 release_scratch_register_on_entry (struct scratch_reg *sr)
10301 {
10302 if (sr->saved)
10303 {
10304 struct machine_function *m = cfun->machine;
10305 rtx x, insn = emit_insn (gen_pop (sr->reg));
10306
10307 /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */
10308 RTX_FRAME_RELATED_P (insn) = 1;
10309 x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD));
10310 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
10311 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
10312 m->fs.sp_offset -= UNITS_PER_WORD;
10313 }
10314 }
10315
10316 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
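/* With the usual default of STACK_CHECK_PROBE_INTERVAL_EXP (12), this is
   4096 bytes, i.e. roughly one probe per page; the exact value is
   target-configurable.  */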
10317
10318 /* Emit code to adjust the stack pointer by SIZE bytes while probing it. */
10319
10320 static void
10321 ix86_adjust_stack_and_probe (const HOST_WIDE_INT size)
10322 {
10323 /* We skip the probe for the first interval + a small dope of 4 words and
10324 probe that many bytes past the specified size to maintain a protection
10325 area at the bottom of the stack. */
10326 const int dope = 4 * UNITS_PER_WORD;
10327 rtx size_rtx = GEN_INT (size), last;
10328
10329 /* See if we have a constant small number of probes to generate. If so,
10330 that's the easy case. The run-time loop is made up of 11 insns in the
10331 generic case while the compile-time loop is made up of 3+2*(n-1) insns
10332 for n # of intervals. */
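/* Worked example: for a SIZE spanning three intervals (n == 3) the unrolled
   sequence costs 3 + 2*(3-1) = 7 insns; the 5-interval cutoff below keeps
   the unrolled form at 3 + 2*4 = 11 insns at most, i.e. never worse than
   the run-time loop.  */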
10333 if (size <= 5 * PROBE_INTERVAL)
10334 {
10335 HOST_WIDE_INT i, adjust;
10336 bool first_probe = true;
10337
10338 /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
10339 values of N from 1 until it exceeds SIZE. If only one probe is
10340 needed, this will not generate any code. Then adjust and probe
10341 to PROBE_INTERVAL + SIZE. */
10342 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
10343 {
10344 if (first_probe)
10345 {
10346 adjust = 2 * PROBE_INTERVAL + dope;
10347 first_probe = false;
10348 }
10349 else
10350 adjust = PROBE_INTERVAL;
10351
10352 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10353 plus_constant (Pmode, stack_pointer_rtx,
10354 -adjust)));
10355 emit_stack_probe (stack_pointer_rtx);
10356 }
10357
10358 if (first_probe)
10359 adjust = size + PROBE_INTERVAL + dope;
10360 else
10361 adjust = size + PROBE_INTERVAL - i;
10362
10363 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10364 plus_constant (Pmode, stack_pointer_rtx,
10365 -adjust)));
10366 emit_stack_probe (stack_pointer_rtx);
10367
10368 /* Adjust back to account for the additional first interval. */
10369 last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10370 plus_constant (Pmode, stack_pointer_rtx,
10371 PROBE_INTERVAL + dope)));
10372 }
10373
10374 /* Otherwise, do the same as above, but in a loop. Note that we must be
10375 extra careful with variables wrapping around because we might be at
10376 the very top (or the very bottom) of the address space and we have
10377 to be able to handle this case properly; in particular, we use an
10378 equality test for the loop condition. */
10379 else
10380 {
10381 HOST_WIDE_INT rounded_size;
10382 struct scratch_reg sr;
10383
10384 get_scratch_register_on_entry (&sr);
10385
10386
10387 /* Step 1: round SIZE to the previous multiple of the interval. */
10388
10389 rounded_size = size & -PROBE_INTERVAL;
10390
10391
10392 /* Step 2: compute initial and final value of the loop counter. */
10393
10394 /* SP = SP_0 + PROBE_INTERVAL. */
10395 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10396 plus_constant (Pmode, stack_pointer_rtx,
10397 - (PROBE_INTERVAL + dope))));
10398
10399 /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */
10400 emit_move_insn (sr.reg, GEN_INT (-rounded_size));
10401 emit_insn (gen_rtx_SET (VOIDmode, sr.reg,
10402 gen_rtx_PLUS (Pmode, sr.reg,
10403 stack_pointer_rtx)));
10404
10405
10406 /* Step 3: the loop
10407
10408 while (SP != LAST_ADDR)
10409 {
10410 SP = SP + PROBE_INTERVAL
10411 probe at SP
10412 }
10413
10414 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
10415 values of N from 1 until it is equal to ROUNDED_SIZE. */
10416
10417 emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, size_rtx));
10418
10419
10420 /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
10421 assert at compile-time that SIZE is equal to ROUNDED_SIZE. */
10422
10423 if (size != rounded_size)
10424 {
10425 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10426 plus_constant (Pmode, stack_pointer_rtx,
10427 rounded_size - size)));
10428 emit_stack_probe (stack_pointer_rtx);
10429 }
10430
10431 /* Adjust back to account for the additional first interval. */
10432 last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10433 plus_constant (Pmode, stack_pointer_rtx,
10434 PROBE_INTERVAL + dope)));
10435
10436 release_scratch_register_on_entry (&sr);
10437 }
10438
10439 gcc_assert (cfun->machine->fs.cfa_reg != stack_pointer_rtx);
10440
10441 /* Even though the stack pointer isn't the CFA register, we need to correctly
10442 describe the adjustments made to it, in particular differentiate the
10443 frame-related ones from the frame-unrelated ones. */
10444 if (size > 0)
10445 {
10446 rtx expr = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (2));
10447 XVECEXP (expr, 0, 0)
10448 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10449 plus_constant (Pmode, stack_pointer_rtx, -size));
10450 XVECEXP (expr, 0, 1)
10451 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10452 plus_constant (Pmode, stack_pointer_rtx,
10453 PROBE_INTERVAL + dope + size));
10454 add_reg_note (last, REG_FRAME_RELATED_EXPR, expr);
10455 RTX_FRAME_RELATED_P (last) = 1;
10456
10457 cfun->machine->fs.sp_offset += size;
10458 }
10459
10460 /* Make sure nothing is scheduled before we are done. */
10461 emit_insn (gen_blockage ());
10462 }
10463
10464 /* Adjust the stack pointer up to REG while probing it. */
10465
10466 const char *
10467 output_adjust_stack_and_probe (rtx reg)
10468 {
10469 static int labelno = 0;
10470 char loop_lab[32], end_lab[32];
10471 rtx xops[2];
10472
10473 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
10474 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
10475
10476 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
10477
10478 /* Jump to END_LAB if SP == LAST_ADDR. */
10479 xops[0] = stack_pointer_rtx;
10480 xops[1] = reg;
10481 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
10482 fputs ("\tje\t", asm_out_file);
10483 assemble_name_raw (asm_out_file, end_lab);
10484 fputc ('\n', asm_out_file);
10485
10486 /* SP = SP + PROBE_INTERVAL. */
10487 xops[1] = GEN_INT (PROBE_INTERVAL);
10488 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
10489
10490 /* Probe at SP. */
10491 xops[1] = const0_rtx;
10492 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);
10493
10494 fprintf (asm_out_file, "\tjmp\t");
10495 assemble_name_raw (asm_out_file, loop_lab);
10496 fputc ('\n', asm_out_file);
10497
10498 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
10499
10500 return "";
10501 }
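/* For illustration, on a 32-bit target (AT&T syntax, assuming
   PROBE_INTERVAL of 4096, REG in %eax and internal label number 0), the
   sequence emitted above looks roughly like:

	LPSRL0:
		cmpl	%eax, %esp
		je	LPSRE0
		subl	$4096, %esp
		orl	$0, (%esp)
		jmp	LPSRL0
	LPSRE0:

   i.e. the stack pointer is moved down one interval at a time and each
   newly exposed page is touched with a harmless "or $0" store until SP
   reaches LAST_ADDR (the exact label spelling is target-dependent).  */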
10502
10503 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
10504 inclusive. These are offsets from the current stack pointer. */
10505
10506 static void
10507 ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
10508 {
10509 /* See if we have a constant small number of probes to generate. If so,
10510 that's the easy case. The run-time loop is made up of 7 insns in the
10511 generic case while the compile-time loop is made up of n insns for n #
10512 of intervals. */
10513 if (size <= 7 * PROBE_INTERVAL)
10514 {
10515 HOST_WIDE_INT i;
10516
10517 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
10518 it exceeds SIZE. If only one probe is needed, this will not
10519 generate any code. Then probe at FIRST + SIZE. */
10520 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
10521 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
10522 -(first + i)));
10523
10524 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
10525 -(first + size)));
10526 }
10527
10528 /* Otherwise, do the same as above, but in a loop. Note that we must be
10529 extra careful with variables wrapping around because we might be at
10530 the very top (or the very bottom) of the address space and we have
10531 to be able to handle this case properly; in particular, we use an
10532 equality test for the loop condition. */
10533 else
10534 {
10535 HOST_WIDE_INT rounded_size, last;
10536 struct scratch_reg sr;
10537
10538 get_scratch_register_on_entry (&sr);
10539
10540
10541 /* Step 1: round SIZE to the previous multiple of the interval. */
10542
10543 rounded_size = size & -PROBE_INTERVAL;
10544
10545
10546 /* Step 2: compute initial and final value of the loop counter. */
10547
10548 /* TEST_OFFSET = FIRST. */
10549 emit_move_insn (sr.reg, GEN_INT (-first));
10550
10551 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
10552 last = first + rounded_size;
10553
10554
10555 /* Step 3: the loop
10556
10557 while (TEST_ADDR != LAST_ADDR)
10558 {
10559 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
10560 probe at TEST_ADDR
10561 }
10562
10563 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
10564 until it is equal to ROUNDED_SIZE. */
10565
10566 emit_insn (ix86_gen_probe_stack_range (sr.reg, sr.reg, GEN_INT (-last)));
10567
10568
10569 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
10570 that SIZE is equal to ROUNDED_SIZE. */
10571
10572 if (size != rounded_size)
10573 emit_stack_probe (plus_constant (Pmode,
10574 gen_rtx_PLUS (Pmode,
10575 stack_pointer_rtx,
10576 sr.reg),
10577 rounded_size - size));
10578
10579 release_scratch_register_on_entry (&sr);
10580 }
10581
10582 /* Make sure nothing is scheduled before we are done. */
10583 emit_insn (gen_blockage ());
10584 }
10585
10586 /* Probe a range of stack addresses from REG to END, inclusive. These are
10587 offsets from the current stack pointer. */
10588
10589 const char *
10590 output_probe_stack_range (rtx reg, rtx end)
10591 {
10592 static int labelno = 0;
10593 char loop_lab[32], end_lab[32];
10594 rtx xops[3];
10595
10596 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
10597 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
10598
10599 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
10600
10601 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
10602 xops[0] = reg;
10603 xops[1] = end;
10604 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
10605 fputs ("\tje\t", asm_out_file);
10606 assemble_name_raw (asm_out_file, end_lab);
10607 fputc ('\n', asm_out_file);
10608
10609 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
10610 xops[1] = GEN_INT (PROBE_INTERVAL);
10611 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
10612
10613 /* Probe at TEST_ADDR. */
10614 xops[0] = stack_pointer_rtx;
10615 xops[1] = reg;
10616 xops[2] = const0_rtx;
10617 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);
10618
10619 fprintf (asm_out_file, "\tjmp\t");
10620 assemble_name_raw (asm_out_file, loop_lab);
10621 fputc ('\n', asm_out_file);
10622
10623 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
10624
10625 return "";
10626 }
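/* Illustrative note: this loop has the same shape as the one emitted by
   output_adjust_stack_and_probe above, except that the scratch register is
   walked instead of the stack pointer and the probe uses an indexed
   address, e.g. "orl $0, (%esp,%ecx)" assuming the scratch register is
   %ecx.  */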
10627
10628 /* Finalize stack_realign_needed flag, which will guide prologue/epilogue
10629 to be generated in correct form. */
10630 static void
10631 ix86_finalize_stack_realign_flags (void)
10632 {
10633 /* Check if stack realignment is really needed after reload, and
10634 store the result in cfun. */
10635 unsigned int incoming_stack_boundary
10636 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
10637 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
10638 unsigned int stack_realign = (incoming_stack_boundary
10639 < (crtl->is_leaf
10640 ? crtl->max_used_stack_slot_alignment
10641 : crtl->stack_alignment_needed));
10642
10643 if (crtl->stack_realign_finalized)
10644 {
10645 /* After stack_realign_needed is finalized, we can no longer
10646 change it. */
10647 gcc_assert (crtl->stack_realign_needed == stack_realign);
10648 return;
10649 }
10650
10651 /* If the only reason for frame_pointer_needed is that we conservatively
10652 assumed stack realignment might be needed, but in the end nothing that
10653 needed the stack alignment had been spilled, clear frame_pointer_needed
10654 and say we don't need stack realignment. */
10655 if (stack_realign
10656 && frame_pointer_needed
10657 && crtl->is_leaf
10658 && flag_omit_frame_pointer
10659 && crtl->sp_is_unchanging
10660 && !ix86_current_function_calls_tls_descriptor
10661 && !crtl->accesses_prior_frames
10662 && !cfun->calls_alloca
10663 && !crtl->calls_eh_return
10664 && !(flag_stack_check && STACK_CHECK_MOVING_SP)
10665 && !ix86_frame_pointer_required ()
10666 && get_frame_size () == 0
10667 && ix86_nsaved_sseregs () == 0
10668 && ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0)
10669 {
10670 HARD_REG_SET set_up_by_prologue, prologue_used;
10671 basic_block bb;
10672
10673 CLEAR_HARD_REG_SET (prologue_used);
10674 CLEAR_HARD_REG_SET (set_up_by_prologue);
10675 add_to_hard_reg_set (&set_up_by_prologue, Pmode, STACK_POINTER_REGNUM);
10676 add_to_hard_reg_set (&set_up_by_prologue, Pmode, ARG_POINTER_REGNUM);
10677 add_to_hard_reg_set (&set_up_by_prologue, Pmode,
10678 HARD_FRAME_POINTER_REGNUM);
10679 FOR_EACH_BB_FN (bb, cfun)
10680 {
10681 rtx insn;
10682 FOR_BB_INSNS (bb, insn)
10683 if (NONDEBUG_INSN_P (insn)
10684 && requires_stack_frame_p (insn, prologue_used,
10685 set_up_by_prologue))
10686 {
10687 crtl->stack_realign_needed = stack_realign;
10688 crtl->stack_realign_finalized = true;
10689 return;
10690 }
10691 }
10692
10693 /* If drap has been set, but it actually isn't live at the start
10694 of the function, there is no reason to set it up. */
10695 if (crtl->drap_reg)
10696 {
10697 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
10698 if (! REGNO_REG_SET_P (DF_LR_IN (bb), REGNO (crtl->drap_reg)))
10699 {
10700 crtl->drap_reg = NULL_RTX;
10701 crtl->need_drap = false;
10702 }
10703 }
10704 else
10705 cfun->machine->no_drap_save_restore = true;
10706
10707 frame_pointer_needed = false;
10708 stack_realign = false;
10709 crtl->max_used_stack_slot_alignment = incoming_stack_boundary;
10710 crtl->stack_alignment_needed = incoming_stack_boundary;
10711 crtl->stack_alignment_estimated = incoming_stack_boundary;
10712 if (crtl->preferred_stack_boundary > incoming_stack_boundary)
10713 crtl->preferred_stack_boundary = incoming_stack_boundary;
10714 df_finish_pass (true);
10715 df_scan_alloc (NULL);
10716 df_scan_blocks ();
10717 df_compute_regs_ever_live (true);
10718 df_analyze ();
10719 }
10720
10721 crtl->stack_realign_needed = stack_realign;
10722 crtl->stack_realign_finalized = true;
10723 }
10724
10725 /* Expand the prologue into a bunch of separate insns. */
10726
10727 void
10728 ix86_expand_prologue (void)
10729 {
10730 struct machine_function *m = cfun->machine;
10731 rtx insn, t;
10732 bool pic_reg_used;
10733 struct ix86_frame frame;
10734 HOST_WIDE_INT allocate;
10735 bool int_registers_saved;
10736 bool sse_registers_saved;
10737
10738 ix86_finalize_stack_realign_flags ();
10739
10740 /* DRAP should not coexist with stack_realign_fp. */
10741 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
10742
10743 memset (&m->fs, 0, sizeof (m->fs));
10744
10745 /* Initialize CFA state for before the prologue. */
10746 m->fs.cfa_reg = stack_pointer_rtx;
10747 m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;
10748
10749 /* Track SP offset to the CFA. We continue tracking this after we've
10750 swapped the CFA register away from SP. In the case of re-alignment
10751 this is fudged; we're interested in offsets within the local frame. */
10752 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
10753 m->fs.sp_valid = true;
10754
10755 ix86_compute_frame_layout (&frame);
10756
10757 if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
10758 {
10759 /* We should have already generated an error for any use of
10760 ms_hook on a nested function. */
10761 gcc_checking_assert (!ix86_static_chain_on_stack);
10762
10763 /* Check if profiling is active and we shall use the profiling-before-
10764 prologue variant. If so, sorry. */
10765 if (crtl->profile && flag_fentry != 0)
10766 sorry ("ms_hook_prologue attribute isn%'t compatible "
10767 "with -mfentry for 32-bit");
10768
10769 /* In ix86_asm_output_function_label we emitted:
10770 8b ff movl.s %edi,%edi
10771 55 push %ebp
10772 8b ec movl.s %esp,%ebp
10773
10774 This matches the hookable function prologue in Win32 API
10775 functions in Microsoft Windows XP Service Pack 2 and newer.
10776 Wine uses this to enable Windows apps to hook the Win32 API
10777 functions provided by Wine.
10778
10779 What that means is that we've already set up the frame pointer. */
10780
10781 if (frame_pointer_needed
10782 && !(crtl->drap_reg && crtl->stack_realign_needed))
10783 {
10784 rtx push, mov;
10785
10786 /* We've decided to use the frame pointer already set up.
10787 Describe this to the unwinder by pretending that both
10788 push and mov insns happen right here.
10789
10790 Putting the unwind info here at the end of the ms_hook
10791 is done so that we can make absolutely certain we get
10792 the required byte sequence at the start of the function,
10793 rather than relying on an assembler that can produce
10794 the exact encoding required.
10795
10796 However it does mean (in the unpatched case) that we have
10797 a 1 insn window where the asynchronous unwind info is
10798 incorrect. However, if we placed the unwind info at
10799 its correct location we would have incorrect unwind info
10800 in the patched case. Which is probably all moot since
10801 I don't expect Wine generates dwarf2 unwind info for the
10802 system libraries that use this feature. */
10803
10804 insn = emit_insn (gen_blockage ());
10805
10806 push = gen_push (hard_frame_pointer_rtx);
10807 mov = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
10808 stack_pointer_rtx);
10809 RTX_FRAME_RELATED_P (push) = 1;
10810 RTX_FRAME_RELATED_P (mov) = 1;
10811
10812 RTX_FRAME_RELATED_P (insn) = 1;
10813 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
10814 gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));
10815
10816 /* Note that gen_push incremented m->fs.cfa_offset, even
10817 though we didn't emit the push insn here. */
10818 m->fs.cfa_reg = hard_frame_pointer_rtx;
10819 m->fs.fp_offset = m->fs.cfa_offset;
10820 m->fs.fp_valid = true;
10821 }
10822 else
10823 {
10824 /* The frame pointer is not needed so pop %ebp again.
10825 This leaves us with a pristine state. */
10826 emit_insn (gen_pop (hard_frame_pointer_rtx));
10827 }
10828 }
10829
10830 /* The first insn of a function that accepts its static chain on the
10831 stack is to push the register that would be filled in by a direct
10832 call. This insn will be skipped by the trampoline. */
10833 else if (ix86_static_chain_on_stack)
10834 {
10835 insn = emit_insn (gen_push (ix86_static_chain (cfun->decl, false)));
10836 emit_insn (gen_blockage ());
10837
10838 /* We don't want to interpret this push insn as a register save,
10839 only as a stack adjustment. The real copy of the register as
10840 a save will be done later, if needed. */
10841 t = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD);
10842 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
10843 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
10844 RTX_FRAME_RELATED_P (insn) = 1;
10845 }
10846
10847 /* Emit prologue code to adjust stack alignment and set up DRAP, in case
10848 DRAP is needed and stack realignment is really needed after reload. */
10849 if (stack_realign_drap)
10850 {
10851 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
10852
10853 /* Only need to push parameter pointer reg if it is caller saved. */
10854 if (!call_used_regs[REGNO (crtl->drap_reg)])
10855 {
10856 /* Push arg pointer reg */
10857 insn = emit_insn (gen_push (crtl->drap_reg));
10858 RTX_FRAME_RELATED_P (insn) = 1;
10859 }
10860
10861 /* Grab the argument pointer. */
10862 t = plus_constant (Pmode, stack_pointer_rtx, m->fs.sp_offset);
10863 insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
10864 RTX_FRAME_RELATED_P (insn) = 1;
10865 m->fs.cfa_reg = crtl->drap_reg;
10866 m->fs.cfa_offset = 0;
10867
10868 /* Align the stack. */
10869 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
10870 stack_pointer_rtx,
10871 GEN_INT (-align_bytes)));
10872 RTX_FRAME_RELATED_P (insn) = 1;
10873
10874 /* Replicate the return address on the stack so that return
10875 address can be reached via (argp - 1) slot. This is needed
10876 to implement macro RETURN_ADDR_RTX and intrinsic function
10877 expand_builtin_return_addr etc. */
10878 t = plus_constant (Pmode, crtl->drap_reg, -UNITS_PER_WORD);
10879 t = gen_frame_mem (word_mode, t);
10880 insn = emit_insn (gen_push (t));
10881 RTX_FRAME_RELATED_P (insn) = 1;
10882
10883 /* For the purposes of frame and register save area addressing,
10884 we've started over with a new frame. */
10885 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
10886 m->fs.realigned = true;
10887 }
10888
10889 int_registers_saved = (frame.nregs == 0);
10890 sse_registers_saved = (frame.nsseregs == 0);
10891
10892 if (frame_pointer_needed && !m->fs.fp_valid)
10893 {
10894 /* Note: AT&T enter does NOT have reversed args. Enter is probably
10895 slower on all targets. Also sdb doesn't like it. */
10896 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
10897 RTX_FRAME_RELATED_P (insn) = 1;
10898
10899 /* Push registers now, before setting the frame pointer
10900 on SEH target. */
10901 if (!int_registers_saved
10902 && TARGET_SEH
10903 && !frame.save_regs_using_mov)
10904 {
10905 ix86_emit_save_regs ();
10906 int_registers_saved = true;
10907 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
10908 }
10909
10910 if (m->fs.sp_offset == frame.hard_frame_pointer_offset)
10911 {
10912 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
10913 RTX_FRAME_RELATED_P (insn) = 1;
10914
10915 if (m->fs.cfa_reg == stack_pointer_rtx)
10916 m->fs.cfa_reg = hard_frame_pointer_rtx;
10917 m->fs.fp_offset = m->fs.sp_offset;
10918 m->fs.fp_valid = true;
10919 }
10920 }
10921
10922 if (!int_registers_saved)
10923 {
10924 /* If saving registers via PUSH, do so now. */
10925 if (!frame.save_regs_using_mov)
10926 {
10927 ix86_emit_save_regs ();
10928 int_registers_saved = true;
10929 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
10930 }
10931
10932 /* When using the red zone we may start register saving before allocating
10933 the stack frame, saving one cycle of the prologue. However, avoid
10934 doing this if we have to probe the stack; at least on x86_64 the
10935 stack probe can turn into a call that clobbers a red zone location. */
10936 else if (ix86_using_red_zone ()
10937 && (! TARGET_STACK_PROBE
10938 || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
10939 {
10940 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
10941 int_registers_saved = true;
10942 }
10943 }
10944
10945 if (stack_realign_fp)
10946 {
10947 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
10948 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
10949
10950 /* The computation of the size of the re-aligned stack frame means
10951 that we must allocate the size of the register save area before
10952 performing the actual alignment. Otherwise we cannot guarantee
10953 that there's enough storage above the realignment point. */
10954 if (m->fs.sp_offset != frame.sse_reg_save_offset)
10955 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10956 GEN_INT (m->fs.sp_offset
10957 - frame.sse_reg_save_offset),
10958 -1, false);
10959
10960 /* Align the stack. */
10961 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
10962 stack_pointer_rtx,
10963 GEN_INT (-align_bytes)));
10964
10965 /* For the purposes of register save area addressing, the stack
10966 pointer is no longer valid. As for the value of sp_offset,
10967 see ix86_compute_frame_layout, which we need to match in order
10968 to pass verification of stack_pointer_offset at the end. */
10969 m->fs.sp_offset = (m->fs.sp_offset + align_bytes) & -align_bytes;
10970 m->fs.sp_valid = false;
10971 }
10972
10973 allocate = frame.stack_pointer_offset - m->fs.sp_offset;
10974
10975 if (flag_stack_usage_info)
10976 {
10977 /* We start to count from ARG_POINTER. */
10978 HOST_WIDE_INT stack_size = frame.stack_pointer_offset;
10979
10980 /* If it was realigned, take into account the fake frame. */
10981 if (stack_realign_drap)
10982 {
10983 if (ix86_static_chain_on_stack)
10984 stack_size += UNITS_PER_WORD;
10985
10986 if (!call_used_regs[REGNO (crtl->drap_reg)])
10987 stack_size += UNITS_PER_WORD;
10988
10989 /* This over-estimates by 1 minimal-stack-alignment-unit but
10990 mitigates that by counting in the new return address slot. */
10991 current_function_dynamic_stack_size
10992 += crtl->stack_alignment_needed / BITS_PER_UNIT;
10993 }
10994
10995 current_function_static_stack_size = stack_size;
10996 }
10997
10998 /* On the SEH target with a very large frame size, allocate an area to save
10999 SSE registers (as the very large allocation won't be described). */
11000 if (TARGET_SEH
11001 && frame.stack_pointer_offset > SEH_MAX_FRAME_SIZE
11002 && !sse_registers_saved)
11003 {
11004 HOST_WIDE_INT sse_size =
11005 frame.sse_reg_save_offset - frame.reg_save_offset;
11006
11007 gcc_assert (int_registers_saved);
11008
11009 /* No need to do stack checking as the area will be immediately
11010 written. */
11011 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11012 GEN_INT (-sse_size), -1,
11013 m->fs.cfa_reg == stack_pointer_rtx);
11014 allocate -= sse_size;
11015 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
11016 sse_registers_saved = true;
11017 }
11018
11019 /* The stack has already been decremented by the instruction calling us,
11020 so probe if the size is non-negative to preserve the protection area. */
11021 if (allocate >= 0 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
11022 {
11023 /* We expect the registers to be saved when probes are used. */
11024 gcc_assert (int_registers_saved);
11025
11026 if (STACK_CHECK_MOVING_SP)
11027 {
11028 if (!(crtl->is_leaf && !cfun->calls_alloca
11029 && allocate <= PROBE_INTERVAL))
11030 {
11031 ix86_adjust_stack_and_probe (allocate);
11032 allocate = 0;
11033 }
11034 }
11035 else
11036 {
11037 HOST_WIDE_INT size = allocate;
11038
11039 if (TARGET_64BIT && size >= (HOST_WIDE_INT) 0x80000000)
11040 size = 0x80000000 - STACK_CHECK_PROTECT - 1;
11041
11042 if (TARGET_STACK_PROBE)
11043 {
11044 if (crtl->is_leaf && !cfun->calls_alloca)
11045 {
11046 if (size > PROBE_INTERVAL)
11047 ix86_emit_probe_stack_range (0, size);
11048 }
11049 else
11050 ix86_emit_probe_stack_range (0, size + STACK_CHECK_PROTECT);
11051 }
11052 else
11053 {
11054 if (crtl->is_leaf && !cfun->calls_alloca)
11055 {
11056 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
11057 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT,
11058 size - STACK_CHECK_PROTECT);
11059 }
11060 else
11061 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
11062 }
11063 }
11064 }
11065
11066 if (allocate == 0)
11067 ;
11068 else if (!ix86_target_stack_probe ()
11069 || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
11070 {
11071 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11072 GEN_INT (-allocate), -1,
11073 m->fs.cfa_reg == stack_pointer_rtx);
11074 }
11075 else
11076 {
11077 rtx eax = gen_rtx_REG (Pmode, AX_REG);
11078 rtx r10 = NULL;
11079 rtx (*adjust_stack_insn)(rtx, rtx, rtx);
11080 const bool sp_is_cfa_reg = (m->fs.cfa_reg == stack_pointer_rtx);
11081 bool eax_live = ix86_eax_live_at_start_p ();
11082 bool r10_live = false;
11083
11084 if (TARGET_64BIT)
11085 r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);
11086
11087 if (eax_live)
11088 {
11089 insn = emit_insn (gen_push (eax));
11090 allocate -= UNITS_PER_WORD;
11091 /* Note that SEH directives need to continue tracking the stack
11092 pointer even after the frame pointer has been set up. */
11093 if (sp_is_cfa_reg || TARGET_SEH)
11094 {
11095 if (sp_is_cfa_reg)
11096 m->fs.cfa_offset += UNITS_PER_WORD;
11097 RTX_FRAME_RELATED_P (insn) = 1;
11098 }
11099 }
11100
11101 if (r10_live)
11102 {
11103 r10 = gen_rtx_REG (Pmode, R10_REG);
11104 insn = emit_insn (gen_push (r10));
11105 allocate -= UNITS_PER_WORD;
11106 if (sp_is_cfa_reg || TARGET_SEH)
11107 {
11108 if (sp_is_cfa_reg)
11109 m->fs.cfa_offset += UNITS_PER_WORD;
11110 RTX_FRAME_RELATED_P (insn) = 1;
11111 }
11112 }
11113
11114 emit_move_insn (eax, GEN_INT (allocate));
11115 emit_insn (ix86_gen_allocate_stack_worker (eax, eax));
11116
11117 /* Use the fact that AX still contains ALLOCATE. */
11118 adjust_stack_insn = (Pmode == DImode
11119 ? gen_pro_epilogue_adjust_stack_di_sub
11120 : gen_pro_epilogue_adjust_stack_si_sub);
11121
11122 insn = emit_insn (adjust_stack_insn (stack_pointer_rtx,
11123 stack_pointer_rtx, eax));
11124
11125 if (sp_is_cfa_reg || TARGET_SEH)
11126 {
11127 if (sp_is_cfa_reg)
11128 m->fs.cfa_offset += allocate;
11129 RTX_FRAME_RELATED_P (insn) = 1;
11130 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11131 gen_rtx_SET (VOIDmode, stack_pointer_rtx,
11132 plus_constant (Pmode, stack_pointer_rtx,
11133 -allocate)));
11134 }
11135 m->fs.sp_offset += allocate;
11136
11137 /* Use stack_pointer_rtx for relative addressing so that code
11138 works for realigned stack, too. */
11139 if (r10_live && eax_live)
11140 {
11141 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
11142 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
11143 gen_frame_mem (word_mode, t));
11144 t = plus_constant (Pmode, t, UNITS_PER_WORD);
11145 emit_move_insn (gen_rtx_REG (word_mode, AX_REG),
11146 gen_frame_mem (word_mode, t));
11147 }
11148 else if (eax_live || r10_live)
11149 {
11150 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
11151 emit_move_insn (gen_rtx_REG (word_mode,
11152 (eax_live ? AX_REG : R10_REG)),
11153 gen_frame_mem (word_mode, t));
11154 }
11155 }
11156 gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);
11157
11158 /* If we haven't already set up the frame pointer, do so now. */
11159 if (frame_pointer_needed && !m->fs.fp_valid)
11160 {
11161 insn = ix86_gen_add3 (hard_frame_pointer_rtx, stack_pointer_rtx,
11162 GEN_INT (frame.stack_pointer_offset
11163 - frame.hard_frame_pointer_offset));
11164 insn = emit_insn (insn);
11165 RTX_FRAME_RELATED_P (insn) = 1;
11166 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
11167
11168 if (m->fs.cfa_reg == stack_pointer_rtx)
11169 m->fs.cfa_reg = hard_frame_pointer_rtx;
11170 m->fs.fp_offset = frame.hard_frame_pointer_offset;
11171 m->fs.fp_valid = true;
11172 }
11173
11174 if (!int_registers_saved)
11175 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
11176 if (!sse_registers_saved)
11177 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
11178
11179 pic_reg_used = false;
11180 /* We don't use the PIC register for the pe-coff target. */
11181 if (pic_offset_table_rtx
11182 && !TARGET_PECOFF
11183 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
11184 || crtl->profile))
11185 {
11186 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
11187
11188 if (alt_pic_reg_used != INVALID_REGNUM)
11189 SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);
11190
11191 pic_reg_used = true;
11192 }
11193
11194 if (pic_reg_used)
11195 {
11196 if (TARGET_64BIT)
11197 {
11198 if (ix86_cmodel == CM_LARGE_PIC)
11199 {
11200 rtx label, tmp_reg;
11201
11202 gcc_assert (Pmode == DImode);
11203 label = gen_label_rtx ();
11204 emit_label (label);
11205 LABEL_PRESERVE_P (label) = 1;
11206 tmp_reg = gen_rtx_REG (Pmode, R11_REG);
11207 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
11208 insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx,
11209 label));
11210 insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
11211 insn = emit_insn (ix86_gen_add3 (pic_offset_table_rtx,
11212 pic_offset_table_rtx, tmp_reg));
11213 }
11214 else
11215 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
11216 }
11217 else
11218 {
11219 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
11220 RTX_FRAME_RELATED_P (insn) = 1;
11221 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
11222 }
11223 }
11224
11225 /* In the pic_reg_used case, make sure that the got load isn't deleted
11226 when mcount needs it. Blockage to avoid call movement across mcount
11227 call is emitted in generic code after the NOTE_INSN_PROLOGUE_END
11228 note. */
11229 if (crtl->profile && !flag_fentry && pic_reg_used)
11230 emit_insn (gen_prologue_use (pic_offset_table_rtx));
11231
11232 if (crtl->drap_reg && !crtl->stack_realign_needed)
11233 {
11234 /* vDRAP is set up, but after reload it turns out stack realignment
11235 isn't necessary; here we emit prologue code to set up DRAP
11236 without the stack realignment adjustment. */
11237 t = choose_baseaddr (0);
11238 emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
11239 }
11240
11241 /* Prevent instructions from being scheduled into register save push
11242 sequence when access to the redzone area is done through frame pointer.
11243 The offset between the frame pointer and the stack pointer is calculated
11244 relative to the value of the stack pointer at the end of the function
11245 prologue, and moving instructions that access redzone area via frame
11246 pointer inside push sequence violates this assumption. */
11247 if (frame_pointer_needed && frame.red_zone_size)
11248 emit_insn (gen_memory_blockage ());
11249
11250 /* Emit cld instruction if stringops are used in the function. */
11251 if (TARGET_CLD && ix86_current_function_needs_cld)
11252 emit_insn (gen_cld ());
11253
11254 /* SEH requires that the prologue end within 256 bytes of the start of
11255 the function. Prevent instruction schedules that would extend that.
11256 Further, prevent alloca modifications to the stack pointer from being
11257 combined with prologue modifications. */
11258 if (TARGET_SEH)
11259 emit_insn (gen_prologue_use (stack_pointer_rtx));
11260 }
11261
11262 /* Emit code to restore REG using a POP insn. */
11263
11264 static void
11265 ix86_emit_restore_reg_using_pop (rtx reg)
11266 {
11267 struct machine_function *m = cfun->machine;
11268 rtx insn = emit_insn (gen_pop (reg));
11269
11270 ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
11271 m->fs.sp_offset -= UNITS_PER_WORD;
11272
11273 if (m->fs.cfa_reg == crtl->drap_reg
11274 && REGNO (reg) == REGNO (crtl->drap_reg))
11275 {
11276 /* Previously we'd represented the CFA as an expression
11277 like *(%ebp - 8). We've just popped that value from
11278 the stack, which means we need to reset the CFA to
11279 the drap register. This will remain until we restore
11280 the stack pointer. */
11281 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
11282 RTX_FRAME_RELATED_P (insn) = 1;
11283
11284 /* This means that the DRAP register is valid for addressing too. */
11285 m->fs.drap_valid = true;
11286 return;
11287 }
11288
11289 if (m->fs.cfa_reg == stack_pointer_rtx)
11290 {
11291 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
11292 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
11293 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
11294 RTX_FRAME_RELATED_P (insn) = 1;
11295
11296 m->fs.cfa_offset -= UNITS_PER_WORD;
11297 }
11298
11299 /* When the frame pointer is the CFA, and we pop it, we are
11300 swapping back to the stack pointer as the CFA. This happens
11301 for stack frames that don't allocate other data, so we assume
11302 the stack pointer is now pointing at the return address, i.e.
11303 the function entry state, which makes the offset be 1 word. */
11304 if (reg == hard_frame_pointer_rtx)
11305 {
11306 m->fs.fp_valid = false;
11307 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
11308 {
11309 m->fs.cfa_reg = stack_pointer_rtx;
11310 m->fs.cfa_offset -= UNITS_PER_WORD;
11311
11312 add_reg_note (insn, REG_CFA_DEF_CFA,
11313 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11314 GEN_INT (m->fs.cfa_offset)));
11315 RTX_FRAME_RELATED_P (insn) = 1;
11316 }
11317 }
11318 }
11319
11320 /* Emit code to restore saved registers using POP insns. */
11321
11322 static void
11323 ix86_emit_restore_regs_using_pop (void)
11324 {
11325 unsigned int regno;
11326
11327 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11328 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
11329 ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno));
11330 }
11331
11332 /* Emit code and notes for the LEAVE instruction. */
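/* The leave instruction is equivalent to "movl %ebp, %esp; popl %ebp" (or
   the 64-bit forms), which is why the state update below marks the stack
   pointer valid again at fp_offset - UNITS_PER_WORD and invalidates the
   frame pointer.  */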
11333
11334 static void
11335 ix86_emit_leave (void)
11336 {
11337 struct machine_function *m = cfun->machine;
11338 rtx insn = emit_insn (ix86_gen_leave ());
11339
11340 ix86_add_queued_cfa_restore_notes (insn);
11341
11342 gcc_assert (m->fs.fp_valid);
11343 m->fs.sp_valid = true;
11344 m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
11345 m->fs.fp_valid = false;
11346
11347 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
11348 {
11349 m->fs.cfa_reg = stack_pointer_rtx;
11350 m->fs.cfa_offset = m->fs.sp_offset;
11351
11352 add_reg_note (insn, REG_CFA_DEF_CFA,
11353 plus_constant (Pmode, stack_pointer_rtx,
11354 m->fs.sp_offset));
11355 RTX_FRAME_RELATED_P (insn) = 1;
11356 }
11357 ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
11358 m->fs.fp_offset);
11359 }
11360
11361 /* Emit code to restore saved registers using MOV insns.
11362 First register is restored from CFA - CFA_OFFSET. */
11363 static void
11364 ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
11365 bool maybe_eh_return)
11366 {
11367 struct machine_function *m = cfun->machine;
11368 unsigned int regno;
11369
11370 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11371 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
11372 {
11373 rtx reg = gen_rtx_REG (word_mode, regno);
11374 rtx insn, mem;
11375
11376 mem = choose_baseaddr (cfa_offset);
11377 mem = gen_frame_mem (word_mode, mem);
11378 insn = emit_move_insn (reg, mem);
11379
11380 if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
11381 {
11382 /* Previously we'd represented the CFA as an expression
11383 like *(%ebp - 8). We've just popped that value from
11384 the stack, which means we need to reset the CFA to
11385 the drap register. This will remain until we restore
11386 the stack pointer. */
11387 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
11388 RTX_FRAME_RELATED_P (insn) = 1;
11389
11390 /* This means that the DRAP register is valid for addressing. */
11391 m->fs.drap_valid = true;
11392 }
11393 else
11394 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
11395
11396 cfa_offset -= UNITS_PER_WORD;
11397 }
11398 }
11399
11400 /* Emit code to restore saved registers using MOV insns.
11401 First register is restored from CFA - CFA_OFFSET. */
11402 static void
11403 ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
11404 bool maybe_eh_return)
11405 {
11406 unsigned int regno;
11407
11408 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11409 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
11410 {
11411 rtx reg = gen_rtx_REG (V4SFmode, regno);
11412 rtx mem;
11413
11414 mem = choose_baseaddr (cfa_offset);
11415 mem = gen_rtx_MEM (V4SFmode, mem);
11416 set_mem_align (mem, 128);
11417 emit_move_insn (reg, mem);
11418
11419 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
11420
11421 cfa_offset -= 16;
11422 }
11423 }
11424
11425 /* Restore function stack, frame, and registers. */
11426
11427 void
11428 ix86_expand_epilogue (int style)
11429 {
11430 struct machine_function *m = cfun->machine;
11431 struct machine_frame_state frame_state_save = m->fs;
11432 struct ix86_frame frame;
11433 bool restore_regs_via_mov;
11434 bool using_drap;
11435
11436 ix86_finalize_stack_realign_flags ();
11437 ix86_compute_frame_layout (&frame);
11438
11439 m->fs.sp_valid = (!frame_pointer_needed
11440 || (crtl->sp_is_unchanging
11441 && !stack_realign_fp));
11442 gcc_assert (!m->fs.sp_valid
11443 || m->fs.sp_offset == frame.stack_pointer_offset);
11444
11445 /* The FP must be valid if the frame pointer is present. */
11446 gcc_assert (frame_pointer_needed == m->fs.fp_valid);
11447 gcc_assert (!m->fs.fp_valid
11448 || m->fs.fp_offset == frame.hard_frame_pointer_offset);
11449
11450 /* We must have *some* valid pointer to the stack frame. */
11451 gcc_assert (m->fs.sp_valid || m->fs.fp_valid);
11452
11453 /* The DRAP is never valid at this point. */
11454 gcc_assert (!m->fs.drap_valid);
11455
11456 /* See the comment about red zone and frame
11457 pointer usage in ix86_expand_prologue. */
11458 if (frame_pointer_needed && frame.red_zone_size)
11459 emit_insn (gen_memory_blockage ());
11460
11461 using_drap = crtl->drap_reg && crtl->stack_realign_needed;
11462 gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);
11463
11464 /* Determine the CFA offset of the end of the red-zone. */
11465 m->fs.red_zone_offset = 0;
11466 if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
11467 {
11468 /* The red-zone begins below the return address. */
11469 m->fs.red_zone_offset = RED_ZONE_SIZE + UNITS_PER_WORD;
11470
11471 /* When the register save area is in the aligned portion of
11472 the stack, determine the maximum runtime displacement that
11473 matches up with the aligned frame. */
11474 if (stack_realign_drap)
11475 m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
11476 + UNITS_PER_WORD);
11477 }
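/* Worked example (assuming the usual x86-64 values RED_ZONE_SIZE == 128 and
   UNITS_PER_WORD == 8): red_zone_offset becomes 136, so registers saved no
   more than 136 bytes below the CFA lie in the red zone beneath the return
   address and need no REG_CFA_RESTORE note when restored; see
   ix86_add_cfa_restore_note.  */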
11478
11479 /* Special care must be taken for the normal return case of a function
11480 using eh_return: the eax and edx registers are marked as saved, but
11481 not restored along this path. Adjust the save location to match. */
11482 if (crtl->calls_eh_return && style != 2)
11483 frame.reg_save_offset -= 2 * UNITS_PER_WORD;
11484
11485 /* EH_RETURN requires the use of moves to function properly. */
11486 if (crtl->calls_eh_return)
11487 restore_regs_via_mov = true;
11488 /* SEH requires the use of pops to identify the epilogue. */
11489 else if (TARGET_SEH)
11490 restore_regs_via_mov = false;
11491 /* If we're only restoring one register and sp is not valid, then
11492 use a move instruction to restore the register, since it's
11493 less work than reloading sp and popping the register. */
11494 else if (!m->fs.sp_valid && frame.nregs <= 1)
11495 restore_regs_via_mov = true;
11496 else if (TARGET_EPILOGUE_USING_MOVE
11497 && cfun->machine->use_fast_prologue_epilogue
11498 && (frame.nregs > 1
11499 || m->fs.sp_offset != frame.reg_save_offset))
11500 restore_regs_via_mov = true;
11501 else if (frame_pointer_needed
11502 && !frame.nregs
11503 && m->fs.sp_offset != frame.reg_save_offset)
11504 restore_regs_via_mov = true;
11505 else if (frame_pointer_needed
11506 && TARGET_USE_LEAVE
11507 && cfun->machine->use_fast_prologue_epilogue
11508 && frame.nregs == 1)
11509 restore_regs_via_mov = true;
11510 else
11511 restore_regs_via_mov = false;
11512
11513 if (restore_regs_via_mov || frame.nsseregs)
11514 {
11515 /* Ensure that the entire register save area is addressable via
11516 the stack pointer, if we will restore via sp. */
11517 if (TARGET_64BIT
11518 && m->fs.sp_offset > 0x7fffffff
11519 && !(m->fs.fp_valid || m->fs.drap_valid)
11520 && (frame.nsseregs + frame.nregs) != 0)
11521 {
11522 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11523 GEN_INT (m->fs.sp_offset
11524 - frame.sse_reg_save_offset),
11525 style,
11526 m->fs.cfa_reg == stack_pointer_rtx);
11527 }
11528 }
11529
11530 /* If there are any SSE registers to restore, then we have to do it
11531 via moves, since there's obviously no pop for SSE regs. */
11532 if (frame.nsseregs)
11533 ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
11534 style == 2);
11535
11536 if (restore_regs_via_mov)
11537 {
11538 rtx t;
11539
11540 if (frame.nregs)
11541 ix86_emit_restore_regs_using_mov (frame.reg_save_offset, style == 2);
11542
11543 /* eh_return epilogues need %ecx added to the stack pointer. */
11544 if (style == 2)
11545 {
11546 rtx insn, sa = EH_RETURN_STACKADJ_RTX;
11547
11548 /* Stack align doesn't work with eh_return. */
11549 gcc_assert (!stack_realign_drap);
11550 /* Neither do regparm nested functions. */
11551 gcc_assert (!ix86_static_chain_on_stack);
11552
11553 if (frame_pointer_needed)
11554 {
11555 t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
11556 t = plus_constant (Pmode, t, m->fs.fp_offset - UNITS_PER_WORD);
11557 emit_insn (gen_rtx_SET (VOIDmode, sa, t));
11558
11559 t = gen_frame_mem (Pmode, hard_frame_pointer_rtx);
11560 insn = emit_move_insn (hard_frame_pointer_rtx, t);
11561
11562 /* Note that we use SA as a temporary CFA, as the return
11563 address is at the proper place relative to it. We
11564 pretend this happens at the FP restore insn because
11565 prior to this insn the FP would be stored at the wrong
11566 offset relative to SA, and after this insn we have no
11567 other reasonable register to use for the CFA. We don't
11568 bother resetting the CFA to the SP for the duration of
11569 the return insn. */
11570 add_reg_note (insn, REG_CFA_DEF_CFA,
11571 plus_constant (Pmode, sa, UNITS_PER_WORD));
11572 ix86_add_queued_cfa_restore_notes (insn);
11573 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
11574 RTX_FRAME_RELATED_P (insn) = 1;
11575
11576 m->fs.cfa_reg = sa;
11577 m->fs.cfa_offset = UNITS_PER_WORD;
11578 m->fs.fp_valid = false;
11579
11580 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
11581 const0_rtx, style, false);
11582 }
11583 else
11584 {
11585 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
11586 t = plus_constant (Pmode, t, m->fs.sp_offset - UNITS_PER_WORD);
11587 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, t));
11588 ix86_add_queued_cfa_restore_notes (insn);
11589
11590 gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
11591 if (m->fs.cfa_offset != UNITS_PER_WORD)
11592 {
11593 m->fs.cfa_offset = UNITS_PER_WORD;
11594 add_reg_note (insn, REG_CFA_DEF_CFA,
11595 plus_constant (Pmode, stack_pointer_rtx,
11596 UNITS_PER_WORD));
11597 RTX_FRAME_RELATED_P (insn) = 1;
11598 }
11599 }
11600 m->fs.sp_offset = UNITS_PER_WORD;
11601 m->fs.sp_valid = true;
11602 }
11603 }
11604 else
11605 {
11606 /* SEH requires that the function end with (1) a stack adjustment
11607 if necessary, (2) a sequence of pops, and (3) a return or
11608 jump instruction. Prevent insns from the function body from
11609 being scheduled into this sequence. */
11610 if (TARGET_SEH)
11611 {
11612 /* Prevent a catch region from being adjacent to the standard
11613 epilogue sequence. Unfortunately, neither crtl->uses_eh_lsda nor
11614 several other flags that would be interesting to test are
11615 set up yet. */
11616 if (flag_non_call_exceptions)
11617 emit_insn (gen_nops (const1_rtx));
11618 else
11619 emit_insn (gen_blockage ());
11620 }
11621
11622 /* The first step is to deallocate the stack frame so that we can
11623 pop the registers. Also do it on the SEH target for a very large
11624 frame, as the emitted instructions aren't allowed by the ABI in
11625 epilogues. */
11626 if (!m->fs.sp_valid
11627 || (TARGET_SEH
11628 && (m->fs.sp_offset - frame.reg_save_offset
11629 >= SEH_MAX_FRAME_SIZE)))
11630 {
11631 pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
11632 GEN_INT (m->fs.fp_offset
11633 - frame.reg_save_offset),
11634 style, false);
11635 }
11636 else if (m->fs.sp_offset != frame.reg_save_offset)
11637 {
11638 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11639 GEN_INT (m->fs.sp_offset
11640 - frame.reg_save_offset),
11641 style,
11642 m->fs.cfa_reg == stack_pointer_rtx);
11643 }
11644
11645 ix86_emit_restore_regs_using_pop ();
11646 }
11647
11648 /* If we used a frame pointer and haven't already got rid of it,
11649 then do so now. */
11650 if (m->fs.fp_valid)
11651 {
11652 /* If the stack pointer is valid and pointing at the frame
11653 pointer store address, then we only need a pop. */
11654 if (m->fs.sp_valid && m->fs.sp_offset == frame.hfp_save_offset)
11655 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
11656 /* Leave results in shorter dependency chains on CPUs that are
11657 able to grok it fast. */
11658 else if (TARGET_USE_LEAVE
11659 || optimize_bb_for_size_p (EXIT_BLOCK_PTR_FOR_FN (cfun))
11660 || !cfun->machine->use_fast_prologue_epilogue)
11661 ix86_emit_leave ();
11662 else
11663 {
11664 pro_epilogue_adjust_stack (stack_pointer_rtx,
11665 hard_frame_pointer_rtx,
11666 const0_rtx, style, !using_drap);
11667 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
11668 }
11669 }
11670
11671 if (using_drap)
11672 {
11673 int param_ptr_offset = UNITS_PER_WORD;
11674 rtx insn;
11675
11676 gcc_assert (stack_realign_drap);
11677
11678 if (ix86_static_chain_on_stack)
11679 param_ptr_offset += UNITS_PER_WORD;
11680 if (!call_used_regs[REGNO (crtl->drap_reg)])
11681 param_ptr_offset += UNITS_PER_WORD;
11682
11683 insn = emit_insn (gen_rtx_SET
11684 (VOIDmode, stack_pointer_rtx,
11685 gen_rtx_PLUS (Pmode,
11686 crtl->drap_reg,
11687 GEN_INT (-param_ptr_offset))));
11688 m->fs.cfa_reg = stack_pointer_rtx;
11689 m->fs.cfa_offset = param_ptr_offset;
11690 m->fs.sp_offset = param_ptr_offset;
11691 m->fs.realigned = false;
11692
11693 add_reg_note (insn, REG_CFA_DEF_CFA,
11694 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11695 GEN_INT (param_ptr_offset)));
11696 RTX_FRAME_RELATED_P (insn) = 1;
11697
11698 if (!call_used_regs[REGNO (crtl->drap_reg)])
11699 ix86_emit_restore_reg_using_pop (crtl->drap_reg);
11700 }
11701
11702 /* At this point the stack pointer must be valid, and we must have
11703 restored all of the registers. We may not have deallocated the
11704 entire stack frame. We've delayed this until now because it may
11705 be possible to merge the local stack deallocation with the
11706 deallocation forced by ix86_static_chain_on_stack. */
11707 gcc_assert (m->fs.sp_valid);
11708 gcc_assert (!m->fs.fp_valid);
11709 gcc_assert (!m->fs.realigned);
11710 if (m->fs.sp_offset != UNITS_PER_WORD)
11711 {
11712 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11713 GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
11714 style, true);
11715 }
11716 else
11717 ix86_add_queued_cfa_restore_notes (get_last_insn ());
11718
11719 /* Sibcall epilogues don't want a return instruction. */
11720 if (style == 0)
11721 {
11722 m->fs = frame_state_save;
11723 return;
11724 }
11725
11726 if (crtl->args.pops_args && crtl->args.size)
11727 {
11728 rtx popc = GEN_INT (crtl->args.pops_args);
11729
11730 /* i386 can only pop 64K bytes. If asked to pop more, pop the return
11731 address, do an explicit add, and jump indirectly to the caller. */
11732
11733 if (crtl->args.pops_args >= 65536)
11734 {
11735 rtx ecx = gen_rtx_REG (SImode, CX_REG);
11736 rtx insn;
11737
11738 /* There is no "pascal" calling convention in any 64bit ABI. */
11739 gcc_assert (!TARGET_64BIT);
11740
11741 insn = emit_insn (gen_pop (ecx));
11742 m->fs.cfa_offset -= UNITS_PER_WORD;
11743 m->fs.sp_offset -= UNITS_PER_WORD;
11744
11745 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
11746 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
11747 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
11748 add_reg_note (insn, REG_CFA_REGISTER,
11749 gen_rtx_SET (VOIDmode, ecx, pc_rtx));
11750 RTX_FRAME_RELATED_P (insn) = 1;
11751
11752 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11753 popc, -1, true);
11754 emit_jump_insn (gen_simple_return_indirect_internal (ecx));
11755 }
11756 else
11757 emit_jump_insn (gen_simple_return_pop_internal (popc));
11758 }
11759 else
11760 emit_jump_insn (gen_simple_return_internal ());
11761
11762 /* Restore the state back to the state from the prologue,
11763 so that it's correct for the next epilogue. */
11764 m->fs = frame_state_save;
11765 }
11766
11767 /* Reset from the function's potential modifications. */
11768
11769 static void
11770 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
11771 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
11772 {
11773 if (pic_offset_table_rtx)
11774 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
11775 #if TARGET_MACHO
11776 /* Mach-O doesn't support labels at the end of objects, so if
11777 it looks like we might want one, insert a NOP. */
11778 {
11779 rtx insn = get_last_insn ();
11780 rtx deleted_debug_label = NULL_RTX;
11781 while (insn
11782 && NOTE_P (insn)
11783 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
11784 {
11785 /* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL
11786 notes alone; instead set their CODE_LABEL_NUMBER to -1,
11787 otherwise there would be code generation differences
11788 between -g and -g0. */
11789 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
11790 deleted_debug_label = insn;
11791 insn = PREV_INSN (insn);
11792 }
11793 if (insn
11794 && (LABEL_P (insn)
11795 || (NOTE_P (insn)
11796 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
11797 fputs ("\tnop\n", file);
11798 else if (deleted_debug_label)
11799 for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
11800 if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
11801 CODE_LABEL_NUMBER (insn) = -1;
11802 }
11803 #endif
11804
11805 }
11806
11807 /* Return a scratch register to use in the split stack prologue. The
11808 split stack prologue is used for -fsplit-stack. It consists of the first
11809 instructions in the function, even before the regular prologue.
11810 The scratch register can be any caller-saved register which is not
11811 used for parameters or for the static chain. */
11812
11813 static unsigned int
11814 split_stack_prologue_scratch_regno (void)
11815 {
11816 if (TARGET_64BIT)
11817 return R11_REG;
11818 else
11819 {
11820 bool is_fastcall, is_thiscall;
11821 int regparm;
11822
11823 is_fastcall = (lookup_attribute ("fastcall",
11824 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
11825 != NULL);
11826 is_thiscall = (lookup_attribute ("thiscall",
11827 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
11828 != NULL);
11829 regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);
11830
11831 if (is_fastcall)
11832 {
11833 if (DECL_STATIC_CHAIN (cfun->decl))
11834 {
11835 sorry ("-fsplit-stack does not support fastcall with "
11836 "nested function");
11837 return INVALID_REGNUM;
11838 }
11839 return AX_REG;
11840 }
11841 else if (is_thiscall)
11842 {
11843 if (!DECL_STATIC_CHAIN (cfun->decl))
11844 return DX_REG;
11845 return AX_REG;
11846 }
11847 else if (regparm < 3)
11848 {
11849 if (!DECL_STATIC_CHAIN (cfun->decl))
11850 return CX_REG;
11851 else
11852 {
11853 if (regparm >= 2)
11854 {
11855 sorry ("-fsplit-stack does not support 2 register "
11856 "parameters for a nested function");
11857 return INVALID_REGNUM;
11858 }
11859 return DX_REG;
11860 }
11861 }
11862 else
11863 {
11864 /* FIXME: We could make this work by pushing a register
11865 around the addition and comparison. */
11866 sorry ("-fsplit-stack does not support 3 register parameters");
11867 return INVALID_REGNUM;
11868 }
11869 }
11870 }
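
/* Editorial summary, not part of the original sources: the function above
   picks %r11 for 64-bit code; for 32-bit code it picks %eax for fastcall,
   %edx (or %eax with a static chain) for thiscall, and %ecx (or %edx with
   a static chain) otherwise, reporting "sorry" when every candidate
   register is already taken by parameters.  */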
11871
11872 /* A SYMBOL_REF for the function which allocates new stack space for
11873 -fsplit-stack. */
11874
11875 static GTY(()) rtx split_stack_fn;
11876
11877 /* A SYMBOL_REF for the function which allocates new stack space when
11878 using the large code model. */
11879
11880 static GTY(()) rtx split_stack_fn_large;
11881
11882 /* Handle -fsplit-stack. These are the first instructions in the
11883 function, even before the regular prologue. */
11884
11885 void
11886 ix86_expand_split_stack_prologue (void)
11887 {
11888 struct ix86_frame frame;
11889 HOST_WIDE_INT allocate;
11890 unsigned HOST_WIDE_INT args_size;
11891 rtx label, limit, current, jump_insn, allocate_rtx, call_insn, call_fusage;
11892 rtx scratch_reg = NULL_RTX;
11893 rtx varargs_label = NULL_RTX;
11894 rtx fn;
11895
11896 gcc_assert (flag_split_stack && reload_completed);
11897
11898 ix86_finalize_stack_realign_flags ();
11899 ix86_compute_frame_layout (&frame);
11900 allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;
11901
11902 /* This is the label we will branch to if we have enough stack
11903 space. We expect the basic block reordering pass to reverse this
11904 branch if optimizing, so that we branch in the unlikely case. */
11905 label = gen_label_rtx ();
11906
11907 /* We need to compare the stack pointer minus the frame size with
11908 the stack boundary in the TCB. The stack boundary always gives
11909 us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
11910 can compare directly. Otherwise we need to do an addition. */
11911
11912 limit = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
11913 UNSPEC_STACK_CHECK);
11914 limit = gen_rtx_CONST (Pmode, limit);
11915 limit = gen_rtx_MEM (Pmode, limit);
11916 if (allocate < SPLIT_STACK_AVAILABLE)
11917 current = stack_pointer_rtx;
11918 else
11919 {
11920 unsigned int scratch_regno;
11921 rtx offset;
11922
11923 /* We need a scratch register to hold the stack pointer minus
11924 the required frame size. Since this is the very start of the
11925 function, the scratch register can be any caller-saved
11926 register which is not used for parameters. */
11927 offset = GEN_INT (- allocate);
11928 scratch_regno = split_stack_prologue_scratch_regno ();
11929 if (scratch_regno == INVALID_REGNUM)
11930 return;
11931 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
11932 if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
11933 {
11934 /* We don't use ix86_gen_add3 in this case because it will
11935 want to split to lea, but when not optimizing the insn
11936 will not be split after this point. */
11937 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
11938 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11939 offset)));
11940 }
11941 else
11942 {
11943 emit_move_insn (scratch_reg, offset);
11944 emit_insn (ix86_gen_add3 (scratch_reg, scratch_reg,
11945 stack_pointer_rtx));
11946 }
11947 current = scratch_reg;
11948 }
11949
11950 ix86_expand_branch (GEU, current, limit, label);
11951 jump_insn = get_last_insn ();
11952 JUMP_LABEL (jump_insn) = label;
11953
11954 /* Mark the jump as very likely to be taken. */
11955 add_int_reg_note (jump_insn, REG_BR_PROB,
11956 REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100);
11957
11958 if (split_stack_fn == NULL_RTX)
11959 split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
11960 fn = split_stack_fn;
11961
11962 /* Get more stack space. We pass in the desired stack space and the
11963 size of the arguments to copy to the new stack. In 32-bit mode
11964 we push the parameters; __morestack will return on a new stack
11965 anyhow. In 64-bit mode we pass the parameters in r10 and
11966 r11. */
11967 allocate_rtx = GEN_INT (allocate);
11968 args_size = crtl->args.size >= 0 ? crtl->args.size : 0;
11969 call_fusage = NULL_RTX;
11970 if (TARGET_64BIT)
11971 {
11972 rtx reg10, reg11;
11973
11974 reg10 = gen_rtx_REG (Pmode, R10_REG);
11975 reg11 = gen_rtx_REG (Pmode, R11_REG);
11976
11977 /* If this function uses a static chain, it will be in %r10.
11978 Preserve it across the call to __morestack. */
11979 if (DECL_STATIC_CHAIN (cfun->decl))
11980 {
11981 rtx rax;
11982
11983 rax = gen_rtx_REG (word_mode, AX_REG);
11984 emit_move_insn (rax, gen_rtx_REG (word_mode, R10_REG));
11985 use_reg (&call_fusage, rax);
11986 }
11987
11988 if ((ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
11989 && !TARGET_PECOFF)
11990 {
11991 HOST_WIDE_INT argval;
11992
11993 gcc_assert (Pmode == DImode);
11994 /* When using the large model we need to load the address
11995 into a register, and we've run out of registers. So we
11996 switch to a different calling convention, and we call a
11997 different function: __morestack_large. We pass the
11998 argument size in the upper 32 bits of r10 and pass the
11999 frame size in the lower 32 bits. */
12000 gcc_assert ((allocate & (HOST_WIDE_INT) 0xffffffff) == allocate);
12001 gcc_assert ((args_size & 0xffffffff) == args_size);
12002
12003 if (split_stack_fn_large == NULL_RTX)
12004 split_stack_fn_large =
12005 gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model");
12006
12007 if (ix86_cmodel == CM_LARGE_PIC)
12008 {
12009 rtx label, x;
12010
12011 label = gen_label_rtx ();
12012 emit_label (label);
12013 LABEL_PRESERVE_P (label) = 1;
12014 emit_insn (gen_set_rip_rex64 (reg10, label));
12015 emit_insn (gen_set_got_offset_rex64 (reg11, label));
12016 emit_insn (ix86_gen_add3 (reg10, reg10, reg11));
12017 x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn_large),
12018 UNSPEC_GOT);
12019 x = gen_rtx_CONST (Pmode, x);
12020 emit_move_insn (reg11, x);
12021 x = gen_rtx_PLUS (Pmode, reg10, reg11);
12022 x = gen_const_mem (Pmode, x);
12023 emit_move_insn (reg11, x);
12024 }
12025 else
12026 emit_move_insn (reg11, split_stack_fn_large);
12027
12028 fn = reg11;
12029
12030 argval = ((args_size << 16) << 16) + allocate;
12031 emit_move_insn (reg10, GEN_INT (argval));
12032 }
12033 else
12034 {
12035 emit_move_insn (reg10, allocate_rtx);
12036 emit_move_insn (reg11, GEN_INT (args_size));
12037 use_reg (&call_fusage, reg11);
12038 }
12039
12040 use_reg (&call_fusage, reg10);
12041 }
12042 else
12043 {
12044 emit_insn (gen_push (GEN_INT (args_size)));
12045 emit_insn (gen_push (allocate_rtx));
12046 }
12047 call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn),
12048 GEN_INT (UNITS_PER_WORD), constm1_rtx,
12049 NULL_RTX, false);
12050 add_function_usage_to (call_insn, call_fusage);
12051
12052 /* In order to make call/return prediction work right, we now need
12053 to execute a return instruction. See
12054 libgcc/config/i386/morestack.S for the details on how this works.
12055
12056 For flow purposes gcc must not see this as a return
12057 instruction--we need control flow to continue at the subsequent
12058 label. Therefore, we use an unspec. */
12059 gcc_assert (crtl->args.pops_args < 65536);
12060 emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));
12061
12062 /* If we are in 64-bit mode and this function uses a static chain,
12063 we saved %r10 in %rax before calling __morestack. */
12064 if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
12065 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
12066 gen_rtx_REG (word_mode, AX_REG));
12067
12068 /* If this function calls va_start, we need to store a pointer to
12069 the arguments on the old stack, because they may not have been
12070 all copied to the new stack. At this point the old stack can be
12071 found at the frame pointer value used by __morestack, because
12072 __morestack has set that up before calling back to us. Here we
12073 store that pointer in a scratch register, and in
12074 ix86_expand_prologue we store the scratch register in a stack
12075 slot. */
12076 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12077 {
12078 unsigned int scratch_regno;
12079 rtx frame_reg;
12080 int words;
12081
12082 scratch_regno = split_stack_prologue_scratch_regno ();
12083 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
12084 frame_reg = gen_rtx_REG (Pmode, BP_REG);
12085
12086 /* 64-bit:
12087 fp -> old fp value
12088 return address within this function
12089 return address of caller of this function
12090 stack arguments
12091 So we add three words to get to the stack arguments.
12092
12093 32-bit:
12094 fp -> old fp value
12095 return address within this function
12096 first argument to __morestack
12097 second argument to __morestack
12098 return address of caller of this function
12099 stack arguments
12100 So we add five words to get to the stack arguments.
12101 */
12102 words = TARGET_64BIT ? 3 : 5;
12103 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
12104 gen_rtx_PLUS (Pmode, frame_reg,
12105 GEN_INT (words * UNITS_PER_WORD))));
12106
12107 varargs_label = gen_label_rtx ();
12108 emit_jump_insn (gen_jump (varargs_label));
12109 JUMP_LABEL (get_last_insn ()) = varargs_label;
12110
12111 emit_barrier ();
12112 }
12113
12114 emit_label (label);
12115 LABEL_NUSES (label) = 1;
12116
12117 /* If this function calls va_start, we now have to set the scratch
12118 register for the case where we do not call __morestack. In this
12119 case we need to set it based on the stack pointer. */
12120 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12121 {
12122 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
12123 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
12124 GEN_INT (UNITS_PER_WORD))));
12125
12126 emit_label (varargs_label);
12127 LABEL_NUSES (varargs_label) = 1;
12128 }
12129 }
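
/* Editorial sketch, not part of the original sources: for the common
   64-bit small-code-model case the expansion above amounts to roughly

	cmpq	%fs:<guard offset>, %rsp    # or scratch = %rsp - frame, then cmp
	jae	.Lhave_stack                # branch marked very likely taken
	movq	$<frame size>, %r10
	movq	$<argument size>, %r11
	callq	__morestack
	retq                                # split_stack_return unspec
   .Lhave_stack:
	...                                 # regular prologue follows

   In 32-bit mode the two parameters are pushed on the stack instead of
   being passed in %r10/%r11.  */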
12130
12131 /* We may have to tell the dataflow pass that the split stack prologue
12132 is initializing a scratch register. */
12133
12134 static void
12135 ix86_live_on_entry (bitmap regs)
12136 {
12137 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12138 {
12139 gcc_assert (flag_split_stack);
12140 bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
12141 }
12142 }
12143 \f
12144 /* Extract the parts of an RTL expression that is a valid memory address
12145 for an instruction. Return 0 if the structure of the address is
12146 grossly off. Return -1 if the address contains ASHIFT, so it is not
12147 strictly valid, but is still usable for computing the length of an lea instruction. */
12148
12149 int
12150 ix86_decompose_address (rtx addr, struct ix86_address *out)
12151 {
12152 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
12153 rtx base_reg, index_reg;
12154 HOST_WIDE_INT scale = 1;
12155 rtx scale_rtx = NULL_RTX;
12156 rtx tmp;
12157 int retval = 1;
12158 enum ix86_address_seg seg = SEG_DEFAULT;
12159
12160 /* Allow zero-extended SImode addresses,
12161 they will be emitted with addr32 prefix. */
12162 if (TARGET_64BIT && GET_MODE (addr) == DImode)
12163 {
12164 if (GET_CODE (addr) == ZERO_EXTEND
12165 && GET_MODE (XEXP (addr, 0)) == SImode)
12166 {
12167 addr = XEXP (addr, 0);
12168 if (CONST_INT_P (addr))
12169 return 0;
12170 }
12171 else if (GET_CODE (addr) == AND
12172 && const_32bit_mask (XEXP (addr, 1), DImode))
12173 {
12174 addr = simplify_gen_subreg (SImode, XEXP (addr, 0), DImode, 0);
12175 if (addr == NULL_RTX)
12176 return 0;
12177
12178 if (CONST_INT_P (addr))
12179 return 0;
12180 }
12181 }
12182
12183 /* Allow SImode subregs of DImode addresses,
12184 they will be emitted with addr32 prefix. */
12185 if (TARGET_64BIT && GET_MODE (addr) == SImode)
12186 {
12187 if (GET_CODE (addr) == SUBREG
12188 && GET_MODE (SUBREG_REG (addr)) == DImode)
12189 {
12190 addr = SUBREG_REG (addr);
12191 if (CONST_INT_P (addr))
12192 return 0;
12193 }
12194 }
12195
12196 if (REG_P (addr))
12197 base = addr;
12198 else if (GET_CODE (addr) == SUBREG)
12199 {
12200 if (REG_P (SUBREG_REG (addr)))
12201 base = addr;
12202 else
12203 return 0;
12204 }
12205 else if (GET_CODE (addr) == PLUS)
12206 {
12207 rtx addends[4], op;
12208 int n = 0, i;
12209
12210 op = addr;
12211 do
12212 {
12213 if (n >= 4)
12214 return 0;
12215 addends[n++] = XEXP (op, 1);
12216 op = XEXP (op, 0);
12217 }
12218 while (GET_CODE (op) == PLUS);
12219 if (n >= 4)
12220 return 0;
12221 addends[n] = op;
12222
12223 for (i = n; i >= 0; --i)
12224 {
12225 op = addends[i];
12226 switch (GET_CODE (op))
12227 {
12228 case MULT:
12229 if (index)
12230 return 0;
12231 index = XEXP (op, 0);
12232 scale_rtx = XEXP (op, 1);
12233 break;
12234
12235 case ASHIFT:
12236 if (index)
12237 return 0;
12238 index = XEXP (op, 0);
12239 tmp = XEXP (op, 1);
12240 if (!CONST_INT_P (tmp))
12241 return 0;
12242 scale = INTVAL (tmp);
12243 if ((unsigned HOST_WIDE_INT) scale > 3)
12244 return 0;
12245 scale = 1 << scale;
12246 break;
12247
12248 case ZERO_EXTEND:
12249 op = XEXP (op, 0);
12250 if (GET_CODE (op) != UNSPEC)
12251 return 0;
12252 /* FALLTHRU */
12253
12254 case UNSPEC:
12255 if (XINT (op, 1) == UNSPEC_TP
12256 && TARGET_TLS_DIRECT_SEG_REFS
12257 && seg == SEG_DEFAULT)
12258 seg = DEFAULT_TLS_SEG_REG;
12259 else
12260 return 0;
12261 break;
12262
12263 case SUBREG:
12264 if (!REG_P (SUBREG_REG (op)))
12265 return 0;
12266 /* FALLTHRU */
12267
12268 case REG:
12269 if (!base)
12270 base = op;
12271 else if (!index)
12272 index = op;
12273 else
12274 return 0;
12275 break;
12276
12277 case CONST:
12278 case CONST_INT:
12279 case SYMBOL_REF:
12280 case LABEL_REF:
12281 if (disp)
12282 return 0;
12283 disp = op;
12284 break;
12285
12286 default:
12287 return 0;
12288 }
12289 }
12290 }
12291 else if (GET_CODE (addr) == MULT)
12292 {
12293 index = XEXP (addr, 0); /* index*scale */
12294 scale_rtx = XEXP (addr, 1);
12295 }
12296 else if (GET_CODE (addr) == ASHIFT)
12297 {
12298 /* We're called for lea too, which implements ashift on occasion. */
12299 index = XEXP (addr, 0);
12300 tmp = XEXP (addr, 1);
12301 if (!CONST_INT_P (tmp))
12302 return 0;
12303 scale = INTVAL (tmp);
12304 if ((unsigned HOST_WIDE_INT) scale > 3)
12305 return 0;
12306 scale = 1 << scale;
12307 retval = -1;
12308 }
12309 else
12310 disp = addr; /* displacement */
12311
12312 if (index)
12313 {
12314 if (REG_P (index))
12315 ;
12316 else if (GET_CODE (index) == SUBREG
12317 && REG_P (SUBREG_REG (index)))
12318 ;
12319 else
12320 return 0;
12321 }
12322
12323 /* Extract the integral value of scale. */
12324 if (scale_rtx)
12325 {
12326 if (!CONST_INT_P (scale_rtx))
12327 return 0;
12328 scale = INTVAL (scale_rtx);
12329 }
12330
12331 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
12332 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
12333
12334 /* Avoid useless 0 displacement. */
12335 if (disp == const0_rtx && (base || index))
12336 disp = NULL_RTX;
12337
12338 /* Allow the arg pointer and stack pointer as index if there is no scaling; swap them into the base position, since the stack pointer cannot be encoded as an index register. */
12339 if (base_reg && index_reg && scale == 1
12340 && (index_reg == arg_pointer_rtx
12341 || index_reg == frame_pointer_rtx
12342 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
12343 {
12344 rtx tmp;
12345 tmp = base, base = index, index = tmp;
12346 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
12347 }
12348
12349 /* Special case: %ebp cannot be encoded as a base without a displacement.
12350 Similarly %r13. */
12351 if (!disp
12352 && base_reg
12353 && (base_reg == hard_frame_pointer_rtx
12354 || base_reg == frame_pointer_rtx
12355 || base_reg == arg_pointer_rtx
12356 || (REG_P (base_reg)
12357 && (REGNO (base_reg) == HARD_FRAME_POINTER_REGNUM
12358 || REGNO (base_reg) == R13_REG))))
12359 disp = const0_rtx;
12360
12361 /* Special case: on K6, [%esi] causes the instruction to be vector decoded.
12362 Avoid this by transforming to [%esi+0].
12363 Reload calls address legitimization without cfun defined, so we need
12364 to test cfun for being non-NULL. */
12365 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
12366 && base_reg && !index_reg && !disp
12367 && REG_P (base_reg) && REGNO (base_reg) == SI_REG)
12368 disp = const0_rtx;
12369
12370 /* Special case: encode reg+reg instead of reg*2. */
12371 if (!base && index && scale == 2)
12372 base = index, base_reg = index_reg, scale = 1;
12373
12374 /* Special case: scaling cannot be encoded without base or displacement. */
12375 if (!base && !disp && index && scale != 1)
12376 disp = const0_rtx;
12377
12378 out->base = base;
12379 out->index = index;
12380 out->disp = disp;
12381 out->scale = scale;
12382 out->seg = seg;
12383
12384 return retval;
12385 }
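
/* Editorial illustration, not part of the original sources: a canonical
   scaled address such as

	(plus:SI (plus:SI (mult:SI (reg:SI bx) (const_int 4))
			  (reg:SI si))
		 (const_int 16))

   decomposes into base = %esi, index = %ebx, scale = 4 and disp = 16,
   i.e. the operand written as 16(%esi,%ebx,4) in AT&T syntax.  */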
12386 \f
12387 /* Return cost of the memory address x.
12388 For i386, it is better to use a complex address than let gcc copy
12389 the address into a reg and make a new pseudo. But not if the address
12390 requires two regs - that would mean more pseudos with longer
12391 lifetimes. */
12392 static int
12393 ix86_address_cost (rtx x, enum machine_mode mode ATTRIBUTE_UNUSED,
12394 addr_space_t as ATTRIBUTE_UNUSED,
12395 bool speed ATTRIBUTE_UNUSED)
12396 {
12397 struct ix86_address parts;
12398 int cost = 1;
12399 int ok = ix86_decompose_address (x, &parts);
12400
12401 gcc_assert (ok);
12402
12403 if (parts.base && GET_CODE (parts.base) == SUBREG)
12404 parts.base = SUBREG_REG (parts.base);
12405 if (parts.index && GET_CODE (parts.index) == SUBREG)
12406 parts.index = SUBREG_REG (parts.index);
12407
12408 /* Attempt to minimize number of registers in the address. */
12409 if ((parts.base
12410 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
12411 || (parts.index
12412 && (!REG_P (parts.index)
12413 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
12414 cost++;
12415
12416 if (parts.base
12417 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
12418 && parts.index
12419 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
12420 && parts.base != parts.index)
12421 cost++;
12422
12423 /* The AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
12424 since its predecode logic can't detect the length of instructions
12425 and decoding degenerates to vector decoding. Increase the cost of such
12426 addresses here. The penalty is at least 2 cycles. It may be worthwhile
12427 to split such addresses or even refuse such addresses at all.
12428
12429 Following addressing modes are affected:
12430 [base+scale*index]
12431 [scale*index+disp]
12432 [base+index]
12433
12434 The first and last case may be avoidable by explicitly coding the zero into
12435 the memory address, but I don't have an AMD-K6 machine handy to check this
12436 theory. */
12437
12438 if (TARGET_K6
12439 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
12440 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
12441 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
12442 cost += 10;
12443
12444 return cost;
12445 }
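
/* Editorial example, not part of the original sources: before reload an
   address of the form (plus (reg pseudo-A) (reg pseudo-B)) costs 3 here
   (ignoring the AMD-K6 penalty): the base cost of 1, plus 1 because a
   pseudo register is involved, plus 1 because two distinct pseudo
   registers are needed.  This nudges the RTL passes towards address
   forms that tie up fewer registers.  */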
12446 \f
12447 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
12448 this is used to form addresses to local data when -fPIC is in
12449 use. */
12450
12451 static bool
12452 darwin_local_data_pic (rtx disp)
12453 {
12454 return (GET_CODE (disp) == UNSPEC
12455 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
12456 }
12457
12458 /* Determine if a given RTX is a valid constant. We already know this
12459 satisfies CONSTANT_P. */
12460
12461 static bool
12462 ix86_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
12463 {
12464 switch (GET_CODE (x))
12465 {
12466 case CONST:
12467 x = XEXP (x, 0);
12468
12469 if (GET_CODE (x) == PLUS)
12470 {
12471 if (!CONST_INT_P (XEXP (x, 1)))
12472 return false;
12473 x = XEXP (x, 0);
12474 }
12475
12476 if (TARGET_MACHO && darwin_local_data_pic (x))
12477 return true;
12478
12479 /* Only some unspecs are valid as "constants". */
12480 if (GET_CODE (x) == UNSPEC)
12481 switch (XINT (x, 1))
12482 {
12483 case UNSPEC_GOT:
12484 case UNSPEC_GOTOFF:
12485 case UNSPEC_PLTOFF:
12486 return TARGET_64BIT;
12487 case UNSPEC_TPOFF:
12488 case UNSPEC_NTPOFF:
12489 x = XVECEXP (x, 0, 0);
12490 return (GET_CODE (x) == SYMBOL_REF
12491 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
12492 case UNSPEC_DTPOFF:
12493 x = XVECEXP (x, 0, 0);
12494 return (GET_CODE (x) == SYMBOL_REF
12495 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
12496 default:
12497 return false;
12498 }
12499
12500 /* We must have drilled down to a symbol. */
12501 if (GET_CODE (x) == LABEL_REF)
12502 return true;
12503 if (GET_CODE (x) != SYMBOL_REF)
12504 return false;
12505 /* FALLTHRU */
12506
12507 case SYMBOL_REF:
12508 /* TLS symbols are never valid. */
12509 if (SYMBOL_REF_TLS_MODEL (x))
12510 return false;
12511
12512 /* DLLIMPORT symbols are never valid. */
12513 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
12514 && SYMBOL_REF_DLLIMPORT_P (x))
12515 return false;
12516
12517 #if TARGET_MACHO
12518 /* mdynamic-no-pic */
12519 if (MACHO_DYNAMIC_NO_PIC_P)
12520 return machopic_symbol_defined_p (x);
12521 #endif
12522 break;
12523
12524 case CONST_DOUBLE:
12525 if (GET_MODE (x) == TImode
12526 && x != CONST0_RTX (TImode)
12527 && !TARGET_64BIT)
12528 return false;
12529 break;
12530
12531 case CONST_VECTOR:
12532 if (!standard_sse_constant_p (x))
12533 return false;
12534
12535 default:
12536 break;
12537 }
12538
12539 /* Otherwise we handle everything else in the move patterns. */
12540 return true;
12541 }
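
/* Editorial example, not part of the original sources: under the checks
   above, (const:SI (unspec:SI [(symbol_ref:SI "v")] UNSPEC_NTPOFF)) is a
   legitimate constant only when "v" uses the local-exec TLS model, while
   a plain (symbol_ref "v") naming any TLS variable is rejected outright.  */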
12542
12543 /* Determine if it's legal to put X into the constant pool. This
12544 is not possible for the address of thread-local symbols, which
12545 is checked above. */
12546
12547 static bool
12548 ix86_cannot_force_const_mem (enum machine_mode mode, rtx x)
12549 {
12550 /* We can always put integral constants and vectors in memory. */
12551 switch (GET_CODE (x))
12552 {
12553 case CONST_INT:
12554 case CONST_DOUBLE:
12555 case CONST_VECTOR:
12556 return false;
12557
12558 default:
12559 break;
12560 }
12561 return !ix86_legitimate_constant_p (mode, x);
12562 }
12563
12564 /* Return true if the symbol is marked as dllimport or as a stub variable,
12565 otherwise false. */
12566
12567 static bool
12568 is_imported_p (rtx x)
12569 {
12570 if (!TARGET_DLLIMPORT_DECL_ATTRIBUTES
12571 || GET_CODE (x) != SYMBOL_REF)
12572 return false;
12573
12574 return SYMBOL_REF_DLLIMPORT_P (x) || SYMBOL_REF_STUBVAR_P (x);
12575 }
12576
12577
12578 /* Nonzero if the constant value X is a legitimate general operand
12579 when generating PIC code. It is given that flag_pic is on and
12580 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
12581
12582 bool
12583 legitimate_pic_operand_p (rtx x)
12584 {
12585 rtx inner;
12586
12587 switch (GET_CODE (x))
12588 {
12589 case CONST:
12590 inner = XEXP (x, 0);
12591 if (GET_CODE (inner) == PLUS
12592 && CONST_INT_P (XEXP (inner, 1)))
12593 inner = XEXP (inner, 0);
12594
12595 /* Only some unspecs are valid as "constants". */
12596 if (GET_CODE (inner) == UNSPEC)
12597 switch (XINT (inner, 1))
12598 {
12599 case UNSPEC_GOT:
12600 case UNSPEC_GOTOFF:
12601 case UNSPEC_PLTOFF:
12602 return TARGET_64BIT;
12603 case UNSPEC_TPOFF:
12604 x = XVECEXP (inner, 0, 0);
12605 return (GET_CODE (x) == SYMBOL_REF
12606 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
12607 case UNSPEC_MACHOPIC_OFFSET:
12608 return legitimate_pic_address_disp_p (x);
12609 default:
12610 return false;
12611 }
12612 /* FALLTHRU */
12613
12614 case SYMBOL_REF:
12615 case LABEL_REF:
12616 return legitimate_pic_address_disp_p (x);
12617
12618 default:
12619 return true;
12620 }
12621 }
12622
12623 /* Determine if a given CONST RTX is a valid memory displacement
12624 in PIC mode. */
12625
12626 bool
12627 legitimate_pic_address_disp_p (rtx disp)
12628 {
12629 bool saw_plus;
12630
12631 /* In 64bit mode we can allow direct addresses of symbols and labels
12632 when they are not dynamic symbols. */
12633 if (TARGET_64BIT)
12634 {
12635 rtx op0 = disp, op1;
12636
12637 switch (GET_CODE (disp))
12638 {
12639 case LABEL_REF:
12640 return true;
12641
12642 case CONST:
12643 if (GET_CODE (XEXP (disp, 0)) != PLUS)
12644 break;
12645 op0 = XEXP (XEXP (disp, 0), 0);
12646 op1 = XEXP (XEXP (disp, 0), 1);
12647 if (!CONST_INT_P (op1)
12648 || INTVAL (op1) >= 16*1024*1024
12649 || INTVAL (op1) < -16*1024*1024)
12650 break;
12651 if (GET_CODE (op0) == LABEL_REF)
12652 return true;
12653 if (GET_CODE (op0) == CONST
12654 && GET_CODE (XEXP (op0, 0)) == UNSPEC
12655 && XINT (XEXP (op0, 0), 1) == UNSPEC_PCREL)
12656 return true;
12657 if (GET_CODE (op0) == UNSPEC
12658 && XINT (op0, 1) == UNSPEC_PCREL)
12659 return true;
12660 if (GET_CODE (op0) != SYMBOL_REF)
12661 break;
12662 /* FALLTHRU */
12663
12664 case SYMBOL_REF:
12665 /* TLS references should always be enclosed in UNSPEC.
12666 A dllimported symbol always needs to be resolved. */
12667 if (SYMBOL_REF_TLS_MODEL (op0)
12668 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && SYMBOL_REF_DLLIMPORT_P (op0)))
12669 return false;
12670
12671 if (TARGET_PECOFF)
12672 {
12673 if (is_imported_p (op0))
12674 return true;
12675
12676 if (SYMBOL_REF_FAR_ADDR_P (op0)
12677 || !SYMBOL_REF_LOCAL_P (op0))
12678 break;
12679
12680 /* Function symbols need to be resolved only for
12681 the large model.
12682 For the small model we don't need to resolve anything
12683 here. */
12684 if ((ix86_cmodel != CM_LARGE_PIC
12685 && SYMBOL_REF_FUNCTION_P (op0))
12686 || ix86_cmodel == CM_SMALL_PIC)
12687 return true;
12688 /* Non-external symbols don't need to be resolved for
12689 the large and medium models. */
12690 if ((ix86_cmodel == CM_LARGE_PIC
12691 || ix86_cmodel == CM_MEDIUM_PIC)
12692 && !SYMBOL_REF_EXTERNAL_P (op0))
12693 return true;
12694 }
12695 else if (!SYMBOL_REF_FAR_ADDR_P (op0)
12696 && SYMBOL_REF_LOCAL_P (op0)
12697 && ix86_cmodel != CM_LARGE_PIC)
12698 return true;
12699 break;
12700
12701 default:
12702 break;
12703 }
12704 }
12705 if (GET_CODE (disp) != CONST)
12706 return false;
12707 disp = XEXP (disp, 0);
12708
12709 if (TARGET_64BIT)
12710 {
12711 /* It is not safe to allow PLUS expressions here; this limits the allowed
12712 distance of GOT tables, and we should not need them anyway. */
12713 if (GET_CODE (disp) != UNSPEC
12714 || (XINT (disp, 1) != UNSPEC_GOTPCREL
12715 && XINT (disp, 1) != UNSPEC_GOTOFF
12716 && XINT (disp, 1) != UNSPEC_PCREL
12717 && XINT (disp, 1) != UNSPEC_PLTOFF))
12718 return false;
12719
12720 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
12721 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
12722 return false;
12723 return true;
12724 }
12725
12726 saw_plus = false;
12727 if (GET_CODE (disp) == PLUS)
12728 {
12729 if (!CONST_INT_P (XEXP (disp, 1)))
12730 return false;
12731 disp = XEXP (disp, 0);
12732 saw_plus = true;
12733 }
12734
12735 if (TARGET_MACHO && darwin_local_data_pic (disp))
12736 return true;
12737
12738 if (GET_CODE (disp) != UNSPEC)
12739 return false;
12740
12741 switch (XINT (disp, 1))
12742 {
12743 case UNSPEC_GOT:
12744 if (saw_plus)
12745 return false;
12746 /* We need to check for both symbols and labels because VxWorks loads
12747 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
12748 details. */
12749 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
12750 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
12751 case UNSPEC_GOTOFF:
12752 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
12753 While the ABI also specifies a 32bit relocation, we don't produce it in
12754 the small PIC model at all. */
12755 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
12756 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
12757 && !TARGET_64BIT)
12758 return !TARGET_PECOFF && gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
12759 return false;
12760 case UNSPEC_GOTTPOFF:
12761 case UNSPEC_GOTNTPOFF:
12762 case UNSPEC_INDNTPOFF:
12763 if (saw_plus)
12764 return false;
12765 disp = XVECEXP (disp, 0, 0);
12766 return (GET_CODE (disp) == SYMBOL_REF
12767 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
12768 case UNSPEC_NTPOFF:
12769 disp = XVECEXP (disp, 0, 0);
12770 return (GET_CODE (disp) == SYMBOL_REF
12771 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
12772 case UNSPEC_DTPOFF:
12773 disp = XVECEXP (disp, 0, 0);
12774 return (GET_CODE (disp) == SYMBOL_REF
12775 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
12776 }
12777
12778 return false;
12779 }
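
/* Editorial examples, not part of the original sources: under the rules
   above, a 64-bit displacement of the form

	(const:DI (unspec:DI [(symbol_ref:DI "foo")] UNSPEC_GOTPCREL))

   i.e. foo@GOTPCREL, is accepted, while in 32-bit PIC code

	(const:SI (unspec:SI [(symbol_ref:SI "foo")] UNSPEC_GOTOFF))

   is accepted when "foo" satisfies gotoff_operand and the target is not
   PE-COFF.  */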
12780
12781 /* Our implementation of LEGITIMIZE_RELOAD_ADDRESS. Return true if part
12782 of the address X was reloaded (in which case the calling macro should
12783 goto WIN), false if the address was left unchanged. */
12785
12786 bool
12787 ix86_legitimize_reload_address (rtx x,
12788 enum machine_mode mode ATTRIBUTE_UNUSED,
12789 int opnum, int type,
12790 int ind_levels ATTRIBUTE_UNUSED)
12791 {
12792 /* Reload can generate:
12793
12794 (plus:DI (plus:DI (unspec:DI [(const_int 0 [0])] UNSPEC_TP)
12795 (reg:DI 97))
12796 (reg:DI 2 cx))
12797
12798 This RTX is rejected from ix86_legitimate_address_p due to
12799 non-strictness of base register 97. Following this rejection,
12800 reload pushes all three components into separate registers,
12801 creating invalid memory address RTX.
12802
12803 Following code reloads only the invalid part of the
12804 memory address RTX. */
12805
12806 if (GET_CODE (x) == PLUS
12807 && REG_P (XEXP (x, 1))
12808 && GET_CODE (XEXP (x, 0)) == PLUS
12809 && REG_P (XEXP (XEXP (x, 0), 1)))
12810 {
12811 rtx base, index;
12812 bool something_reloaded = false;
12813
12814 base = XEXP (XEXP (x, 0), 1);
12815 if (!REG_OK_FOR_BASE_STRICT_P (base))
12816 {
12817 push_reload (base, NULL_RTX, &XEXP (XEXP (x, 0), 1), NULL,
12818 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
12819 opnum, (enum reload_type) type);
12820 something_reloaded = true;
12821 }
12822
12823 index = XEXP (x, 1);
12824 if (!REG_OK_FOR_INDEX_STRICT_P (index))
12825 {
12826 push_reload (index, NULL_RTX, &XEXP (x, 1), NULL,
12827 INDEX_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
12828 opnum, (enum reload_type) type);
12829 something_reloaded = true;
12830 }
12831
12832 gcc_assert (something_reloaded);
12833 return true;
12834 }
12835
12836 return false;
12837 }
12838
12839 /* Determine if OP is a suitable RTX for an address register.
12840 Return the naked register if a register or a register subreg is
12841 found, otherwise return NULL_RTX. */
12842
12843 static rtx
12844 ix86_validate_address_register (rtx op)
12845 {
12846 enum machine_mode mode = GET_MODE (op);
12847
12848 /* Only SImode or DImode registers can form the address. */
12849 if (mode != SImode && mode != DImode)
12850 return NULL_RTX;
12851
12852 if (REG_P (op))
12853 return op;
12854 else if (GET_CODE (op) == SUBREG)
12855 {
12856 rtx reg = SUBREG_REG (op);
12857
12858 if (!REG_P (reg))
12859 return NULL_RTX;
12860
12861 mode = GET_MODE (reg);
12862
12863 /* Don't allow SUBREGs that span more than a word. It can
12864 lead to spill failures when the register is one word out
12865 of a two word structure. */
12866 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
12867 return NULL_RTX;
12868
12869 /* Allow only SUBREGs of non-eliminable hard registers. */
12870 if (register_no_elim_operand (reg, mode))
12871 return reg;
12872 }
12873
12874 /* Op is not a register. */
12875 return NULL_RTX;
12876 }
12877
12878 /* Recognizes RTL expressions that are valid memory addresses for an
12879 instruction. The MODE argument is the machine mode for the MEM
12880 expression that wants to use this address.
12881
12882 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
12883 convert common non-canonical forms to canonical form so that they will
12884 be recognized. */
12885
12886 static bool
12887 ix86_legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
12888 rtx addr, bool strict)
12889 {
12890 struct ix86_address parts;
12891 rtx base, index, disp;
12892 HOST_WIDE_INT scale;
12893 enum ix86_address_seg seg;
12894
12895 if (ix86_decompose_address (addr, &parts) <= 0)
12896 /* Decomposition failed. */
12897 return false;
12898
12899 base = parts.base;
12900 index = parts.index;
12901 disp = parts.disp;
12902 scale = parts.scale;
12903 seg = parts.seg;
12904
12905 /* Validate base register. */
12906 if (base)
12907 {
12908 rtx reg = ix86_validate_address_register (base);
12909
12910 if (reg == NULL_RTX)
12911 return false;
12912
12913 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
12914 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
12915 /* Base is not valid. */
12916 return false;
12917 }
12918
12919 /* Validate index register. */
12920 if (index)
12921 {
12922 rtx reg = ix86_validate_address_register (index);
12923
12924 if (reg == NULL_RTX)
12925 return false;
12926
12927 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
12928 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
12929 /* Index is not valid. */
12930 return false;
12931 }
12932
12933 /* Index and base should have the same mode. */
12934 if (base && index
12935 && GET_MODE (base) != GET_MODE (index))
12936 return false;
12937
12938 /* Address override works only on the (%reg) part of %fs:(%reg). */
12939 if (seg != SEG_DEFAULT
12940 && ((base && GET_MODE (base) != word_mode)
12941 || (index && GET_MODE (index) != word_mode)))
12942 return false;
12943
12944 /* Validate scale factor. */
12945 if (scale != 1)
12946 {
12947 if (!index)
12948 /* Scale without index. */
12949 return false;
12950
12951 if (scale != 2 && scale != 4 && scale != 8)
12952 /* Scale is not a valid multiplier. */
12953 return false;
12954 }
12955
12956 /* Validate displacement. */
12957 if (disp)
12958 {
12959 if (GET_CODE (disp) == CONST
12960 && GET_CODE (XEXP (disp, 0)) == UNSPEC
12961 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
12962 switch (XINT (XEXP (disp, 0), 1))
12963 {
12964 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
12965 used. While the ABI also specifies 32bit relocations, we don't produce
12966 them at all and use IP-relative addressing instead. */
12967 case UNSPEC_GOT:
12968 case UNSPEC_GOTOFF:
12969 gcc_assert (flag_pic);
12970 if (!TARGET_64BIT)
12971 goto is_legitimate_pic;
12972
12973 /* 64bit address unspec. */
12974 return false;
12975
12976 case UNSPEC_GOTPCREL:
12977 case UNSPEC_PCREL:
12978 gcc_assert (flag_pic);
12979 goto is_legitimate_pic;
12980
12981 case UNSPEC_GOTTPOFF:
12982 case UNSPEC_GOTNTPOFF:
12983 case UNSPEC_INDNTPOFF:
12984 case UNSPEC_NTPOFF:
12985 case UNSPEC_DTPOFF:
12986 break;
12987
12988 case UNSPEC_STACK_CHECK:
12989 gcc_assert (flag_split_stack);
12990 break;
12991
12992 default:
12993 /* Invalid address unspec. */
12994 return false;
12995 }
12996
12997 else if (SYMBOLIC_CONST (disp)
12998 && (flag_pic
12999 || (TARGET_MACHO
13000 #if TARGET_MACHO
13001 && MACHOPIC_INDIRECT
13002 && !machopic_operand_p (disp)
13003 #endif
13004 )))
13005 {
13006
13007 is_legitimate_pic:
13008 if (TARGET_64BIT && (index || base))
13009 {
13010 /* foo@dtpoff(%rX) is ok. */
13011 if (GET_CODE (disp) != CONST
13012 || GET_CODE (XEXP (disp, 0)) != PLUS
13013 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
13014 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
13015 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
13016 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
13017 /* Non-constant pic memory reference. */
13018 return false;
13019 }
13020 else if ((!TARGET_MACHO || flag_pic)
13021 && ! legitimate_pic_address_disp_p (disp))
13022 /* Displacement is an invalid pic construct. */
13023 return false;
13024 #if TARGET_MACHO
13025 else if (MACHO_DYNAMIC_NO_PIC_P
13026 && !ix86_legitimate_constant_p (Pmode, disp))
13027 /* Displacement must be referenced via non_lazy_pointer. */
13028 return false;
13029 #endif
13030
13031 /* This code used to verify that a symbolic pic displacement
13032 includes the pic_offset_table_rtx register.
13033
13034 While this is a good idea, unfortunately these constructs may
13035 be created by "adds using lea" optimization for incorrect
13036 code like:
13037
13038 int a;
13039 int foo(int i)
13040 {
13041 return *(&a+i);
13042 }
13043
13044 This code is nonsensical, but results in addressing the
13045 GOT table with a pic_offset_table_rtx base. We can't
13046 just refuse it easily, since it gets matched by the
13047 "addsi3" pattern, which later gets split to lea when the
13048 output register differs from the input. While this
13049 could be handled by a separate addsi pattern for this case
13050 that never results in lea, disabling this test seems to be
13051 the easier and correct fix for the crash. */
13052 }
13053 else if (GET_CODE (disp) != LABEL_REF
13054 && !CONST_INT_P (disp)
13055 && (GET_CODE (disp) != CONST
13056 || !ix86_legitimate_constant_p (Pmode, disp))
13057 && (GET_CODE (disp) != SYMBOL_REF
13058 || !ix86_legitimate_constant_p (Pmode, disp)))
13059 /* Displacement is not constant. */
13060 return false;
13061 else if (TARGET_64BIT
13062 && !x86_64_immediate_operand (disp, VOIDmode))
13063 /* Displacement is out of range. */
13064 return false;
13065 /* In x32 mode, constant addresses are sign extended to 64bit, so
13066 we have to prevent addresses from 0x80000000 to 0xffffffff. */
13067 else if (TARGET_X32 && !(index || base)
13068 && CONST_INT_P (disp)
13069 && val_signbit_known_set_p (SImode, INTVAL (disp)))
13070 return false;
13071 }
13072
13073 /* Everything looks valid. */
13074 return true;
13075 }
13076
13077 /* Determine if a given RTX is a valid constant address. */
13078
13079 bool
13080 constant_address_p (rtx x)
13081 {
13082 return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
13083 }
13084 \f
13085 /* Return a unique alias set for the GOT. */
13086
13087 static alias_set_type
13088 ix86_GOT_alias_set (void)
13089 {
13090 static alias_set_type set = -1;
13091 if (set == -1)
13092 set = new_alias_set ();
13093 return set;
13094 }
13095
13096 /* Return a legitimate reference for ORIG (an address) using the
13097 register REG. If REG is 0, a new pseudo is generated.
13098
13099 There are two types of references that must be handled:
13100
13101 1. Global data references must load the address from the GOT, via
13102 the PIC reg. An insn is emitted to do this load, and the reg is
13103 returned.
13104
13105 2. Static data references, constant pool addresses, and code labels
13106 compute the address as an offset from the GOT, whose base is in
13107 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
13108 differentiate them from global data objects. The returned
13109 address is the PIC reg + an unspec constant.
13110
13111 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
13112 reg also appears in the address. */
13113
13114 static rtx
13115 legitimize_pic_address (rtx orig, rtx reg)
13116 {
13117 rtx addr = orig;
13118 rtx new_rtx = orig;
13119
13120 #if TARGET_MACHO
13121 if (TARGET_MACHO && !TARGET_64BIT)
13122 {
13123 if (reg == 0)
13124 reg = gen_reg_rtx (Pmode);
13125 /* Use the generic Mach-O PIC machinery. */
13126 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
13127 }
13128 #endif
13129
13130 if (TARGET_64BIT && TARGET_DLLIMPORT_DECL_ATTRIBUTES)
13131 {
13132 rtx tmp = legitimize_pe_coff_symbol (addr, true);
13133 if (tmp)
13134 return tmp;
13135 }
13136
13137 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
13138 new_rtx = addr;
13139 else if (TARGET_64BIT && !TARGET_PECOFF
13140 && ix86_cmodel != CM_SMALL_PIC && gotoff_operand (addr, Pmode))
13141 {
13142 rtx tmpreg;
13143 /* This symbol may be referenced via a displacement from the PIC
13144 base address (@GOTOFF). */
13145
13146 if (reload_in_progress)
13147 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
13148 if (GET_CODE (addr) == CONST)
13149 addr = XEXP (addr, 0);
13150 if (GET_CODE (addr) == PLUS)
13151 {
13152 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
13153 UNSPEC_GOTOFF);
13154 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
13155 }
13156 else
13157 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
13158 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13159 if (!reg)
13160 tmpreg = gen_reg_rtx (Pmode);
13161 else
13162 tmpreg = reg;
13163 emit_move_insn (tmpreg, new_rtx);
13164
13165 if (reg != 0)
13166 {
13167 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
13168 tmpreg, 1, OPTAB_DIRECT);
13169 new_rtx = reg;
13170 }
13171 else
13172 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
13173 }
13174 else if (!TARGET_64BIT && !TARGET_PECOFF && gotoff_operand (addr, Pmode))
13175 {
13176 /* This symbol may be referenced via a displacement from the PIC
13177 base address (@GOTOFF). */
13178
13179 if (reload_in_progress)
13180 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
13181 if (GET_CODE (addr) == CONST)
13182 addr = XEXP (addr, 0);
13183 if (GET_CODE (addr) == PLUS)
13184 {
13185 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
13186 UNSPEC_GOTOFF);
13187 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
13188 }
13189 else
13190 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
13191 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13192 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13193
13194 if (reg != 0)
13195 {
13196 emit_move_insn (reg, new_rtx);
13197 new_rtx = reg;
13198 }
13199 }
13200 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
13201 /* We can't use @GOTOFF for text labels on VxWorks;
13202 see gotoff_operand. */
13203 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
13204 {
13205 rtx tmp = legitimize_pe_coff_symbol (addr, true);
13206 if (tmp)
13207 return tmp;
13208
13209 /* For x64 PE-COFF there is no GOT table, so we use the address
13210 directly. */
13211 if (TARGET_64BIT && TARGET_PECOFF)
13212 {
13213 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL);
13214 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13215
13216 if (reg == 0)
13217 reg = gen_reg_rtx (Pmode);
13218 emit_move_insn (reg, new_rtx);
13219 new_rtx = reg;
13220 }
13221 else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
13222 {
13223 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
13224 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13225 new_rtx = gen_const_mem (Pmode, new_rtx);
13226 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
13227
13228 if (reg == 0)
13229 reg = gen_reg_rtx (Pmode);
13230 /* Use gen_movsi directly, otherwise the address is loaded
13231 into a register for CSE. We don't want to CSE these addresses;
13232 instead we CSE addresses from the GOT table, so skip this. */
13233 emit_insn (gen_movsi (reg, new_rtx));
13234 new_rtx = reg;
13235 }
13236 else
13237 {
13238 /* This symbol must be referenced via a load from the
13239 Global Offset Table (@GOT). */
13240
13241 if (reload_in_progress)
13242 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
13243 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
13244 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13245 if (TARGET_64BIT)
13246 new_rtx = force_reg (Pmode, new_rtx);
13247 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13248 new_rtx = gen_const_mem (Pmode, new_rtx);
13249 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
13250
13251 if (reg == 0)
13252 reg = gen_reg_rtx (Pmode);
13253 emit_move_insn (reg, new_rtx);
13254 new_rtx = reg;
13255 }
13256 }
13257 else
13258 {
13259 if (CONST_INT_P (addr)
13260 && !x86_64_immediate_operand (addr, VOIDmode))
13261 {
13262 if (reg)
13263 {
13264 emit_move_insn (reg, addr);
13265 new_rtx = reg;
13266 }
13267 else
13268 new_rtx = force_reg (Pmode, addr);
13269 }
13270 else if (GET_CODE (addr) == CONST)
13271 {
13272 addr = XEXP (addr, 0);
13273
13274 /* We must match stuff we generate before. Assume the only
13275 unspecs that can get here are ours. Not that we could do
13276 anything with them anyway.... */
13277 if (GET_CODE (addr) == UNSPEC
13278 || (GET_CODE (addr) == PLUS
13279 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
13280 return orig;
13281 gcc_assert (GET_CODE (addr) == PLUS);
13282 }
13283 if (GET_CODE (addr) == PLUS)
13284 {
13285 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
13286
13287 /* Check first to see if this is a constant offset from a @GOTOFF
13288 symbol reference. */
13289 if (!TARGET_PECOFF && gotoff_operand (op0, Pmode)
13290 && CONST_INT_P (op1))
13291 {
13292 if (!TARGET_64BIT)
13293 {
13294 if (reload_in_progress)
13295 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
13296 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
13297 UNSPEC_GOTOFF);
13298 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
13299 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13300 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13301
13302 if (reg != 0)
13303 {
13304 emit_move_insn (reg, new_rtx);
13305 new_rtx = reg;
13306 }
13307 }
13308 else
13309 {
13310 if (INTVAL (op1) < -16*1024*1024
13311 || INTVAL (op1) >= 16*1024*1024)
13312 {
13313 if (!x86_64_immediate_operand (op1, Pmode))
13314 op1 = force_reg (Pmode, op1);
13315 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
13316 }
13317 }
13318 }
13319 else
13320 {
13321 rtx base = legitimize_pic_address (op0, reg);
13322 enum machine_mode mode = GET_MODE (base);
13323 new_rtx
13324 = legitimize_pic_address (op1, base == reg ? NULL_RTX : reg);
13325
13326 if (CONST_INT_P (new_rtx))
13327 {
13328 if (INTVAL (new_rtx) < -16*1024*1024
13329 || INTVAL (new_rtx) >= 16*1024*1024)
13330 {
13331 if (!x86_64_immediate_operand (new_rtx, mode))
13332 new_rtx = force_reg (mode, new_rtx);
13333 new_rtx
13334 = gen_rtx_PLUS (mode, force_reg (mode, base), new_rtx);
13335 }
13336 else
13337 new_rtx = plus_constant (mode, base, INTVAL (new_rtx));
13338 }
13339 else
13340 {
13341 if (GET_CODE (new_rtx) == PLUS
13342 && CONSTANT_P (XEXP (new_rtx, 1)))
13343 {
13344 base = gen_rtx_PLUS (mode, base, XEXP (new_rtx, 0));
13345 new_rtx = XEXP (new_rtx, 1);
13346 }
13347 new_rtx = gen_rtx_PLUS (mode, base, new_rtx);
13348 }
13349 }
13350 }
13351 }
13352 return new_rtx;
13353 }
13354 \f
13355 /* Load the thread pointer. If TO_REG is true, force it into a register. */
13356
13357 static rtx
13358 get_thread_pointer (enum machine_mode tp_mode, bool to_reg)
13359 {
13360 rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
13361
13362 if (GET_MODE (tp) != tp_mode)
13363 {
13364 gcc_assert (GET_MODE (tp) == SImode);
13365 gcc_assert (tp_mode == DImode);
13366
13367 tp = gen_rtx_ZERO_EXTEND (tp_mode, tp);
13368 }
13369
13370 if (to_reg)
13371 tp = copy_to_mode_reg (tp_mode, tp);
13372
13373 return tp;
13374 }
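
/* Editorial note, not part of the original sources: the UNSPEC_TP wrapper
   built above is what ix86_decompose_address later maps to
   DEFAULT_TLS_SEG_REG, so with TARGET_TLS_DIRECT_SEG_REFS the access can
   be emitted as a %fs- or %gs-relative memory reference.  */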
13375
13376 /* Construct the SYMBOL_REF for the tls_get_addr function. */
13377
13378 static GTY(()) rtx ix86_tls_symbol;
13379
13380 static rtx
13381 ix86_tls_get_addr (void)
13382 {
13383 if (!ix86_tls_symbol)
13384 {
13385 const char *sym
13386 = ((TARGET_ANY_GNU_TLS && !TARGET_64BIT)
13387 ? "___tls_get_addr" : "__tls_get_addr");
13388
13389 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, sym);
13390 }
13391
13392 if (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF)
13393 {
13394 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, ix86_tls_symbol),
13395 UNSPEC_PLTOFF);
13396 return gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
13397 gen_rtx_CONST (Pmode, unspec));
13398 }
13399
13400 return ix86_tls_symbol;
13401 }
13402
13403 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
13404
13405 static GTY(()) rtx ix86_tls_module_base_symbol;
13406
13407 rtx
13408 ix86_tls_module_base (void)
13409 {
13410 if (!ix86_tls_module_base_symbol)
13411 {
13412 ix86_tls_module_base_symbol
13413 = gen_rtx_SYMBOL_REF (Pmode, "_TLS_MODULE_BASE_");
13414
13415 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
13416 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
13417 }
13418
13419 return ix86_tls_module_base_symbol;
13420 }
13421
13422 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
13423 false if we expect this to be used for a memory address and true if
13424 we expect to load the address into a register. */
13425
13426 static rtx
13427 legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
13428 {
13429 rtx dest, base, off;
13430 rtx pic = NULL_RTX, tp = NULL_RTX;
13431 enum machine_mode tp_mode = Pmode;
13432 int type;
13433
13434 /* Fall back to the global dynamic model if the tool chain cannot support
13435 local dynamic. */
13436 if (TARGET_SUN_TLS && !TARGET_64BIT
13437 && !HAVE_AS_IX86_TLSLDMPLT && !HAVE_AS_IX86_TLSLDM
13438 && model == TLS_MODEL_LOCAL_DYNAMIC)
13439 model = TLS_MODEL_GLOBAL_DYNAMIC;
13440
13441 switch (model)
13442 {
13443 case TLS_MODEL_GLOBAL_DYNAMIC:
13444 dest = gen_reg_rtx (Pmode);
13445
13446 if (!TARGET_64BIT)
13447 {
13448 if (flag_pic && !TARGET_PECOFF)
13449 pic = pic_offset_table_rtx;
13450 else
13451 {
13452 pic = gen_reg_rtx (Pmode);
13453 emit_insn (gen_set_got (pic));
13454 }
13455 }
13456
13457 if (TARGET_GNU2_TLS)
13458 {
13459 if (TARGET_64BIT)
13460 emit_insn (gen_tls_dynamic_gnu2_64 (dest, x));
13461 else
13462 emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic));
13463
13464 tp = get_thread_pointer (Pmode, true);
13465 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
13466
13467 if (GET_MODE (x) != Pmode)
13468 x = gen_rtx_ZERO_EXTEND (Pmode, x);
13469
13470 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
13471 }
13472 else
13473 {
13474 rtx caddr = ix86_tls_get_addr ();
13475
13476 if (TARGET_64BIT)
13477 {
13478 rtx rax = gen_rtx_REG (Pmode, AX_REG);
13479 rtx insns;
13480
13481 start_sequence ();
13482 emit_call_insn
13483 (ix86_gen_tls_global_dynamic_64 (rax, x, caddr));
13484 insns = get_insns ();
13485 end_sequence ();
13486
13487 if (GET_MODE (x) != Pmode)
13488 x = gen_rtx_ZERO_EXTEND (Pmode, x);
13489
13490 RTL_CONST_CALL_P (insns) = 1;
13491 emit_libcall_block (insns, dest, rax, x);
13492 }
13493 else
13494 emit_insn (gen_tls_global_dynamic_32 (dest, x, pic, caddr));
13495 }
13496 break;
13497
13498 case TLS_MODEL_LOCAL_DYNAMIC:
13499 base = gen_reg_rtx (Pmode);
13500
13501 if (!TARGET_64BIT)
13502 {
13503 if (flag_pic)
13504 pic = pic_offset_table_rtx;
13505 else
13506 {
13507 pic = gen_reg_rtx (Pmode);
13508 emit_insn (gen_set_got (pic));
13509 }
13510 }
13511
13512 if (TARGET_GNU2_TLS)
13513 {
13514 rtx tmp = ix86_tls_module_base ();
13515
13516 if (TARGET_64BIT)
13517 emit_insn (gen_tls_dynamic_gnu2_64 (base, tmp));
13518 else
13519 emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic));
13520
13521 tp = get_thread_pointer (Pmode, true);
13522 set_unique_reg_note (get_last_insn (), REG_EQUAL,
13523 gen_rtx_MINUS (Pmode, tmp, tp));
13524 }
13525 else
13526 {
13527 rtx caddr = ix86_tls_get_addr ();
13528
13529 if (TARGET_64BIT)
13530 {
13531 rtx rax = gen_rtx_REG (Pmode, AX_REG);
13532 rtx insns, eqv;
13533
13534 start_sequence ();
13535 emit_call_insn
13536 (ix86_gen_tls_local_dynamic_base_64 (rax, caddr));
13537 insns = get_insns ();
13538 end_sequence ();
13539
13540 /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
13541 share the LD_BASE result with other LD model accesses. */
13542 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
13543 UNSPEC_TLS_LD_BASE);
13544
13545 RTL_CONST_CALL_P (insns) = 1;
13546 emit_libcall_block (insns, base, rax, eqv);
13547 }
13548 else
13549 emit_insn (gen_tls_local_dynamic_base_32 (base, pic, caddr));
13550 }
13551
13552 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
13553 off = gen_rtx_CONST (Pmode, off);
13554
13555 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
13556
13557 if (TARGET_GNU2_TLS)
13558 {
13559 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
13560
13561 if (GET_MODE (x) != Pmode)
13562 x = gen_rtx_ZERO_EXTEND (Pmode, x);
13563
13564 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
13565 }
13566 break;
13567
13568 case TLS_MODEL_INITIAL_EXEC:
13569 if (TARGET_64BIT)
13570 {
13571 if (TARGET_SUN_TLS && !TARGET_X32)
13572 {
13573 /* The Sun linker took the AMD64 TLS spec literally
13574 and can only handle %rax as the destination of the
13575 initial-exec code sequence. */
13576
13577 dest = gen_reg_rtx (DImode);
13578 emit_insn (gen_tls_initial_exec_64_sun (dest, x));
13579 return dest;
13580 }
13581
13582 /* Generate DImode references to avoid %fs:(%reg32)
13583 problems and the linker IE->LE relaxation bug. */
13584 tp_mode = DImode;
13585 pic = NULL;
13586 type = UNSPEC_GOTNTPOFF;
13587 }
13588 else if (flag_pic)
13589 {
13590 if (reload_in_progress)
13591 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
13592 pic = pic_offset_table_rtx;
13593 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
13594 }
13595 else if (!TARGET_ANY_GNU_TLS)
13596 {
13597 pic = gen_reg_rtx (Pmode);
13598 emit_insn (gen_set_got (pic));
13599 type = UNSPEC_GOTTPOFF;
13600 }
13601 else
13602 {
13603 pic = NULL;
13604 type = UNSPEC_INDNTPOFF;
13605 }
13606
13607 off = gen_rtx_UNSPEC (tp_mode, gen_rtvec (1, x), type);
13608 off = gen_rtx_CONST (tp_mode, off);
13609 if (pic)
13610 off = gen_rtx_PLUS (tp_mode, pic, off);
13611 off = gen_const_mem (tp_mode, off);
13612 set_mem_alias_set (off, ix86_GOT_alias_set ());
13613
13614 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
13615 {
13616 base = get_thread_pointer (tp_mode,
13617 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
13618 off = force_reg (tp_mode, off);
13619 return gen_rtx_PLUS (tp_mode, base, off);
13620 }
13621 else
13622 {
13623 base = get_thread_pointer (Pmode, true);
13624 dest = gen_reg_rtx (Pmode);
13625 emit_insn (ix86_gen_sub3 (dest, base, off));
13626 }
13627 break;
13628
13629 case TLS_MODEL_LOCAL_EXEC:
13630 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
13631 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
13632 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
13633 off = gen_rtx_CONST (Pmode, off);
13634
13635 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
13636 {
13637 base = get_thread_pointer (Pmode,
13638 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
13639 return gen_rtx_PLUS (Pmode, base, off);
13640 }
13641 else
13642 {
13643 base = get_thread_pointer (Pmode, true);
13644 dest = gen_reg_rtx (Pmode);
13645 emit_insn (ix86_gen_sub3 (dest, base, off));
13646 }
13647 break;
13648
13649 default:
13650 gcc_unreachable ();
13651 }
13652
13653 return dest;
13654 }
13655
13656 /* Create or return the unique __imp_DECL dllimport symbol corresponding
13657 to symbol DECL if BEIMPORT is true. Otherwise create or return the
13658 unique refptr-DECL symbol corresponding to symbol DECL. */
13659
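/* Illustration (assuming a target without a user label prefix, e.g. 64-bit
   mingw): for a dllimport'ed symbol "foo", get_dllimport_decl below builds
   an artificial pointer variable whose assembler name is "__imp_foo" (the
   leading '*' in the prefix strings only suppresses further label
   prefixing), and references to "foo" are rewritten as loads through that
   pointer.  The refptr variant works the same way with a "refptr." name.  */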
13660 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
13661 htab_t dllimport_map;
13662
13663 static tree
13664 get_dllimport_decl (tree decl, bool beimport)
13665 {
13666 struct tree_map *h, in;
13667 void **loc;
13668 const char *name;
13669 const char *prefix;
13670 size_t namelen, prefixlen;
13671 char *imp_name;
13672 tree to;
13673 rtx rtl;
13674
13675 if (!dllimport_map)
13676 dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
13677
13678 in.hash = htab_hash_pointer (decl);
13679 in.base.from = decl;
13680 loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
13681 h = (struct tree_map *) *loc;
13682 if (h)
13683 return h->to;
13684
13685 *loc = h = ggc_alloc<tree_map> ();
13686 h->hash = in.hash;
13687 h->base.from = decl;
13688 h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
13689 VAR_DECL, NULL, ptr_type_node);
13690 DECL_ARTIFICIAL (to) = 1;
13691 DECL_IGNORED_P (to) = 1;
13692 DECL_EXTERNAL (to) = 1;
13693 TREE_READONLY (to) = 1;
13694
13695 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
13696 name = targetm.strip_name_encoding (name);
13697 if (beimport)
13698 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
13699 ? "*__imp_" : "*__imp__";
13700 else
13701 prefix = user_label_prefix[0] == 0 ? "*.refptr." : "*refptr.";
13702 namelen = strlen (name);
13703 prefixlen = strlen (prefix);
13704 imp_name = (char *) alloca (namelen + prefixlen + 1);
13705 memcpy (imp_name, prefix, prefixlen);
13706 memcpy (imp_name + prefixlen, name, namelen + 1);
13707
13708 name = ggc_alloc_string (imp_name, namelen + prefixlen);
13709 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
13710 SET_SYMBOL_REF_DECL (rtl, to);
13711 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL | SYMBOL_FLAG_STUBVAR;
13712 if (!beimport)
13713 {
13714 SYMBOL_REF_FLAGS (rtl) |= SYMBOL_FLAG_EXTERNAL;
13715 #ifdef SUB_TARGET_RECORD_STUB
13716 SUB_TARGET_RECORD_STUB (name);
13717 #endif
13718 }
13719
13720 rtl = gen_const_mem (Pmode, rtl);
13721 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
13722
13723 SET_DECL_RTL (to, rtl);
13724 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
13725
13726 return to;
13727 }
13728
13729 /* Expand SYMBOL into its corresponding far-address symbol.
13730 WANT_REG is true if we require the result be a register. */
13731
13732 static rtx
13733 legitimize_pe_coff_extern_decl (rtx symbol, bool want_reg)
13734 {
13735 tree imp_decl;
13736 rtx x;
13737
13738 gcc_assert (SYMBOL_REF_DECL (symbol));
13739 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), false);
13740
13741 x = DECL_RTL (imp_decl);
13742 if (want_reg)
13743 x = force_reg (Pmode, x);
13744 return x;
13745 }
13746
13747 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
13748 true if we require the result be a register. */
13749
13750 static rtx
13751 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
13752 {
13753 tree imp_decl;
13754 rtx x;
13755
13756 gcc_assert (SYMBOL_REF_DECL (symbol));
13757 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), true);
13758
13759 x = DECL_RTL (imp_decl);
13760 if (want_reg)
13761 x = force_reg (Pmode, x);
13762 return x;
13763 }
13764
13765 /* Expand ADDR into its corresponding dllimport or refptr symbol. INREG
13766 is true if we require the result be a register. */
13767
13768 static rtx
13769 legitimize_pe_coff_symbol (rtx addr, bool inreg)
13770 {
13771 if (!TARGET_PECOFF)
13772 return NULL_RTX;
13773
13774 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
13775 {
13776 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
13777 return legitimize_dllimport_symbol (addr, inreg);
13778 if (GET_CODE (addr) == CONST
13779 && GET_CODE (XEXP (addr, 0)) == PLUS
13780 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
13781 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
13782 {
13783 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), inreg);
13784 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
13785 }
13786 }
13787
13788 if (ix86_cmodel != CM_LARGE_PIC && ix86_cmodel != CM_MEDIUM_PIC)
13789 return NULL_RTX;
13790 if (GET_CODE (addr) == SYMBOL_REF
13791 && !is_imported_p (addr)
13792 && SYMBOL_REF_EXTERNAL_P (addr)
13793 && SYMBOL_REF_DECL (addr))
13794 return legitimize_pe_coff_extern_decl (addr, inreg);
13795
13796 if (GET_CODE (addr) == CONST
13797 && GET_CODE (XEXP (addr, 0)) == PLUS
13798 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
13799 && !is_imported_p (XEXP (XEXP (addr, 0), 0))
13800 && SYMBOL_REF_EXTERNAL_P (XEXP (XEXP (addr, 0), 0))
13801 && SYMBOL_REF_DECL (XEXP (XEXP (addr, 0), 0)))
13802 {
13803 rtx t = legitimize_pe_coff_extern_decl (XEXP (XEXP (addr, 0), 0), inreg);
13804 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
13805 }
13806 return NULL_RTX;
13807 }
13808
13809 /* Try machine-dependent ways of modifying an illegitimate address
13810 to be legitimate. If we find one, return the new, valid address.
13811 This macro is used in only one place: `memory_address' in explow.c.
13812
13813 OLDX is the address as it was before break_out_memory_refs was called.
13814 In some cases it is useful to look at this to decide what needs to be done.
13815
13816 It is always safe for this macro to do nothing. It exists to recognize
13817 opportunities to optimize the output.
13818
13819 For the 80386, we handle X+REG by loading X into a register R and
13820 using R+REG. R will go in a general reg and indexing will be used.
13821 However, if REG is a broken-out memory address or multiplication,
13822 nothing needs to be done because REG can certainly go in a general reg.
13823
13824 When -fpic is used, special handling is needed for symbolic references.
13825 See comments by legitimize_pic_address in i386.c for details. */
13826
13827 static rtx
13828 ix86_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
13829 enum machine_mode mode)
13830 {
13831 int changed = 0;
13832 unsigned log;
13833
13834 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
13835 if (log)
13836 return legitimize_tls_address (x, (enum tls_model) log, false);
13837 if (GET_CODE (x) == CONST
13838 && GET_CODE (XEXP (x, 0)) == PLUS
13839 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
13840 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
13841 {
13842 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
13843 (enum tls_model) log, false);
13844 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
13845 }
13846
13847 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
13848 {
13849 rtx tmp = legitimize_pe_coff_symbol (x, true);
13850 if (tmp)
13851 return tmp;
13852 }
13853
13854 if (flag_pic && SYMBOLIC_CONST (x))
13855 return legitimize_pic_address (x, 0);
13856
13857 #if TARGET_MACHO
13858 if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x))
13859 return machopic_indirect_data_reference (x, 0);
13860 #endif
13861
13862 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
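  /* e.g. (ashift (reg) (const_int 3)) becomes (mult (reg) (const_int 8)),
     matching the scaled-index form of an x86 address.  */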
13863 if (GET_CODE (x) == ASHIFT
13864 && CONST_INT_P (XEXP (x, 1))
13865 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
13866 {
13867 changed = 1;
13868 log = INTVAL (XEXP (x, 1));
13869 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
13870 GEN_INT (1 << log));
13871 }
13872
13873 if (GET_CODE (x) == PLUS)
13874 {
13875 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
13876
13877 if (GET_CODE (XEXP (x, 0)) == ASHIFT
13878 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
13879 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
13880 {
13881 changed = 1;
13882 log = INTVAL (XEXP (XEXP (x, 0), 1));
13883 XEXP (x, 0) = gen_rtx_MULT (Pmode,
13884 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
13885 GEN_INT (1 << log));
13886 }
13887
13888 if (GET_CODE (XEXP (x, 1)) == ASHIFT
13889 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
13890 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
13891 {
13892 changed = 1;
13893 log = INTVAL (XEXP (XEXP (x, 1), 1));
13894 XEXP (x, 1) = gen_rtx_MULT (Pmode,
13895 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
13896 GEN_INT (1 << log));
13897 }
13898
13899 /* Put multiply first if it isn't already. */
13900 if (GET_CODE (XEXP (x, 1)) == MULT)
13901 {
13902 rtx tmp = XEXP (x, 0);
13903 XEXP (x, 0) = XEXP (x, 1);
13904 XEXP (x, 1) = tmp;
13905 changed = 1;
13906 }
13907
13908 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
13909 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
13910 created by virtual register instantiation, register elimination, and
13911 similar optimizations. */
13912 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
13913 {
13914 changed = 1;
13915 x = gen_rtx_PLUS (Pmode,
13916 gen_rtx_PLUS (Pmode, XEXP (x, 0),
13917 XEXP (XEXP (x, 1), 0)),
13918 XEXP (XEXP (x, 1), 1));
13919 }
13920
13921 /* Canonicalize
13922 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
13923 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
13924 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
13925 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
13926 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
13927 && CONSTANT_P (XEXP (x, 1)))
13928 {
13929 rtx constant;
13930 rtx other = NULL_RTX;
13931
13932 if (CONST_INT_P (XEXP (x, 1)))
13933 {
13934 constant = XEXP (x, 1);
13935 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
13936 }
13937 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
13938 {
13939 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
13940 other = XEXP (x, 1);
13941 }
13942 else
13943 constant = 0;
13944
13945 if (constant)
13946 {
13947 changed = 1;
13948 x = gen_rtx_PLUS (Pmode,
13949 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
13950 XEXP (XEXP (XEXP (x, 0), 1), 0)),
13951 plus_constant (Pmode, other,
13952 INTVAL (constant)));
13953 }
13954 }
13955
13956 if (changed && ix86_legitimate_address_p (mode, x, false))
13957 return x;
13958
13959 if (GET_CODE (XEXP (x, 0)) == MULT)
13960 {
13961 changed = 1;
13962 XEXP (x, 0) = copy_addr_to_reg (XEXP (x, 0));
13963 }
13964
13965 if (GET_CODE (XEXP (x, 1)) == MULT)
13966 {
13967 changed = 1;
13968 XEXP (x, 1) = copy_addr_to_reg (XEXP (x, 1));
13969 }
13970
13971 if (changed
13972 && REG_P (XEXP (x, 1))
13973 && REG_P (XEXP (x, 0)))
13974 return x;
13975
13976 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
13977 {
13978 changed = 1;
13979 x = legitimize_pic_address (x, 0);
13980 }
13981
13982 if (changed && ix86_legitimate_address_p (mode, x, false))
13983 return x;
13984
13985 if (REG_P (XEXP (x, 0)))
13986 {
13987 rtx temp = gen_reg_rtx (Pmode);
13988 rtx val = force_operand (XEXP (x, 1), temp);
13989 if (val != temp)
13990 {
13991 val = convert_to_mode (Pmode, val, 1);
13992 emit_move_insn (temp, val);
13993 }
13994
13995 XEXP (x, 1) = temp;
13996 return x;
13997 }
13998
13999 else if (REG_P (XEXP (x, 1)))
14000 {
14001 rtx temp = gen_reg_rtx (Pmode);
14002 rtx val = force_operand (XEXP (x, 0), temp);
14003 if (val != temp)
14004 {
14005 val = convert_to_mode (Pmode, val, 1);
14006 emit_move_insn (temp, val);
14007 }
14008
14009 XEXP (x, 0) = temp;
14010 return x;
14011 }
14012 }
14013
14014 return x;
14015 }
14016 \f
14017 /* Print an integer constant expression in assembler syntax. Addition
14018 and subtraction are the only arithmetic that may appear in these
14019 expressions. FILE is the stdio stream to write to, X is the rtx, and
14020 CODE is the operand print code from the output string. */
14021
14022 static void
14023 output_pic_addr_const (FILE *file, rtx x, int code)
14024 {
14025 char buf[256];
14026
14027 switch (GET_CODE (x))
14028 {
14029 case PC:
14030 gcc_assert (flag_pic);
14031 putc ('.', file);
14032 break;
14033
14034 case SYMBOL_REF:
14035 if (TARGET_64BIT || ! TARGET_MACHO_BRANCH_ISLANDS)
14036 output_addr_const (file, x);
14037 else
14038 {
14039 const char *name = XSTR (x, 0);
14040
14041 /* Mark the decl as referenced so that cgraph will
14042 output the function. */
14043 if (SYMBOL_REF_DECL (x))
14044 mark_decl_referenced (SYMBOL_REF_DECL (x));
14045
14046 #if TARGET_MACHO
14047 if (MACHOPIC_INDIRECT
14048 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
14049 name = machopic_indirection_name (x, /*stub_p=*/true);
14050 #endif
14051 assemble_name (file, name);
14052 }
14053 if (!TARGET_MACHO && !(TARGET_64BIT && TARGET_PECOFF)
14054 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
14055 fputs ("@PLT", file);
14056 break;
14057
14058 case LABEL_REF:
14059 x = XEXP (x, 0);
14060 /* FALLTHRU */
14061 case CODE_LABEL:
14062 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
14063 assemble_name (asm_out_file, buf);
14064 break;
14065
14066 case CONST_INT:
14067 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
14068 break;
14069
14070 case CONST:
14071 /* This used to output parentheses around the expression,
14072 but that does not work on the 386 (either ATT or BSD assembler). */
14073 output_pic_addr_const (file, XEXP (x, 0), code);
14074 break;
14075
14076 case CONST_DOUBLE:
14077 if (GET_MODE (x) == VOIDmode)
14078 {
14079 /* We can use %d if the number is <32 bits and positive. */
14080 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
14081 fprintf (file, "0x%lx%08lx",
14082 (unsigned long) CONST_DOUBLE_HIGH (x),
14083 (unsigned long) CONST_DOUBLE_LOW (x));
14084 else
14085 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
14086 }
14087 else
14088 /* We can't handle floating point constants;
14089 TARGET_PRINT_OPERAND must handle them. */
14090 output_operand_lossage ("floating constant misused");
14091 break;
14092
14093 case PLUS:
14094 /* Some assemblers need integer constants to appear first. */
14095 if (CONST_INT_P (XEXP (x, 0)))
14096 {
14097 output_pic_addr_const (file, XEXP (x, 0), code);
14098 putc ('+', file);
14099 output_pic_addr_const (file, XEXP (x, 1), code);
14100 }
14101 else
14102 {
14103 gcc_assert (CONST_INT_P (XEXP (x, 1)));
14104 output_pic_addr_const (file, XEXP (x, 1), code);
14105 putc ('+', file);
14106 output_pic_addr_const (file, XEXP (x, 0), code);
14107 }
14108 break;
14109
14110 case MINUS:
14111 if (!TARGET_MACHO)
14112 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
14113 output_pic_addr_const (file, XEXP (x, 0), code);
14114 putc ('-', file);
14115 output_pic_addr_const (file, XEXP (x, 1), code);
14116 if (!TARGET_MACHO)
14117 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
14118 break;
14119
14120 case UNSPEC:
14121 if (XINT (x, 1) == UNSPEC_STACK_CHECK)
14122 {
14123 bool f = i386_asm_output_addr_const_extra (file, x);
14124 gcc_assert (f);
14125 break;
14126 }
14127
14128 gcc_assert (XVECLEN (x, 0) == 1);
14129 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
14130 switch (XINT (x, 1))
14131 {
14132 case UNSPEC_GOT:
14133 fputs ("@GOT", file);
14134 break;
14135 case UNSPEC_GOTOFF:
14136 fputs ("@GOTOFF", file);
14137 break;
14138 case UNSPEC_PLTOFF:
14139 fputs ("@PLTOFF", file);
14140 break;
14141 case UNSPEC_PCREL:
14142 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14143 "(%rip)" : "[rip]", file);
14144 break;
14145 case UNSPEC_GOTPCREL:
14146 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14147 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
14148 break;
14149 case UNSPEC_GOTTPOFF:
14150 /* FIXME: This might be @TPOFF in Sun ld too. */
14151 fputs ("@gottpoff", file);
14152 break;
14153 case UNSPEC_TPOFF:
14154 fputs ("@tpoff", file);
14155 break;
14156 case UNSPEC_NTPOFF:
14157 if (TARGET_64BIT)
14158 fputs ("@tpoff", file);
14159 else
14160 fputs ("@ntpoff", file);
14161 break;
14162 case UNSPEC_DTPOFF:
14163 fputs ("@dtpoff", file);
14164 break;
14165 case UNSPEC_GOTNTPOFF:
14166 if (TARGET_64BIT)
14167 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14168 "@gottpoff(%rip)": "@gottpoff[rip]", file);
14169 else
14170 fputs ("@gotntpoff", file);
14171 break;
14172 case UNSPEC_INDNTPOFF:
14173 fputs ("@indntpoff", file);
14174 break;
14175 #if TARGET_MACHO
14176 case UNSPEC_MACHOPIC_OFFSET:
14177 putc ('-', file);
14178 machopic_output_function_base_name (file);
14179 break;
14180 #endif
14181 default:
14182 output_operand_lossage ("invalid UNSPEC as operand");
14183 break;
14184 }
14185 break;
14186
14187 default:
14188 output_operand_lossage ("invalid expression as operand");
14189 }
14190 }
14191
14192 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
14193 We need to emit DTP-relative relocations. */
14194
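/* A rough sketch of the output (assuming ASM_LONG is the usual "\t.long\t"
   directive): for an operand that prints as "foo", SIZE == 4 emits
   "\t.long\tfoo@dtpoff", while SIZE == 8 emits "\t.long\tfoo@dtpoff, 0",
   i.e. the low 32-bit dtpoff word followed by a zero upper word.  */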
14195 static void ATTRIBUTE_UNUSED
14196 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
14197 {
14198 fputs (ASM_LONG, file);
14199 output_addr_const (file, x);
14200 fputs ("@dtpoff", file);
14201 switch (size)
14202 {
14203 case 4:
14204 break;
14205 case 8:
14206 fputs (", 0", file);
14207 break;
14208 default:
14209 gcc_unreachable ();
14210 }
14211 }
14212
14213 /* Return true if X is a representation of the PIC register. This copes
14214 with calls from ix86_find_base_term, where the register might have
14215 been replaced by a cselib value. */
14216
14217 static bool
14218 ix86_pic_register_p (rtx x)
14219 {
14220 if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
14221 return (pic_offset_table_rtx
14222 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
14223 else
14224 return REG_P (x) && REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
14225 }
14226
14227 /* Helper function for ix86_delegitimize_address.
14228 Attempt to delegitimize TLS local-exec accesses. */
14229
14230 static rtx
14231 ix86_delegitimize_tls_address (rtx orig_x)
14232 {
14233 rtx x = orig_x, unspec;
14234 struct ix86_address addr;
14235
14236 if (!TARGET_TLS_DIRECT_SEG_REFS)
14237 return orig_x;
14238 if (MEM_P (x))
14239 x = XEXP (x, 0);
14240 if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
14241 return orig_x;
14242 if (ix86_decompose_address (x, &addr) == 0
14243 || addr.seg != DEFAULT_TLS_SEG_REG
14244 || addr.disp == NULL_RTX
14245 || GET_CODE (addr.disp) != CONST)
14246 return orig_x;
14247 unspec = XEXP (addr.disp, 0);
14248 if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
14249 unspec = XEXP (unspec, 0);
14250 if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
14251 return orig_x;
14252 x = XVECEXP (unspec, 0, 0);
14253 gcc_assert (GET_CODE (x) == SYMBOL_REF);
14254 if (unspec != XEXP (addr.disp, 0))
14255 x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
14256 if (addr.index)
14257 {
14258 rtx idx = addr.index;
14259 if (addr.scale != 1)
14260 idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
14261 x = gen_rtx_PLUS (Pmode, idx, x);
14262 }
14263 if (addr.base)
14264 x = gen_rtx_PLUS (Pmode, addr.base, x);
14265 if (MEM_P (orig_x))
14266 x = replace_equiv_address_nv (orig_x, x);
14267 return x;
14268 }
14269
14270 /* In the name of slightly smaller debug output, and to cater to
14271 general assembler lossage, recognize PIC+GOTOFF and turn it back
14272 into a direct symbol reference.
14273
14274 On Darwin, this is necessary to avoid a crash, because Darwin
14275 has a different PIC label for each routine but the DWARF debugging
14276 information is not associated with any particular routine, so it's
14277 necessary to remove references to the PIC label from RTL stored by
14278 the DWARF output code. */
14279
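/* As a hedged illustration of the common -m32 case handled below: an
   address of the form

       (plus (reg %ebx)
             (const (unspec [(symbol_ref "foo")] UNSPEC_GOTOFF)))

   (the PIC register plus foo@GOTOFF) is folded back to a plain
   (symbol_ref "foo"), re-adding any constant offset that was attached
   to the UNSPEC.  */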
14280 static rtx
14281 ix86_delegitimize_address (rtx x)
14282 {
14283 rtx orig_x = delegitimize_mem_from_attrs (x);
14284 /* addend is NULL or some rtx if x is something+GOTOFF where
14285 something doesn't include the PIC register. */
14286 rtx addend = NULL_RTX;
14287 /* reg_addend is NULL or a multiple of some register. */
14288 rtx reg_addend = NULL_RTX;
14289 /* const_addend is NULL or a const_int. */
14290 rtx const_addend = NULL_RTX;
14291 /* This is the result, or NULL. */
14292 rtx result = NULL_RTX;
14293
14294 x = orig_x;
14295
14296 if (MEM_P (x))
14297 x = XEXP (x, 0);
14298
14299 if (TARGET_64BIT)
14300 {
14301 if (GET_CODE (x) == CONST
14302 && GET_CODE (XEXP (x, 0)) == PLUS
14303 && GET_MODE (XEXP (x, 0)) == Pmode
14304 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
14305 && GET_CODE (XEXP (XEXP (x, 0), 0)) == UNSPEC
14306 && XINT (XEXP (XEXP (x, 0), 0), 1) == UNSPEC_PCREL)
14307 {
14308 rtx x2 = XVECEXP (XEXP (XEXP (x, 0), 0), 0, 0);
14309 x = gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 1), x2);
14310 if (MEM_P (orig_x))
14311 x = replace_equiv_address_nv (orig_x, x);
14312 return x;
14313 }
14314
14315 if (GET_CODE (x) == CONST
14316 && GET_CODE (XEXP (x, 0)) == UNSPEC
14317 && (XINT (XEXP (x, 0), 1) == UNSPEC_GOTPCREL
14318 || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL)
14319 && (MEM_P (orig_x) || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL))
14320 {
14321 x = XVECEXP (XEXP (x, 0), 0, 0);
14322 if (GET_MODE (orig_x) != GET_MODE (x) && MEM_P (orig_x))
14323 {
14324 x = simplify_gen_subreg (GET_MODE (orig_x), x,
14325 GET_MODE (x), 0);
14326 if (x == NULL_RTX)
14327 return orig_x;
14328 }
14329 return x;
14330 }
14331
14332 if (ix86_cmodel != CM_MEDIUM_PIC && ix86_cmodel != CM_LARGE_PIC)
14333 return ix86_delegitimize_tls_address (orig_x);
14334
14335 /* Fall thru into the code shared with -m32 for -mcmodel=large -fpic
14336 and -mcmodel=medium -fpic. */
14337 }
14338
14339 if (GET_CODE (x) != PLUS
14340 || GET_CODE (XEXP (x, 1)) != CONST)
14341 return ix86_delegitimize_tls_address (orig_x);
14342
14343 if (ix86_pic_register_p (XEXP (x, 0)))
14344 /* %ebx + GOT/GOTOFF */
14345 ;
14346 else if (GET_CODE (XEXP (x, 0)) == PLUS)
14347 {
14348 /* %ebx + %reg * scale + GOT/GOTOFF */
14349 reg_addend = XEXP (x, 0);
14350 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
14351 reg_addend = XEXP (reg_addend, 1);
14352 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
14353 reg_addend = XEXP (reg_addend, 0);
14354 else
14355 {
14356 reg_addend = NULL_RTX;
14357 addend = XEXP (x, 0);
14358 }
14359 }
14360 else
14361 addend = XEXP (x, 0);
14362
14363 x = XEXP (XEXP (x, 1), 0);
14364 if (GET_CODE (x) == PLUS
14365 && CONST_INT_P (XEXP (x, 1)))
14366 {
14367 const_addend = XEXP (x, 1);
14368 x = XEXP (x, 0);
14369 }
14370
14371 if (GET_CODE (x) == UNSPEC
14372 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
14373 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))
14374 || (XINT (x, 1) == UNSPEC_PLTOFF && ix86_cmodel == CM_LARGE_PIC
14375 && !MEM_P (orig_x) && !addend)))
14376 result = XVECEXP (x, 0, 0);
14377
14378 if (!TARGET_64BIT && TARGET_MACHO && darwin_local_data_pic (x)
14379 && !MEM_P (orig_x))
14380 result = XVECEXP (x, 0, 0);
14381
14382 if (! result)
14383 return ix86_delegitimize_tls_address (orig_x);
14384
14385 if (const_addend)
14386 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
14387 if (reg_addend)
14388 result = gen_rtx_PLUS (Pmode, reg_addend, result);
14389 if (addend)
14390 {
14391 /* If the rest of original X doesn't involve the PIC register, add
14392 addend and subtract pic_offset_table_rtx. This can happen e.g.
14393 for code like:
14394 leal (%ebx, %ecx, 4), %ecx
14395 ...
14396 movl foo@GOTOFF(%ecx), %edx
14397 in which case we return (%ecx - %ebx) + foo. */
14398 if (pic_offset_table_rtx)
14399 result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
14400 pic_offset_table_rtx),
14401 result);
14402 else
14403 return orig_x;
14404 }
14405 if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
14406 {
14407 result = simplify_gen_subreg (GET_MODE (orig_x), result, Pmode, 0);
14408 if (result == NULL_RTX)
14409 return orig_x;
14410 }
14411 return result;
14412 }
14413
14414 /* If X is a machine specific address (i.e. a symbol or label being
14415 referenced as a displacement from the GOT implemented using an
14416 UNSPEC), then return the base term. Otherwise return X. */
14417
14418 rtx
14419 ix86_find_base_term (rtx x)
14420 {
14421 rtx term;
14422
14423 if (TARGET_64BIT)
14424 {
14425 if (GET_CODE (x) != CONST)
14426 return x;
14427 term = XEXP (x, 0);
14428 if (GET_CODE (term) == PLUS
14429 && (CONST_INT_P (XEXP (term, 1))
14430 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
14431 term = XEXP (term, 0);
14432 if (GET_CODE (term) != UNSPEC
14433 || (XINT (term, 1) != UNSPEC_GOTPCREL
14434 && XINT (term, 1) != UNSPEC_PCREL))
14435 return x;
14436
14437 return XVECEXP (term, 0, 0);
14438 }
14439
14440 return ix86_delegitimize_address (x);
14441 }
14442 \f
14443 static void
14444 put_condition_code (enum rtx_code code, enum machine_mode mode, bool reverse,
14445 bool fp, FILE *file)
14446 {
14447 const char *suffix;
14448
14449 if (mode == CCFPmode || mode == CCFPUmode)
14450 {
14451 code = ix86_fp_compare_code_to_integer (code);
14452 mode = CCmode;
14453 }
14454 if (reverse)
14455 code = reverse_condition (code);
14456
14457 switch (code)
14458 {
14459 case EQ:
14460 switch (mode)
14461 {
14462 case CCAmode:
14463 suffix = "a";
14464 break;
14465
14466 case CCCmode:
14467 suffix = "c";
14468 break;
14469
14470 case CCOmode:
14471 suffix = "o";
14472 break;
14473
14474 case CCSmode:
14475 suffix = "s";
14476 break;
14477
14478 default:
14479 suffix = "e";
14480 }
14481 break;
14482 case NE:
14483 switch (mode)
14484 {
14485 case CCAmode:
14486 suffix = "na";
14487 break;
14488
14489 case CCCmode:
14490 suffix = "nc";
14491 break;
14492
14493 case CCOmode:
14494 suffix = "no";
14495 break;
14496
14497 case CCSmode:
14498 suffix = "ns";
14499 break;
14500
14501 default:
14502 suffix = "ne";
14503 }
14504 break;
14505 case GT:
14506 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
14507 suffix = "g";
14508 break;
14509 case GTU:
14510 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
14511 Those same assemblers have the same but opposite lossage on cmov. */
14512 if (mode == CCmode)
14513 suffix = fp ? "nbe" : "a";
14514 else
14515 gcc_unreachable ();
14516 break;
14517 case LT:
14518 switch (mode)
14519 {
14520 case CCNOmode:
14521 case CCGOCmode:
14522 suffix = "s";
14523 break;
14524
14525 case CCmode:
14526 case CCGCmode:
14527 suffix = "l";
14528 break;
14529
14530 default:
14531 gcc_unreachable ();
14532 }
14533 break;
14534 case LTU:
14535 if (mode == CCmode)
14536 suffix = "b";
14537 else if (mode == CCCmode)
14538 suffix = "c";
14539 else
14540 gcc_unreachable ();
14541 break;
14542 case GE:
14543 switch (mode)
14544 {
14545 case CCNOmode:
14546 case CCGOCmode:
14547 suffix = "ns";
14548 break;
14549
14550 case CCmode:
14551 case CCGCmode:
14552 suffix = "ge";
14553 break;
14554
14555 default:
14556 gcc_unreachable ();
14557 }
14558 break;
14559 case GEU:
14560 if (mode == CCmode)
14561 suffix = fp ? "nb" : "ae";
14562 else if (mode == CCCmode)
14563 suffix = "nc";
14564 else
14565 gcc_unreachable ();
14566 break;
14567 case LE:
14568 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
14569 suffix = "le";
14570 break;
14571 case LEU:
14572 if (mode == CCmode)
14573 suffix = "be";
14574 else
14575 gcc_unreachable ();
14576 break;
14577 case UNORDERED:
14578 suffix = fp ? "u" : "p";
14579 break;
14580 case ORDERED:
14581 suffix = fp ? "nu" : "np";
14582 break;
14583 default:
14584 gcc_unreachable ();
14585 }
14586 fputs (suffix, file);
14587 }
14588
14589 /* Print the name of register X to FILE based on its machine mode and number.
14590 If CODE is 'w', pretend the mode is HImode.
14591 If CODE is 'b', pretend the mode is QImode.
14592 If CODE is 'k', pretend the mode is SImode.
14593 If CODE is 'q', pretend the mode is DImode.
14594 If CODE is 'x', pretend the mode is V4SFmode.
14595 If CODE is 't', pretend the mode is V8SFmode.
14596 If CODE is 'g', pretend the mode is V16SFmode.
14597 If CODE is 'h', pretend the reg is the 'high' byte register.
14598 If CODE is 'y', print "st(0)" instead of "st" if the reg is a stack op.
14599 If CODE is 'd', duplicate the operand for an AVX instruction.
14600 */
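/* A concrete (illustrative) example with the legacy AX register: code 'q'
   prints "rax", 'k' prints "eax", 'w' prints "ax", 'b' prints "al" and 'h'
   prints "ah"; with no code the width is taken from the operand's machine
   mode instead.  */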
14601
14602 void
14603 print_reg (rtx x, int code, FILE *file)
14604 {
14605 const char *reg;
14606 unsigned int regno;
14607 bool duplicated = code == 'd' && TARGET_AVX;
14608
14609 if (ASSEMBLER_DIALECT == ASM_ATT)
14610 putc ('%', file);
14611
14612 if (x == pc_rtx)
14613 {
14614 gcc_assert (TARGET_64BIT);
14615 fputs ("rip", file);
14616 return;
14617 }
14618
14619 regno = true_regnum (x);
14620 gcc_assert (regno != ARG_POINTER_REGNUM
14621 && regno != FRAME_POINTER_REGNUM
14622 && regno != FLAGS_REG
14623 && regno != FPSR_REG
14624 && regno != FPCR_REG);
14625
14626 if (code == 'w' || MMX_REG_P (x))
14627 code = 2;
14628 else if (code == 'b')
14629 code = 1;
14630 else if (code == 'k')
14631 code = 4;
14632 else if (code == 'q')
14633 code = 8;
14634 else if (code == 'y')
14635 code = 3;
14636 else if (code == 'h')
14637 code = 0;
14638 else if (code == 'x')
14639 code = 16;
14640 else if (code == 't')
14641 code = 32;
14642 else if (code == 'g')
14643 code = 64;
14644 else
14645 code = GET_MODE_SIZE (GET_MODE (x));
14646
14647 /* Irritatingly, the AMD extended registers use a different naming convention
14648 from the normal registers: "r%d[bwd]". */
14649 if (REX_INT_REGNO_P (regno))
14650 {
14651 gcc_assert (TARGET_64BIT);
14652 putc ('r', file);
14653 fprint_ul (file, regno - FIRST_REX_INT_REG + 8);
14654 switch (code)
14655 {
14656 case 0:
14657 error ("extended registers have no high halves");
14658 break;
14659 case 1:
14660 putc ('b', file);
14661 break;
14662 case 2:
14663 putc ('w', file);
14664 break;
14665 case 4:
14666 putc ('d', file);
14667 break;
14668 case 8:
14669 /* no suffix */
14670 break;
14671 default:
14672 error ("unsupported operand size for extended register");
14673 break;
14674 }
14675 return;
14676 }
14677
14678 reg = NULL;
14679 switch (code)
14680 {
14681 case 3:
14682 if (STACK_TOP_P (x))
14683 {
14684 reg = "st(0)";
14685 break;
14686 }
14687 /* FALLTHRU */
14688 case 8:
14689 case 4:
14690 case 12:
14691 if (! ANY_FP_REG_P (x))
14692 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
14693 /* FALLTHRU */
14694 case 16:
14695 case 2:
14696 normal:
14697 reg = hi_reg_name[regno];
14698 break;
14699 case 1:
14700 if (regno >= ARRAY_SIZE (qi_reg_name))
14701 goto normal;
14702 reg = qi_reg_name[regno];
14703 break;
14704 case 0:
14705 if (regno >= ARRAY_SIZE (qi_high_reg_name))
14706 goto normal;
14707 reg = qi_high_reg_name[regno];
14708 break;
14709 case 32:
14710 if (SSE_REG_P (x))
14711 {
14712 gcc_assert (!duplicated);
14713 putc ('y', file);
14714 fputs (hi_reg_name[regno] + 1, file);
14715 return;
14716 }
14717 case 64:
14718 if (SSE_REG_P (x))
14719 {
14720 gcc_assert (!duplicated);
14721 putc ('z', file);
14722 fputs (hi_reg_name[REGNO (x)] + 1, file);
14723 return;
14724 }
14725 break;
14726 default:
14727 gcc_unreachable ();
14728 }
14729
14730 fputs (reg, file);
14731 if (duplicated)
14732 {
14733 if (ASSEMBLER_DIALECT == ASM_ATT)
14734 fprintf (file, ", %%%s", reg);
14735 else
14736 fprintf (file, ", %s", reg);
14737 }
14738 }
14739
14740 /* Locate some local-dynamic symbol still in use by this function
14741 so that we can print its name in some tls_local_dynamic_base
14742 pattern. */
14743
14744 static int
14745 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
14746 {
14747 rtx x = *px;
14748
14749 if (GET_CODE (x) == SYMBOL_REF
14750 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
14751 {
14752 cfun->machine->some_ld_name = XSTR (x, 0);
14753 return 1;
14754 }
14755
14756 return 0;
14757 }
14758
14759 static const char *
14760 get_some_local_dynamic_name (void)
14761 {
14762 rtx insn;
14763
14764 if (cfun->machine->some_ld_name)
14765 return cfun->machine->some_ld_name;
14766
14767 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
14768 if (NONDEBUG_INSN_P (insn)
14769 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
14770 return cfun->machine->some_ld_name;
14771
14772 return NULL;
14773 }
14774
14775 /* Meaning of CODE:
14776 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
14777 C -- print opcode suffix for set/cmov insn.
14778 c -- like C, but print reversed condition
14779 F,f -- likewise, but for floating-point.
14780 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
14781 otherwise nothing
14782 R -- print embedded rounding and sae.
14783 r -- print only sae.
14784 z -- print the opcode suffix for the size of the current operand.
14785 Z -- likewise, with special suffixes for x87 instructions.
14786 * -- print a star (in certain assembler syntax)
14787 A -- print an absolute memory reference.
14788 E -- print address with DImode register names if TARGET_64BIT.
14789 w -- print the operand as if it's a "word" (HImode) even if it isn't.
14790 s -- print a shift double count, followed by the assembler's argument
14791 delimiter.
14792 b -- print the QImode name of the register for the indicated operand.
14793 %b0 would print %al if operands[0] is reg 0.
14794 w -- likewise, print the HImode name of the register.
14795 k -- likewise, print the SImode name of the register.
14796 q -- likewise, print the DImode name of the register.
14797 x -- likewise, print the V4SFmode name of the register.
14798 t -- likewise, print the V8SFmode name of the register.
14799 g -- likewise, print the V16SFmode name of the register.
14800 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
14801 y -- print "st(0)" instead of "st" as a register.
14802 d -- print duplicated register operand for AVX instruction.
14803 D -- print condition for SSE cmp instruction.
14804 P -- if PIC, print an @PLT suffix.
14805 p -- print raw symbol name.
14806 X -- don't print any sort of PIC '@' suffix for a symbol.
14807 & -- print some in-use local-dynamic symbol name.
14808 H -- print a memory address offset by 8; used for sse high-parts
14809 Y -- print condition for XOP pcom* instruction.
14810 + -- print a branch hint as 'cs' or 'ds' prefix
14811 ; -- print a semicolon (after prefixes due to bug in older gas).
14812 ~ -- print "i" if TARGET_AVX2, "f" otherwise.
14813 @ -- print a segment register of thread base pointer load
14814 ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode
14815 */
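/* Usage sketch (hypothetical output template, for illustration only): in a
   template such as "mov%z0\t{%1, %0|%0, %1}", the "%z0" expands to the size
   suffix of operand 0 -- giving "movl" for an SImode operand under the AT&T
   dialect -- while Intel syntax gets no suffix and the {att|intel}
   alternatives select the operand order for the current dialect.  */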
14816
14817 void
14818 ix86_print_operand (FILE *file, rtx x, int code)
14819 {
14820 if (code)
14821 {
14822 switch (code)
14823 {
14824 case 'A':
14825 switch (ASSEMBLER_DIALECT)
14826 {
14827 case ASM_ATT:
14828 putc ('*', file);
14829 break;
14830
14831 case ASM_INTEL:
14832 /* Intel syntax. For absolute addresses, registers should not
14833 be surrounded by brackets. */
14834 if (!REG_P (x))
14835 {
14836 putc ('[', file);
14837 ix86_print_operand (file, x, 0);
14838 putc (']', file);
14839 return;
14840 }
14841 break;
14842
14843 default:
14844 gcc_unreachable ();
14845 }
14846
14847 ix86_print_operand (file, x, 0);
14848 return;
14849
14850 case 'E':
14851 /* Wrap address in an UNSPEC to declare special handling. */
14852 if (TARGET_64BIT)
14853 x = gen_rtx_UNSPEC (DImode, gen_rtvec (1, x), UNSPEC_LEA_ADDR);
14854
14855 output_address (x);
14856 return;
14857
14858 case 'L':
14859 if (ASSEMBLER_DIALECT == ASM_ATT)
14860 putc ('l', file);
14861 return;
14862
14863 case 'W':
14864 if (ASSEMBLER_DIALECT == ASM_ATT)
14865 putc ('w', file);
14866 return;
14867
14868 case 'B':
14869 if (ASSEMBLER_DIALECT == ASM_ATT)
14870 putc ('b', file);
14871 return;
14872
14873 case 'Q':
14874 if (ASSEMBLER_DIALECT == ASM_ATT)
14875 putc ('l', file);
14876 return;
14877
14878 case 'S':
14879 if (ASSEMBLER_DIALECT == ASM_ATT)
14880 putc ('s', file);
14881 return;
14882
14883 case 'T':
14884 if (ASSEMBLER_DIALECT == ASM_ATT)
14885 putc ('t', file);
14886 return;
14887
14888 case 'O':
14889 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
14890 if (ASSEMBLER_DIALECT != ASM_ATT)
14891 return;
14892
14893 switch (GET_MODE_SIZE (GET_MODE (x)))
14894 {
14895 case 2:
14896 putc ('w', file);
14897 break;
14898
14899 case 4:
14900 putc ('l', file);
14901 break;
14902
14903 case 8:
14904 putc ('q', file);
14905 break;
14906
14907 default:
14908 output_operand_lossage
14909 ("invalid operand size for operand code 'O'");
14910 return;
14911 }
14912
14913 putc ('.', file);
14914 #endif
14915 return;
14916
14917 case 'z':
14918 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
14919 {
14920 /* Opcodes don't get size suffixes if using Intel syntax. */
14921 if (ASSEMBLER_DIALECT == ASM_INTEL)
14922 return;
14923
14924 switch (GET_MODE_SIZE (GET_MODE (x)))
14925 {
14926 case 1:
14927 putc ('b', file);
14928 return;
14929
14930 case 2:
14931 putc ('w', file);
14932 return;
14933
14934 case 4:
14935 putc ('l', file);
14936 return;
14937
14938 case 8:
14939 putc ('q', file);
14940 return;
14941
14942 default:
14943 output_operand_lossage
14944 ("invalid operand size for operand code 'z'");
14945 return;
14946 }
14947 }
14948
14949 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
14950 warning
14951 (0, "non-integer operand used with operand code 'z'");
14952 /* FALLTHRU */
14953
14954 case 'Z':
14955 /* 387 opcodes don't get size suffixes if using Intel syntax. */
14956 if (ASSEMBLER_DIALECT == ASM_INTEL)
14957 return;
14958
14959 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
14960 {
14961 switch (GET_MODE_SIZE (GET_MODE (x)))
14962 {
14963 case 2:
14964 #ifdef HAVE_AS_IX86_FILDS
14965 putc ('s', file);
14966 #endif
14967 return;
14968
14969 case 4:
14970 putc ('l', file);
14971 return;
14972
14973 case 8:
14974 #ifdef HAVE_AS_IX86_FILDQ
14975 putc ('q', file);
14976 #else
14977 fputs ("ll", file);
14978 #endif
14979 return;
14980
14981 default:
14982 break;
14983 }
14984 }
14985 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
14986 {
14987 /* 387 opcodes don't get size suffixes
14988 if the operands are registers. */
14989 if (STACK_REG_P (x))
14990 return;
14991
14992 switch (GET_MODE_SIZE (GET_MODE (x)))
14993 {
14994 case 4:
14995 putc ('s', file);
14996 return;
14997
14998 case 8:
14999 putc ('l', file);
15000 return;
15001
15002 case 12:
15003 case 16:
15004 putc ('t', file);
15005 return;
15006
15007 default:
15008 break;
15009 }
15010 }
15011 else
15012 {
15013 output_operand_lossage
15014 ("invalid operand type used with operand code 'Z'");
15015 return;
15016 }
15017
15018 output_operand_lossage
15019 ("invalid operand size for operand code 'Z'");
15020 return;
15021
15022 case 'd':
15023 case 'b':
15024 case 'w':
15025 case 'k':
15026 case 'q':
15027 case 'h':
15028 case 't':
15029 case 'g':
15030 case 'y':
15031 case 'x':
15032 case 'X':
15033 case 'P':
15034 case 'p':
15035 break;
15036
15037 case 's':
15038 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
15039 {
15040 ix86_print_operand (file, x, 0);
15041 fputs (", ", file);
15042 }
15043 return;
15044
15045 case 'Y':
15046 switch (GET_CODE (x))
15047 {
15048 case NE:
15049 fputs ("neq", file);
15050 break;
15051 case EQ:
15052 fputs ("eq", file);
15053 break;
15054 case GE:
15055 case GEU:
15056 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
15057 break;
15058 case GT:
15059 case GTU:
15060 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
15061 break;
15062 case LE:
15063 case LEU:
15064 fputs ("le", file);
15065 break;
15066 case LT:
15067 case LTU:
15068 fputs ("lt", file);
15069 break;
15070 case UNORDERED:
15071 fputs ("unord", file);
15072 break;
15073 case ORDERED:
15074 fputs ("ord", file);
15075 break;
15076 case UNEQ:
15077 fputs ("ueq", file);
15078 break;
15079 case UNGE:
15080 fputs ("nlt", file);
15081 break;
15082 case UNGT:
15083 fputs ("nle", file);
15084 break;
15085 case UNLE:
15086 fputs ("ule", file);
15087 break;
15088 case UNLT:
15089 fputs ("ult", file);
15090 break;
15091 case LTGT:
15092 fputs ("une", file);
15093 break;
15094 default:
15095 output_operand_lossage ("operand is not a condition code, "
15096 "invalid operand code 'Y'");
15097 return;
15098 }
15099 return;
15100
15101 case 'D':
15102 /* A little bit of braindamage here. The SSE compare instructions
15103 use completely different names for the comparisons than the
15104 fp conditional moves do. */
15105 switch (GET_CODE (x))
15106 {
15107 case UNEQ:
15108 if (TARGET_AVX)
15109 {
15110 fputs ("eq_us", file);
15111 break;
15112 }
15113 case EQ:
15114 fputs ("eq", file);
15115 break;
15116 case UNLT:
15117 if (TARGET_AVX)
15118 {
15119 fputs ("nge", file);
15120 break;
15121 }
15122 case LT:
15123 fputs ("lt", file);
15124 break;
15125 case UNLE:
15126 if (TARGET_AVX)
15127 {
15128 fputs ("ngt", file);
15129 break;
15130 }
15131 case LE:
15132 fputs ("le", file);
15133 break;
15134 case UNORDERED:
15135 fputs ("unord", file);
15136 break;
15137 case LTGT:
15138 if (TARGET_AVX)
15139 {
15140 fputs ("neq_oq", file);
15141 break;
15142 }
15143 case NE:
15144 fputs ("neq", file);
15145 break;
15146 case GE:
15147 if (TARGET_AVX)
15148 {
15149 fputs ("ge", file);
15150 break;
15151 }
15152 case UNGE:
15153 fputs ("nlt", file);
15154 break;
15155 case GT:
15156 if (TARGET_AVX)
15157 {
15158 fputs ("gt", file);
15159 break;
15160 }
15161 case UNGT:
15162 fputs ("nle", file);
15163 break;
15164 case ORDERED:
15165 fputs ("ord", file);
15166 break;
15167 default:
15168 output_operand_lossage ("operand is not a condition code, "
15169 "invalid operand code 'D'");
15170 return;
15171 }
15172 return;
15173
15174 case 'F':
15175 case 'f':
15176 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
15177 if (ASSEMBLER_DIALECT == ASM_ATT)
15178 putc ('.', file);
15179 #endif
15180
15181 case 'C':
15182 case 'c':
15183 if (!COMPARISON_P (x))
15184 {
15185 output_operand_lossage ("operand is not a condition code, "
15186 "invalid operand code '%c'", code);
15187 return;
15188 }
15189 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)),
15190 code == 'c' || code == 'f',
15191 code == 'F' || code == 'f',
15192 file);
15193 return;
15194
15195 case 'H':
15196 if (!offsettable_memref_p (x))
15197 {
15198 output_operand_lossage ("operand is not an offsettable memory "
15199 "reference, invalid operand code 'H'");
15200 return;
15201 }
15202 /* It doesn't actually matter what mode we use here, as we're
15203 only going to use this for printing. */
15204 x = adjust_address_nv (x, DImode, 8);
15205 /* Output 'qword ptr' for intel assembler dialect. */
15206 if (ASSEMBLER_DIALECT == ASM_INTEL)
15207 code = 'q';
15208 break;
15209
15210 case 'K':
15211 gcc_assert (CONST_INT_P (x));
15212
15213 if (INTVAL (x) & IX86_HLE_ACQUIRE)
15214 #ifdef HAVE_AS_IX86_HLE
15215 fputs ("xacquire ", file);
15216 #else
15217 fputs ("\n" ASM_BYTE "0xf2\n\t", file);
15218 #endif
15219 else if (INTVAL (x) & IX86_HLE_RELEASE)
15220 #ifdef HAVE_AS_IX86_HLE
15221 fputs ("xrelease ", file);
15222 #else
15223 fputs ("\n" ASM_BYTE "0xf3\n\t", file);
15224 #endif
15225 /* We do not want to print the value of the operand. */
15226 return;
15227
15228 case 'N':
15229 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
15230 fputs ("{z}", file);
15231 return;
15232
15233 case 'r':
15234 gcc_assert (CONST_INT_P (x));
15235 gcc_assert (INTVAL (x) == ROUND_SAE);
15236
15237 if (ASSEMBLER_DIALECT == ASM_INTEL)
15238 fputs (", ", file);
15239
15240 fputs ("{sae}", file);
15241
15242 if (ASSEMBLER_DIALECT == ASM_ATT)
15243 fputs (", ", file);
15244
15245 return;
15246
15247 case 'R':
15248 gcc_assert (CONST_INT_P (x));
15249
15250 if (ASSEMBLER_DIALECT == ASM_INTEL)
15251 fputs (", ", file);
15252
15253 switch (INTVAL (x))
15254 {
15255 case ROUND_NEAREST_INT | ROUND_SAE:
15256 fputs ("{rn-sae}", file);
15257 break;
15258 case ROUND_NEG_INF | ROUND_SAE:
15259 fputs ("{rd-sae}", file);
15260 break;
15261 case ROUND_POS_INF | ROUND_SAE:
15262 fputs ("{ru-sae}", file);
15263 break;
15264 case ROUND_ZERO | ROUND_SAE:
15265 fputs ("{rz-sae}", file);
15266 break;
15267 default:
15268 gcc_unreachable ();
15269 }
15270
15271 if (ASSEMBLER_DIALECT == ASM_ATT)
15272 fputs (", ", file);
15273
15274 return;
15275
15276 case '*':
15277 if (ASSEMBLER_DIALECT == ASM_ATT)
15278 putc ('*', file);
15279 return;
15280
15281 case '&':
15282 {
15283 const char *name = get_some_local_dynamic_name ();
15284 if (name == NULL)
15285 output_operand_lossage ("'%%&' used without any "
15286 "local dynamic TLS references");
15287 else
15288 assemble_name (file, name);
15289 return;
15290 }
15291
15292 case '+':
15293 {
15294 rtx x;
15295
15296 if (!optimize
15297 || optimize_function_for_size_p (cfun)
15298 || !TARGET_BRANCH_PREDICTION_HINTS)
15299 return;
15300
15301 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
15302 if (x)
15303 {
15304 int pred_val = XINT (x, 0);
15305
15306 if (pred_val < REG_BR_PROB_BASE * 45 / 100
15307 || pred_val > REG_BR_PROB_BASE * 55 / 100)
15308 {
15309 bool taken = pred_val > REG_BR_PROB_BASE / 2;
15310 bool cputaken
15311 = final_forward_branch_p (current_output_insn) == 0;
15312
15313 /* Emit hints only in the case where the default branch prediction
15314 heuristics would fail. */
15315 if (taken != cputaken)
15316 {
15317 /* We use 3e (DS) prefix for taken branches and
15318 2e (CS) prefix for not taken branches. */
15319 if (taken)
15320 fputs ("ds ; ", file);
15321 else
15322 fputs ("cs ; ", file);
15323 }
15324 }
15325 }
15326 return;
15327 }
15328
15329 case ';':
15330 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
15331 putc (';', file);
15332 #endif
15333 return;
15334
15335 case '@':
15336 if (ASSEMBLER_DIALECT == ASM_ATT)
15337 putc ('%', file);
15338
15339 /* The kernel uses a different segment register for performance
15340 reasons; that way a system call does not have to trash the userspace
15341 segment register, which would be expensive. */
15342 if (TARGET_64BIT && ix86_cmodel != CM_KERNEL)
15343 fputs ("fs", file);
15344 else
15345 fputs ("gs", file);
15346 return;
15347
15348 case '~':
15349 putc (TARGET_AVX2 ? 'i' : 'f', file);
15350 return;
15351
15352 case '^':
15353 if (TARGET_64BIT && Pmode != word_mode)
15354 fputs ("addr32 ", file);
15355 return;
15356
15357 default:
15358 output_operand_lossage ("invalid operand code '%c'", code);
15359 }
15360 }
15361
15362 if (REG_P (x))
15363 print_reg (x, code, file);
15364
15365 else if (MEM_P (x))
15366 {
15367 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
15368 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
15369 && GET_MODE (x) != BLKmode)
15370 {
15371 const char * size;
15372 switch (GET_MODE_SIZE (GET_MODE (x)))
15373 {
15374 case 1: size = "BYTE"; break;
15375 case 2: size = "WORD"; break;
15376 case 4: size = "DWORD"; break;
15377 case 8: size = "QWORD"; break;
15378 case 12: size = "TBYTE"; break;
15379 case 16:
15380 if (GET_MODE (x) == XFmode)
15381 size = "TBYTE";
15382 else
15383 size = "XMMWORD";
15384 break;
15385 case 32: size = "YMMWORD"; break;
15386 case 64: size = "ZMMWORD"; break;
15387 default:
15388 gcc_unreachable ();
15389 }
15390
15391 /* Check for explicit size override (codes 'b', 'w', 'k',
15392 'q' and 'x') */
15393 if (code == 'b')
15394 size = "BYTE";
15395 else if (code == 'w')
15396 size = "WORD";
15397 else if (code == 'k')
15398 size = "DWORD";
15399 else if (code == 'q')
15400 size = "QWORD";
15401 else if (code == 'x')
15402 size = "XMMWORD";
15403
15404 fputs (size, file);
15405 fputs (" PTR ", file);
15406 }
15407
15408 x = XEXP (x, 0);
15409 /* Avoid (%rip) for call operands. */
15410 if (CONSTANT_ADDRESS_P (x) && code == 'P'
15411 && !CONST_INT_P (x))
15412 output_addr_const (file, x);
15413 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
15414 output_operand_lossage ("invalid constraints for operand");
15415 else
15416 output_address (x);
15417 }
15418
15419 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
15420 {
15421 REAL_VALUE_TYPE r;
15422 long l;
15423
15424 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
15425 REAL_VALUE_TO_TARGET_SINGLE (r, l);
15426
15427 if (ASSEMBLER_DIALECT == ASM_ATT)
15428 putc ('$', file);
15429 /* Sign extend 32bit SFmode immediate to 8 bytes. */
15430 if (code == 'q')
15431 fprintf (file, "0x%08" HOST_LONG_LONG_FORMAT "x",
15432 (unsigned long long) (int) l);
15433 else
15434 fprintf (file, "0x%08x", (unsigned int) l);
15435 }
15436
15437 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
15438 {
15439 REAL_VALUE_TYPE r;
15440 long l[2];
15441
15442 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
15443 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
15444
15445 if (ASSEMBLER_DIALECT == ASM_ATT)
15446 putc ('$', file);
15447 fprintf (file, "0x%lx%08lx", l[1] & 0xffffffff, l[0] & 0xffffffff);
15448 }
15449
15450 /* These float cases don't actually occur as immediate operands. */
15451 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == XFmode)
15452 {
15453 char dstr[30];
15454
15455 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
15456 fputs (dstr, file);
15457 }
15458
15459 else
15460 {
15461 /* We have patterns that allow zero sets of memory, for instance.
15462 In 64-bit mode, we should probably support all 8-byte vectors,
15463 since we can in fact encode that into an immediate. */
15464 if (GET_CODE (x) == CONST_VECTOR)
15465 {
15466 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
15467 x = const0_rtx;
15468 }
15469
15470 if (code != 'P' && code != 'p')
15471 {
15472 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
15473 {
15474 if (ASSEMBLER_DIALECT == ASM_ATT)
15475 putc ('$', file);
15476 }
15477 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
15478 || GET_CODE (x) == LABEL_REF)
15479 {
15480 if (ASSEMBLER_DIALECT == ASM_ATT)
15481 putc ('$', file);
15482 else
15483 fputs ("OFFSET FLAT:", file);
15484 }
15485 }
15486 if (CONST_INT_P (x))
15487 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
15488 else if (flag_pic || MACHOPIC_INDIRECT)
15489 output_pic_addr_const (file, x, code);
15490 else
15491 output_addr_const (file, x);
15492 }
15493 }
15494
15495 static bool
15496 ix86_print_operand_punct_valid_p (unsigned char code)
15497 {
15498 return (code == '@' || code == '*' || code == '+' || code == '&'
15499 || code == ';' || code == '~' || code == '^');
15500 }
15501 \f
15502 /* Print a memory operand whose address is ADDR. */
15503
15504 static void
15505 ix86_print_operand_address (FILE *file, rtx addr)
15506 {
15507 struct ix86_address parts;
15508 rtx base, index, disp;
15509 int scale;
15510 int ok;
15511 bool vsib = false;
15512 int code = 0;
15513
15514 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_VSIBADDR)
15515 {
15516 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
15517 gcc_assert (parts.index == NULL_RTX);
15518 parts.index = XVECEXP (addr, 0, 1);
15519 parts.scale = INTVAL (XVECEXP (addr, 0, 2));
15520 addr = XVECEXP (addr, 0, 0);
15521 vsib = true;
15522 }
15523 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_LEA_ADDR)
15524 {
15525 gcc_assert (TARGET_64BIT);
15526 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
15527 code = 'q';
15528 }
15529 else
15530 ok = ix86_decompose_address (addr, &parts);
15531
15532 gcc_assert (ok);
15533
15534 base = parts.base;
15535 index = parts.index;
15536 disp = parts.disp;
15537 scale = parts.scale;
15538
15539 switch (parts.seg)
15540 {
15541 case SEG_DEFAULT:
15542 break;
15543 case SEG_FS:
15544 case SEG_GS:
15545 if (ASSEMBLER_DIALECT == ASM_ATT)
15546 putc ('%', file);
15547 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
15548 break;
15549 default:
15550 gcc_unreachable ();
15551 }
15552
15553 /* Use the one-byte-shorter RIP-relative addressing for 64-bit mode. */
15554 if (TARGET_64BIT && !base && !index)
15555 {
15556 rtx symbol = disp;
15557
15558 if (GET_CODE (disp) == CONST
15559 && GET_CODE (XEXP (disp, 0)) == PLUS
15560 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
15561 symbol = XEXP (XEXP (disp, 0), 0);
15562
15563 if (GET_CODE (symbol) == LABEL_REF
15564 || (GET_CODE (symbol) == SYMBOL_REF
15565 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
15566 base = pc_rtx;
15567 }
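  /* With BASE set to pc_rtx, print_reg emits "rip", so in AT&T syntax the
     operand above is printed as e.g. "foo(%rip)" rather than as an absolute
     address (the shorter encoding referred to in the comment above).  */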
15568 if (!base && !index)
15569 {
15570 /* Displacement-only addresses require special attention. */
15571
15572 if (CONST_INT_P (disp))
15573 {
15574 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
15575 fputs ("ds:", file);
15576 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
15577 }
15578 else if (flag_pic)
15579 output_pic_addr_const (file, disp, 0);
15580 else
15581 output_addr_const (file, disp);
15582 }
15583 else
15584 {
15585 /* Print SImode register names to force addr32 prefix. */
15586 if (SImode_address_operand (addr, VOIDmode))
15587 {
15588 #ifdef ENABLE_CHECKING
15589 gcc_assert (TARGET_64BIT);
15590 switch (GET_CODE (addr))
15591 {
15592 case SUBREG:
15593 gcc_assert (GET_MODE (addr) == SImode);
15594 gcc_assert (GET_MODE (SUBREG_REG (addr)) == DImode);
15595 break;
15596 case ZERO_EXTEND:
15597 case AND:
15598 gcc_assert (GET_MODE (addr) == DImode);
15599 break;
15600 default:
15601 gcc_unreachable ();
15602 }
15603 #endif
15604 gcc_assert (!code);
15605 code = 'k';
15606 }
15607 else if (code == 0
15608 && TARGET_X32
15609 && disp
15610 && CONST_INT_P (disp)
15611 && INTVAL (disp) < -16*1024*1024)
15612 {
15613 /* X32 runs in 64-bit mode, where displacement, DISP, in
15614 address DISP(%r64), is encoded as 32-bit immediate sign-
15615 extended from 32-bit to 64-bit. For -0x40000300(%r64),
15616 address is %r64 + 0xffffffffbffffd00. When %r64 <
15617 0x40000300, like 0x37ffe064, address is 0xfffffffff7ffdd64,
15618 which is invalid for x32. The correct address is %r64
15619 - 0x40000300 == 0xf7ffdd64. To properly encode
15620 -0x40000300(%r64) for x32, we zero-extend negative
15621 displacement by forcing addr32 prefix which truncates
15622 0xfffffffff7ffdd64 to 0xf7ffdd64. In theory, we should
15623 zero-extend all negative displacements, including -1(%rsp).
15624 However, for small negative displacements, sign-extension
15625 won't cause overflow. We only zero-extend negative
15626 displacements if they < -16*1024*1024, which is also used
15627 to check legitimate address displacements for PIC. */
15628 code = 'k';
15629 }
15630
15631 if (ASSEMBLER_DIALECT == ASM_ATT)
15632 {
15633 if (disp)
15634 {
15635 if (flag_pic)
15636 output_pic_addr_const (file, disp, 0);
15637 else if (GET_CODE (disp) == LABEL_REF)
15638 output_asm_label (disp);
15639 else
15640 output_addr_const (file, disp);
15641 }
15642
15643 putc ('(', file);
15644 if (base)
15645 print_reg (base, code, file);
15646 if (index)
15647 {
15648 putc (',', file);
15649 print_reg (index, vsib ? 0 : code, file);
15650 if (scale != 1 || vsib)
15651 fprintf (file, ",%d", scale);
15652 }
15653 putc (')', file);
15654 }
15655 else
15656 {
15657 rtx offset = NULL_RTX;
15658
15659 if (disp)
15660 {
15661 /* Pull out the offset of a symbol; print any symbol itself. */
15662 if (GET_CODE (disp) == CONST
15663 && GET_CODE (XEXP (disp, 0)) == PLUS
15664 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
15665 {
15666 offset = XEXP (XEXP (disp, 0), 1);
15667 disp = gen_rtx_CONST (VOIDmode,
15668 XEXP (XEXP (disp, 0), 0));
15669 }
15670
15671 if (flag_pic)
15672 output_pic_addr_const (file, disp, 0);
15673 else if (GET_CODE (disp) == LABEL_REF)
15674 output_asm_label (disp);
15675 else if (CONST_INT_P (disp))
15676 offset = disp;
15677 else
15678 output_addr_const (file, disp);
15679 }
15680
15681 putc ('[', file);
15682 if (base)
15683 {
15684 print_reg (base, code, file);
15685 if (offset)
15686 {
15687 if (INTVAL (offset) >= 0)
15688 putc ('+', file);
15689 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
15690 }
15691 }
15692 else if (offset)
15693 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
15694 else
15695 putc ('0', file);
15696
15697 if (index)
15698 {
15699 putc ('+', file);
15700 print_reg (index, vsib ? 0 : code, file);
15701 if (scale != 1 || vsib)
15702 fprintf (file, "*%d", scale);
15703 }
15704 putc (']', file);
15705 }
15706 }
15707 }
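/* A worked example of the two dialects handled above (illustrative, not
   from the original sources): for base %rax, index %rbx, scale 4 and a
   displacement of 16, the AT&T branch prints "16(%rax,%rbx,4)" while the
   Intel branch prints "[rax+16+rbx*4]".  With neither base nor index,
   only the displacement is printed, prefixed with "ds:" in Intel syntax
   when it is a constant and no segment override is in effect.  */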
15708
15709 /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
15710
15711 static bool
15712 i386_asm_output_addr_const_extra (FILE *file, rtx x)
15713 {
15714 rtx op;
15715
15716 if (GET_CODE (x) != UNSPEC)
15717 return false;
15718
15719 op = XVECEXP (x, 0, 0);
15720 switch (XINT (x, 1))
15721 {
15722 case UNSPEC_GOTTPOFF:
15723 output_addr_const (file, op);
15724 /* FIXME: This might be @TPOFF in Sun ld. */
15725 fputs ("@gottpoff", file);
15726 break;
15727 case UNSPEC_TPOFF:
15728 output_addr_const (file, op);
15729 fputs ("@tpoff", file);
15730 break;
15731 case UNSPEC_NTPOFF:
15732 output_addr_const (file, op);
15733 if (TARGET_64BIT)
15734 fputs ("@tpoff", file);
15735 else
15736 fputs ("@ntpoff", file);
15737 break;
15738 case UNSPEC_DTPOFF:
15739 output_addr_const (file, op);
15740 fputs ("@dtpoff", file);
15741 break;
15742 case UNSPEC_GOTNTPOFF:
15743 output_addr_const (file, op);
15744 if (TARGET_64BIT)
15745 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
15746 "@gottpoff(%rip)" : "@gottpoff[rip]", file);
15747 else
15748 fputs ("@gotntpoff", file);
15749 break;
15750 case UNSPEC_INDNTPOFF:
15751 output_addr_const (file, op);
15752 fputs ("@indntpoff", file);
15753 break;
15754 #if TARGET_MACHO
15755 case UNSPEC_MACHOPIC_OFFSET:
15756 output_addr_const (file, op);
15757 putc ('-', file);
15758 machopic_output_function_base_name (file);
15759 break;
15760 #endif
15761
15762 case UNSPEC_STACK_CHECK:
15763 {
15764 int offset;
15765
15766 gcc_assert (flag_split_stack);
15767
15768 #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
15769 offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
15770 #else
15771 gcc_unreachable ();
15772 #endif
15773
15774 fprintf (file, "%s:%d", TARGET_64BIT ? "%fs" : "%gs", offset);
15775 }
15776 break;
15777
15778 default:
15779 return false;
15780 }
15781
15782 return true;
15783 }
15784 \f
15785 /* Split one or more double-mode RTL references into pairs of half-mode
15786 references. The RTL can be REG, offsettable MEM, integer constant, or
15787 CONST_DOUBLE. "operands" is a pointer to an array of double-mode RTLs to
15788 split and "num" is its length. lo_half and hi_half are output arrays
15789 that parallel "operands". */
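/* For example (illustrative): with MODE == DImode the half mode is SImode
   and BYTE is 4, so a memory operand (mem:DI X) is split into
   lo = (mem:SI X) and hi = (mem:SI (X + 4)) via adjust_address, matching
   the little-endian layout, while registers and constants go through
   simplify_gen_subreg to produce the two SImode halves.  */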
15790
15791 void
15792 split_double_mode (enum machine_mode mode, rtx operands[],
15793 int num, rtx lo_half[], rtx hi_half[])
15794 {
15795 enum machine_mode half_mode;
15796 unsigned int byte;
15797
15798 switch (mode)
15799 {
15800 case TImode:
15801 half_mode = DImode;
15802 break;
15803 case DImode:
15804 half_mode = SImode;
15805 break;
15806 default:
15807 gcc_unreachable ();
15808 }
15809
15810 byte = GET_MODE_SIZE (half_mode);
15811
15812 while (num--)
15813 {
15814 rtx op = operands[num];
15815
15816 /* simplify_subreg refuses to split volatile memory references,
15817 but we still have to handle them. */
15818 if (MEM_P (op))
15819 {
15820 lo_half[num] = adjust_address (op, half_mode, 0);
15821 hi_half[num] = adjust_address (op, half_mode, byte);
15822 }
15823 else
15824 {
15825 lo_half[num] = simplify_gen_subreg (half_mode, op,
15826 GET_MODE (op) == VOIDmode
15827 ? mode : GET_MODE (op), 0);
15828 hi_half[num] = simplify_gen_subreg (half_mode, op,
15829 GET_MODE (op) == VOIDmode
15830 ? mode : GET_MODE (op), byte);
15831 }
15832 }
15833 }
15834 \f
15835 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
15836 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
15837 is the expression of the binary operation. The output may either be
15838 emitted here, or returned to the caller, like all output_* functions.
15839
15840 There is no guarantee that the operands are the same mode, as they
15841 might be within FLOAT or FLOAT_EXTEND expressions. */
15842
15843 #ifndef SYSV386_COMPAT
15844 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
15845 wants to fix the assemblers because that causes incompatibility
15846 with gcc. No-one wants to fix gcc because that causes
15847 incompatibility with assemblers... You can use the option of
15848 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
15849 #define SYSV386_COMPAT 1
15850 #endif
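/* Illustrative note (a summary, not from the original sources): the
   difference only matters for fsub/fdiv forms whose destination is not
   %st(0).  With SYSV386_COMPAT=1, output_387_binary_op below picks the
   suffix that AT&T derived assemblers expect for the intended operation;
   with SYSV386_COMPAT=0 it picks the suffix matching what the hardware
   actually does, which only works with a correspondingly fixed gas.  */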
15851
15852 const char *
15853 output_387_binary_op (rtx insn, rtx *operands)
15854 {
15855 static char buf[40];
15856 const char *p;
15857 const char *ssep;
15858 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
15859
15860 #ifdef ENABLE_CHECKING
15861 /* Even if we do not want to check the inputs, this documents the input
15862 constraints, which helps in understanding the following code. */
15863 if (STACK_REG_P (operands[0])
15864 && ((REG_P (operands[1])
15865 && REGNO (operands[0]) == REGNO (operands[1])
15866 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
15867 || (REG_P (operands[2])
15868 && REGNO (operands[0]) == REGNO (operands[2])
15869 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
15870 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
15871 ; /* ok */
15872 else
15873 gcc_assert (is_sse);
15874 #endif
15875
15876 switch (GET_CODE (operands[3]))
15877 {
15878 case PLUS:
15879 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
15880 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
15881 p = "fiadd";
15882 else
15883 p = "fadd";
15884 ssep = "vadd";
15885 break;
15886
15887 case MINUS:
15888 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
15889 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
15890 p = "fisub";
15891 else
15892 p = "fsub";
15893 ssep = "vsub";
15894 break;
15895
15896 case MULT:
15897 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
15898 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
15899 p = "fimul";
15900 else
15901 p = "fmul";
15902 ssep = "vmul";
15903 break;
15904
15905 case DIV:
15906 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
15907 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
15908 p = "fidiv";
15909 else
15910 p = "fdiv";
15911 ssep = "vdiv";
15912 break;
15913
15914 default:
15915 gcc_unreachable ();
15916 }
15917
15918 if (is_sse)
15919 {
15920 if (TARGET_AVX)
15921 {
15922 strcpy (buf, ssep);
15923 if (GET_MODE (operands[0]) == SFmode)
15924 strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
15925 else
15926 strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
15927 }
15928 else
15929 {
15930 strcpy (buf, ssep + 1);
15931 if (GET_MODE (operands[0]) == SFmode)
15932 strcat (buf, "ss\t{%2, %0|%0, %2}");
15933 else
15934 strcat (buf, "sd\t{%2, %0|%0, %2}");
15935 }
15936 return buf;
15937 }
15938 strcpy (buf, p);
15939
15940 switch (GET_CODE (operands[3]))
15941 {
15942 case MULT:
15943 case PLUS:
15944 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
15945 {
15946 rtx temp = operands[2];
15947 operands[2] = operands[1];
15948 operands[1] = temp;
15949 }
15950
15951 /* We now know operands[0] == operands[1]. */
15952
15953 if (MEM_P (operands[2]))
15954 {
15955 p = "%Z2\t%2";
15956 break;
15957 }
15958
15959 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
15960 {
15961 if (STACK_TOP_P (operands[0]))
15962 /* How is it that we are storing to a dead operand[2]?
15963 Well, presumably operands[1] is dead too. We can't
15964 store the result to st(0) as st(0) gets popped on this
15965 instruction. Instead store to operands[2] (which I
15966 think has to be st(1)). st(1) will be popped later.
15967 gcc <= 2.8.1 didn't have this check and generated
15968 assembly code that the Unixware assembler rejected. */
15969 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
15970 else
15971 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
15972 break;
15973 }
15974
15975 if (STACK_TOP_P (operands[0]))
15976 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
15977 else
15978 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
15979 break;
15980
15981 case MINUS:
15982 case DIV:
15983 if (MEM_P (operands[1]))
15984 {
15985 p = "r%Z1\t%1";
15986 break;
15987 }
15988
15989 if (MEM_P (operands[2]))
15990 {
15991 p = "%Z2\t%2";
15992 break;
15993 }
15994
15995 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
15996 {
15997 #if SYSV386_COMPAT
15998 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
15999 derived assemblers, confusingly reverse the direction of
16000 the operation for fsub{r} and fdiv{r} when the
16001 destination register is not st(0). The Intel assembler
16002 doesn't have this brain damage. Read !SYSV386_COMPAT to
16003 figure out what the hardware really does. */
16004 if (STACK_TOP_P (operands[0]))
16005 p = "{p\t%0, %2|rp\t%2, %0}";
16006 else
16007 p = "{rp\t%2, %0|p\t%0, %2}";
16008 #else
16009 if (STACK_TOP_P (operands[0]))
16010 /* As above for fmul/fadd, we can't store to st(0). */
16011 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
16012 else
16013 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
16014 #endif
16015 break;
16016 }
16017
16018 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
16019 {
16020 #if SYSV386_COMPAT
16021 if (STACK_TOP_P (operands[0]))
16022 p = "{rp\t%0, %1|p\t%1, %0}";
16023 else
16024 p = "{p\t%1, %0|rp\t%0, %1}";
16025 #else
16026 if (STACK_TOP_P (operands[0]))
16027 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
16028 else
16029 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
16030 #endif
16031 break;
16032 }
16033
16034 if (STACK_TOP_P (operands[0]))
16035 {
16036 if (STACK_TOP_P (operands[1]))
16037 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
16038 else
16039 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
16040 break;
16041 }
16042 else if (STACK_TOP_P (operands[1]))
16043 {
16044 #if SYSV386_COMPAT
16045 p = "{\t%1, %0|r\t%0, %1}";
16046 #else
16047 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
16048 #endif
16049 }
16050 else
16051 {
16052 #if SYSV386_COMPAT
16053 p = "{r\t%2, %0|\t%0, %2}";
16054 #else
16055 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
16056 #endif
16057 }
16058 break;
16059
16060 default:
16061 gcc_unreachable ();
16062 }
16063
16064 strcat (buf, p);
16065 return buf;
16066 }
16067
16068 /* Check if a 256bit AVX register is referenced inside of EXP. */
16069
16070 static int
16071 ix86_check_avx256_register (rtx *pexp, void *data ATTRIBUTE_UNUSED)
16072 {
16073 rtx exp = *pexp;
16074
16075 if (GET_CODE (exp) == SUBREG)
16076 exp = SUBREG_REG (exp);
16077
16078 if (REG_P (exp)
16079 && VALID_AVX256_REG_OR_OI_MODE (GET_MODE (exp)))
16080 return 1;
16081
16082 return 0;
16083 }
16084
16085 /* Return needed mode for entity in optimize_mode_switching pass. */
16086
16087 static int
16088 ix86_avx_u128_mode_needed (rtx insn)
16089 {
16090 if (CALL_P (insn))
16091 {
16092 rtx link;
16093
16094 /* Needed mode is set to AVX_U128_CLEAN if there are
16095 no 256bit modes used in function arguments. */
16096 for (link = CALL_INSN_FUNCTION_USAGE (insn);
16097 link;
16098 link = XEXP (link, 1))
16099 {
16100 if (GET_CODE (XEXP (link, 0)) == USE)
16101 {
16102 rtx arg = XEXP (XEXP (link, 0), 0);
16103
16104 if (ix86_check_avx256_register (&arg, NULL))
16105 return AVX_U128_DIRTY;
16106 }
16107 }
16108
16109 return AVX_U128_CLEAN;
16110 }
16111
16112 /* Require DIRTY mode if a 256bit AVX register is referenced. Hardware
16113 changes state only when a 256bit register is written to, but we need
16114 to prevent the compiler from moving the optimal insertion point above
16115 an eventual read from a 256bit register. */
16116 if (for_each_rtx (&PATTERN (insn), ix86_check_avx256_register, NULL))
16117 return AVX_U128_DIRTY;
16118
16119 return AVX_U128_ANY;
16120 }
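/* Reading aid for the mode-switching hooks that follow (a summary under
   general AVX facts, not from the original sources): writing a 256bit ymm
   register leaves its upper 128 bits "dirty", and executing legacy SSE
   code in that state is penalized on some processors.  The DIRTY/CLEAN
   lattice computed by these hooks lets the optimize_mode_switching pass
   insert vzeroupper (via ix86_emit_mode_set) before calls and returns
   that do not themselves use 256bit registers.  */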
16121
16122 /* Return mode that i387 must be switched into
16123 prior to the execution of insn. */
16124
16125 static int
16126 ix86_i387_mode_needed (int entity, rtx insn)
16127 {
16128 enum attr_i387_cw mode;
16129
16130 /* The mode UNINITIALIZED is used to store the control word after a
16131 function call or ASM pattern. The mode ANY specifies that the function
16132 has no requirements on the control word and makes no changes in the
16133 bits we are interested in. */
16134
16135 if (CALL_P (insn)
16136 || (NONJUMP_INSN_P (insn)
16137 && (asm_noperands (PATTERN (insn)) >= 0
16138 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
16139 return I387_CW_UNINITIALIZED;
16140
16141 if (recog_memoized (insn) < 0)
16142 return I387_CW_ANY;
16143
16144 mode = get_attr_i387_cw (insn);
16145
16146 switch (entity)
16147 {
16148 case I387_TRUNC:
16149 if (mode == I387_CW_TRUNC)
16150 return mode;
16151 break;
16152
16153 case I387_FLOOR:
16154 if (mode == I387_CW_FLOOR)
16155 return mode;
16156 break;
16157
16158 case I387_CEIL:
16159 if (mode == I387_CW_CEIL)
16160 return mode;
16161 break;
16162
16163 case I387_MASK_PM:
16164 if (mode == I387_CW_MASK_PM)
16165 return mode;
16166 break;
16167
16168 default:
16169 gcc_unreachable ();
16170 }
16171
16172 return I387_CW_ANY;
16173 }
16174
16175 /* Return mode that entity must be switched into
16176 prior to the execution of insn. */
16177
16178 static int
16179 ix86_mode_needed (int entity, rtx insn)
16180 {
16181 switch (entity)
16182 {
16183 case AVX_U128:
16184 return ix86_avx_u128_mode_needed (insn);
16185 case I387_TRUNC:
16186 case I387_FLOOR:
16187 case I387_CEIL:
16188 case I387_MASK_PM:
16189 return ix86_i387_mode_needed (entity, insn);
16190 default:
16191 gcc_unreachable ();
16192 }
16193 return 0;
16194 }
16195
16196 /* Check if a 256bit AVX register is referenced in stores. */
16197
16198 static void
16199 ix86_check_avx256_stores (rtx dest, const_rtx set ATTRIBUTE_UNUSED, void *data)
16200 {
16201 if (ix86_check_avx256_register (&dest, NULL))
16202 {
16203 bool *used = (bool *) data;
16204 *used = true;
16205 }
16206 }
16207
16208 /* Calculate mode of upper 128bit AVX registers after the insn. */
16209
16210 static int
16211 ix86_avx_u128_mode_after (int mode, rtx insn)
16212 {
16213 rtx pat = PATTERN (insn);
16214
16215 if (vzeroupper_operation (pat, VOIDmode)
16216 || vzeroall_operation (pat, VOIDmode))
16217 return AVX_U128_CLEAN;
16218
16219 /* We know that the state is clean after a CALL insn if no 256bit
16220 register is used for the function return value. */
16221 if (CALL_P (insn))
16222 {
16223 bool avx_reg256_found = false;
16224 note_stores (pat, ix86_check_avx256_stores, &avx_reg256_found);
16225
16226 return avx_reg256_found ? AVX_U128_DIRTY : AVX_U128_CLEAN;
16227 }
16228
16229 /* Otherwise, return current mode. Remember that if insn
16230 references AVX 256bit registers, the mode was already changed
16231 to DIRTY from MODE_NEEDED. */
16232 return mode;
16233 }
16234
16235 /* Return the mode that an insn results in. */
16236
16237 int
16238 ix86_mode_after (int entity, int mode, rtx insn)
16239 {
16240 switch (entity)
16241 {
16242 case AVX_U128:
16243 return ix86_avx_u128_mode_after (mode, insn);
16244 case I387_TRUNC:
16245 case I387_FLOOR:
16246 case I387_CEIL:
16247 case I387_MASK_PM:
16248 return mode;
16249 default:
16250 gcc_unreachable ();
16251 }
16252 }
16253
16254 static int
16255 ix86_avx_u128_mode_entry (void)
16256 {
16257 tree arg;
16258
16259 /* Entry mode is set to AVX_U128_DIRTY if there are
16260 256bit modes used in function arguments. */
16261 for (arg = DECL_ARGUMENTS (current_function_decl); arg;
16262 arg = TREE_CHAIN (arg))
16263 {
16264 rtx incoming = DECL_INCOMING_RTL (arg);
16265
16266 if (incoming && ix86_check_avx256_register (&incoming, NULL))
16267 return AVX_U128_DIRTY;
16268 }
16269
16270 return AVX_U128_CLEAN;
16271 }
16272
16273 /* Return a mode that ENTITY is assumed to be
16274 switched to at function entry. */
16275
16276 static int
16277 ix86_mode_entry (int entity)
16278 {
16279 switch (entity)
16280 {
16281 case AVX_U128:
16282 return ix86_avx_u128_mode_entry ();
16283 case I387_TRUNC:
16284 case I387_FLOOR:
16285 case I387_CEIL:
16286 case I387_MASK_PM:
16287 return I387_CW_ANY;
16288 default:
16289 gcc_unreachable ();
16290 }
16291 }
16292
16293 static int
16294 ix86_avx_u128_mode_exit (void)
16295 {
16296 rtx reg = crtl->return_rtx;
16297
16298 /* Exit mode is set to AVX_U128_DIRTY if a 256bit mode is used
16299 in the function return register. */
16300 if (reg && ix86_check_avx256_register (&reg, NULL))
16301 return AVX_U128_DIRTY;
16302
16303 return AVX_U128_CLEAN;
16304 }
16305
16306 /* Return a mode that ENTITY is assumed to be
16307 switched to at function exit. */
16308
16309 static int
16310 ix86_mode_exit (int entity)
16311 {
16312 switch (entity)
16313 {
16314 case AVX_U128:
16315 return ix86_avx_u128_mode_exit ();
16316 case I387_TRUNC:
16317 case I387_FLOOR:
16318 case I387_CEIL:
16319 case I387_MASK_PM:
16320 return I387_CW_ANY;
16321 default:
16322 gcc_unreachable ();
16323 }
16324 }
16325
16326 static int
16327 ix86_mode_priority (int entity ATTRIBUTE_UNUSED, int n)
16328 {
16329 return n;
16330 }
16331
16332 /* Output code to initialize the control word copies used by the
16333 trunc?f?i and rounding patterns. The current control word is saved
16334 to a stack slot, and a copy modified for MODE is stored to another. */
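/* For reference (x87 architectural facts, not from the original comment):
   bits 10-11 of the control word form the rounding-control field masked
   by 0x0c00, where 0x0000 rounds to nearest, 0x0400 rounds down, 0x0800
   rounds up and 0x0c00 truncates toward zero, and bit 5 (0x0020) masks
   the precision exception; the constants below select these encodings.  */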
16335
16336 static void
16337 emit_i387_cw_initialization (int mode)
16338 {
16339 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
16340 rtx new_mode;
16341
16342 enum ix86_stack_slot slot;
16343
16344 rtx reg = gen_reg_rtx (HImode);
16345
16346 emit_insn (gen_x86_fnstcw_1 (stored_mode));
16347 emit_move_insn (reg, copy_rtx (stored_mode));
16348
16349 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
16350 || optimize_insn_for_size_p ())
16351 {
16352 switch (mode)
16353 {
16354 case I387_CW_TRUNC:
16355 /* round toward zero (truncate) */
16356 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
16357 slot = SLOT_CW_TRUNC;
16358 break;
16359
16360 case I387_CW_FLOOR:
16361 /* round down toward -oo */
16362 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
16363 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
16364 slot = SLOT_CW_FLOOR;
16365 break;
16366
16367 case I387_CW_CEIL:
16368 /* round up toward +oo */
16369 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
16370 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
16371 slot = SLOT_CW_CEIL;
16372 break;
16373
16374 case I387_CW_MASK_PM:
16375 /* mask precision exception for nearbyint() */
16376 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
16377 slot = SLOT_CW_MASK_PM;
16378 break;
16379
16380 default:
16381 gcc_unreachable ();
16382 }
16383 }
16384 else
16385 {
16386 switch (mode)
16387 {
16388 case I387_CW_TRUNC:
16389 /* round toward zero (truncate) */
16390 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
16391 slot = SLOT_CW_TRUNC;
16392 break;
16393
16394 case I387_CW_FLOOR:
16395 /* round down toward -oo */
16396 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
16397 slot = SLOT_CW_FLOOR;
16398 break;
16399
16400 case I387_CW_CEIL:
16401 /* round up toward +oo */
16402 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
16403 slot = SLOT_CW_CEIL;
16404 break;
16405
16406 case I387_CW_MASK_PM:
16407 /* mask precision exception for nearbyint() */
16408 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
16409 slot = SLOT_CW_MASK_PM;
16410 break;
16411
16412 default:
16413 gcc_unreachable ();
16414 }
16415 }
16416
16417 gcc_assert (slot < MAX_386_STACK_LOCALS);
16418
16419 new_mode = assign_386_stack_local (HImode, slot);
16420 emit_move_insn (new_mode, reg);
16421 }
16422
16423 /* Emit vzeroupper. */
16424
16425 void
16426 ix86_avx_emit_vzeroupper (HARD_REG_SET regs_live)
16427 {
16428 int i;
16429
16430 /* Cancel automatic vzeroupper insertion if there are
16431 live call-saved SSE registers at the insertion point. */
16432
16433 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
16434 if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
16435 return;
16436
16437 if (TARGET_64BIT)
16438 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
16439 if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
16440 return;
16441
16442 emit_insn (gen_avx_vzeroupper ());
16443 }
16444
16445 /* Generate one or more insns to set ENTITY to MODE. REGS_LIVE is the
16446 set of hard registers live at the point where the insn(s) are to
16447 be inserted. */
16450
16451 static void
16452 ix86_emit_mode_set (int entity, int mode, int prev_mode ATTRIBUTE_UNUSED,
16453 HARD_REG_SET regs_live)
16454 {
16455 switch (entity)
16456 {
16457 case AVX_U128:
16458 if (mode == AVX_U128_CLEAN)
16459 ix86_avx_emit_vzeroupper (regs_live);
16460 break;
16461 case I387_TRUNC:
16462 case I387_FLOOR:
16463 case I387_CEIL:
16464 case I387_MASK_PM:
16465 if (mode != I387_CW_ANY
16466 && mode != I387_CW_UNINITIALIZED)
16467 emit_i387_cw_initialization (mode);
16468 break;
16469 default:
16470 gcc_unreachable ();
16471 }
16472 }
16473
16474 /* Output code for INSN to convert a float to a signed int. OPERANDS
16475 are the insn operands. The output may be [HSD]Imode and the input
16476 operand may be [SDX]Fmode. */
16477
16478 const char *
16479 output_fix_trunc (rtx insn, rtx *operands, bool fisttp)
16480 {
16481 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
16482 int dimode_p = GET_MODE (operands[0]) == DImode;
16483 int round_mode = get_attr_i387_cw (insn);
16484
16485 /* Jump through a hoop or two for DImode, since the hardware has no
16486 non-popping instruction. We used to do this a different way, but
16487 that was somewhat fragile and broke with post-reload splitters. */
16488 if ((dimode_p || fisttp) && !stack_top_dies)
16489 output_asm_insn ("fld\t%y1", operands);
16490
16491 gcc_assert (STACK_TOP_P (operands[1]));
16492 gcc_assert (MEM_P (operands[0]));
16493 gcc_assert (GET_MODE (operands[1]) != TFmode);
16494
16495 if (fisttp)
16496 output_asm_insn ("fisttp%Z0\t%0", operands);
16497 else
16498 {
16499 if (round_mode != I387_CW_ANY)
16500 output_asm_insn ("fldcw\t%3", operands);
16501 if (stack_top_dies || dimode_p)
16502 output_asm_insn ("fistp%Z0\t%0", operands);
16503 else
16504 output_asm_insn ("fist%Z0\t%0", operands);
16505 if (round_mode != I387_CW_ANY)
16506 output_asm_insn ("fldcw\t%2", operands);
16507 }
16508
16509 return "";
16510 }
16511
16512 /* Output code for x87 ffreep insn. The OPNO argument, which may only
16513 have the values zero or one, indicates the ffreep insn's operand
16514 from the OPERANDS array. */
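/* Encoding note (believed-correct background, not from the original
   sources): ffreep %st(N) is the two-byte sequence DF C0+N, so when the
   assembler lacks the mnemonic the code below emits it as a raw 16-bit
   word, which is why the format string is "0xc%ddf" (little-endian).  */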
16515
16516 static const char *
16517 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
16518 {
16519 if (TARGET_USE_FFREEP)
16520 #ifdef HAVE_AS_IX86_FFREEP
16521 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
16522 #else
16523 {
16524 static char retval[32];
16525 int regno = REGNO (operands[opno]);
16526
16527 gcc_assert (STACK_REGNO_P (regno));
16528
16529 regno -= FIRST_STACK_REG;
16530
16531 snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
16532 return retval;
16533 }
16534 #endif
16535
16536 return opno ? "fstp\t%y1" : "fstp\t%y0";
16537 }
16538
16539
16540 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
16541 should be used. UNORDERED_P is true when fucom should be used. */
16542
16543 const char *
16544 output_fp_compare (rtx insn, rtx *operands, bool eflags_p, bool unordered_p)
16545 {
16546 int stack_top_dies;
16547 rtx cmp_op0, cmp_op1;
16548 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
16549
16550 if (eflags_p)
16551 {
16552 cmp_op0 = operands[0];
16553 cmp_op1 = operands[1];
16554 }
16555 else
16556 {
16557 cmp_op0 = operands[1];
16558 cmp_op1 = operands[2];
16559 }
16560
16561 if (is_sse)
16562 {
16563 if (GET_MODE (operands[0]) == SFmode)
16564 if (unordered_p)
16565 return "%vucomiss\t{%1, %0|%0, %1}";
16566 else
16567 return "%vcomiss\t{%1, %0|%0, %1}";
16568 else
16569 if (unordered_p)
16570 return "%vucomisd\t{%1, %0|%0, %1}";
16571 else
16572 return "%vcomisd\t{%1, %0|%0, %1}";
16573 }
16574
16575 gcc_assert (STACK_TOP_P (cmp_op0));
16576
16577 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
16578
16579 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
16580 {
16581 if (stack_top_dies)
16582 {
16583 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
16584 return output_387_ffreep (operands, 1);
16585 }
16586 else
16587 return "ftst\n\tfnstsw\t%0";
16588 }
16589
16590 if (STACK_REG_P (cmp_op1)
16591 && stack_top_dies
16592 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
16593 && REGNO (cmp_op1) != FIRST_STACK_REG)
16594 {
16595 /* If the top of the 387 stack dies, and the other operand is also
16596 a stack register that dies, then this must be a `fcompp' float
16597 compare. */
16598
16599 if (eflags_p)
16600 {
16601 /* There is no double popping fcomi variant. Fortunately,
16602 eflags is immune from the fstp's cc clobbering. */
16603 if (unordered_p)
16604 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
16605 else
16606 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
16607 return output_387_ffreep (operands, 0);
16608 }
16609 else
16610 {
16611 if (unordered_p)
16612 return "fucompp\n\tfnstsw\t%0";
16613 else
16614 return "fcompp\n\tfnstsw\t%0";
16615 }
16616 }
16617 else
16618 {
16619 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
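/* For example (illustrative): an fcomi-style compare of FP operands that
   is unordered and pops the dying stack top gives
   mask = 8 + 0 + 2 + 1 = 11, selecting "fucomip" from the table below.  */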
16620
16621 static const char * const alt[16] =
16622 {
16623 "fcom%Z2\t%y2\n\tfnstsw\t%0",
16624 "fcomp%Z2\t%y2\n\tfnstsw\t%0",
16625 "fucom%Z2\t%y2\n\tfnstsw\t%0",
16626 "fucomp%Z2\t%y2\n\tfnstsw\t%0",
16627
16628 "ficom%Z2\t%y2\n\tfnstsw\t%0",
16629 "ficomp%Z2\t%y2\n\tfnstsw\t%0",
16630 NULL,
16631 NULL,
16632
16633 "fcomi\t{%y1, %0|%0, %y1}",
16634 "fcomip\t{%y1, %0|%0, %y1}",
16635 "fucomi\t{%y1, %0|%0, %y1}",
16636 "fucomip\t{%y1, %0|%0, %y1}",
16637
16638 NULL,
16639 NULL,
16640 NULL,
16641 NULL
16642 };
16643
16644 int mask;
16645 const char *ret;
16646
16647 mask = eflags_p << 3;
16648 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
16649 mask |= unordered_p << 1;
16650 mask |= stack_top_dies;
16651
16652 gcc_assert (mask < 16);
16653 ret = alt[mask];
16654 gcc_assert (ret);
16655
16656 return ret;
16657 }
16658 }
16659
16660 void
16661 ix86_output_addr_vec_elt (FILE *file, int value)
16662 {
16663 const char *directive = ASM_LONG;
16664
16665 #ifdef ASM_QUAD
16666 if (TARGET_LP64)
16667 directive = ASM_QUAD;
16668 #else
16669 gcc_assert (!TARGET_64BIT);
16670 #endif
16671
16672 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
16673 }
16674
16675 void
16676 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
16677 {
16678 const char *directive = ASM_LONG;
16679
16680 #ifdef ASM_QUAD
16681 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
16682 directive = ASM_QUAD;
16683 #else
16684 gcc_assert (!TARGET_64BIT);
16685 #endif
16686 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
16687 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
16688 fprintf (file, "%s%s%d-%s%d\n",
16689 directive, LPREFIX, value, LPREFIX, rel);
16690 else if (HAVE_AS_GOTOFF_IN_DATA)
16691 fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
16692 #if TARGET_MACHO
16693 else if (TARGET_MACHO)
16694 {
16695 fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
16696 machopic_output_function_base_name (file);
16697 putc ('\n', file);
16698 }
16699 #endif
16700 else
16701 asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
16702 GOT_SYMBOL_NAME, LPREFIX, value);
16703 }
16704 \f
16705 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
16706 for the target. */
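/* Background for the choice below (general x86 encoding facts, not from
   the original comment): "xor %eax, %eax" is a 2-byte encoding while
   "mov $0, %eax" takes 5 bytes, but the xor form clobbers the flags,
   which is why a CLOBBER of FLAGS_REG is attached when it is used.  */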
16707
16708 void
16709 ix86_expand_clear (rtx dest)
16710 {
16711 rtx tmp;
16712
16713 /* We play register width games, which are only valid after reload. */
16714 gcc_assert (reload_completed);
16715
16716 /* Avoid HImode and its attendant prefix byte. */
16717 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
16718 dest = gen_rtx_REG (SImode, REGNO (dest));
16719 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
16720
16721 if (!TARGET_USE_MOV0 || optimize_insn_for_size_p ())
16722 {
16723 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
16724 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
16725 }
16726
16727 emit_insn (tmp);
16728 }
16729
16730 /* X is an unchanging MEM. If it is a constant pool reference, return
16731 the constant pool rtx, else NULL. */
16732
16733 rtx
16734 maybe_get_pool_constant (rtx x)
16735 {
16736 x = ix86_delegitimize_address (XEXP (x, 0));
16737
16738 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
16739 return get_pool_constant (x);
16740
16741 return NULL_RTX;
16742 }
16743
16744 void
16745 ix86_expand_move (enum machine_mode mode, rtx operands[])
16746 {
16747 rtx op0, op1;
16748 enum tls_model model;
16749
16750 op0 = operands[0];
16751 op1 = operands[1];
16752
16753 if (GET_CODE (op1) == SYMBOL_REF)
16754 {
16755 rtx tmp;
16756
16757 model = SYMBOL_REF_TLS_MODEL (op1);
16758 if (model)
16759 {
16760 op1 = legitimize_tls_address (op1, model, true);
16761 op1 = force_operand (op1, op0);
16762 if (op1 == op0)
16763 return;
16764 op1 = convert_to_mode (mode, op1, 1);
16765 }
16766 else if ((tmp = legitimize_pe_coff_symbol (op1, false)) != NULL_RTX)
16767 op1 = tmp;
16768 }
16769 else if (GET_CODE (op1) == CONST
16770 && GET_CODE (XEXP (op1, 0)) == PLUS
16771 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
16772 {
16773 rtx addend = XEXP (XEXP (op1, 0), 1);
16774 rtx symbol = XEXP (XEXP (op1, 0), 0);
16775 rtx tmp;
16776
16777 model = SYMBOL_REF_TLS_MODEL (symbol);
16778 if (model)
16779 tmp = legitimize_tls_address (symbol, model, true);
16780 else
16781 tmp = legitimize_pe_coff_symbol (symbol, true);
16782
16783 if (tmp)
16784 {
16785 tmp = force_operand (tmp, NULL);
16786 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
16787 op0, 1, OPTAB_DIRECT);
16788 if (tmp == op0)
16789 return;
16790 op1 = convert_to_mode (mode, tmp, 1);
16791 }
16792 }
16793
16794 if ((flag_pic || MACHOPIC_INDIRECT)
16795 && symbolic_operand (op1, mode))
16796 {
16797 if (TARGET_MACHO && !TARGET_64BIT)
16798 {
16799 #if TARGET_MACHO
16800 /* dynamic-no-pic */
16801 if (MACHOPIC_INDIRECT)
16802 {
16803 rtx temp = ((reload_in_progress
16804 || ((op0 && REG_P (op0))
16805 && mode == Pmode))
16806 ? op0 : gen_reg_rtx (Pmode));
16807 op1 = machopic_indirect_data_reference (op1, temp);
16808 if (MACHOPIC_PURE)
16809 op1 = machopic_legitimize_pic_address (op1, mode,
16810 temp == op1 ? 0 : temp);
16811 }
16812 if (op0 != op1 && GET_CODE (op0) != MEM)
16813 {
16814 rtx insn = gen_rtx_SET (VOIDmode, op0, op1);
16815 emit_insn (insn);
16816 return;
16817 }
16818 if (GET_CODE (op0) == MEM)
16819 op1 = force_reg (Pmode, op1);
16820 else
16821 {
16822 rtx temp = op0;
16823 if (GET_CODE (temp) != REG)
16824 temp = gen_reg_rtx (Pmode);
16825 temp = legitimize_pic_address (op1, temp);
16826 if (temp == op0)
16827 return;
16828 op1 = temp;
16829 }
16830 /* dynamic-no-pic */
16831 #endif
16832 }
16833 else
16834 {
16835 if (MEM_P (op0))
16836 op1 = force_reg (mode, op1);
16837 else if (!(TARGET_64BIT && x86_64_movabs_operand (op1, DImode)))
16838 {
16839 rtx reg = can_create_pseudo_p () ? NULL_RTX : op0;
16840 op1 = legitimize_pic_address (op1, reg);
16841 if (op0 == op1)
16842 return;
16843 op1 = convert_to_mode (mode, op1, 1);
16844 }
16845 }
16846 }
16847 else
16848 {
16849 if (MEM_P (op0)
16850 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
16851 || !push_operand (op0, mode))
16852 && MEM_P (op1))
16853 op1 = force_reg (mode, op1);
16854
16855 if (push_operand (op0, mode)
16856 && ! general_no_elim_operand (op1, mode))
16857 op1 = copy_to_mode_reg (mode, op1);
16858
16859 /* Force large constants in 64bit compilation into a register
16860 to get them CSEed. */
16861 if (can_create_pseudo_p ()
16862 && (mode == DImode) && TARGET_64BIT
16863 && immediate_operand (op1, mode)
16864 && !x86_64_zext_immediate_operand (op1, VOIDmode)
16865 && !register_operand (op0, mode)
16866 && optimize)
16867 op1 = copy_to_mode_reg (mode, op1);
16868
16869 if (can_create_pseudo_p ()
16870 && FLOAT_MODE_P (mode)
16871 && GET_CODE (op1) == CONST_DOUBLE)
16872 {
16873 /* If we are loading a floating point constant to a register,
16874 force the value to memory now, since we'll get better code
16875 out of the back end. */
16876
16877 op1 = validize_mem (force_const_mem (mode, op1));
16878 if (!register_operand (op0, mode))
16879 {
16880 rtx temp = gen_reg_rtx (mode);
16881 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
16882 emit_move_insn (op0, temp);
16883 return;
16884 }
16885 }
16886 }
16887
16888 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
16889 }
16890
16891 void
16892 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
16893 {
16894 rtx op0 = operands[0], op1 = operands[1];
16895 unsigned int align = GET_MODE_ALIGNMENT (mode);
16896
16897 if (push_operand (op0, VOIDmode))
16898 op0 = emit_move_resolve_push (mode, op0);
16899
16900 /* Force constants other than zero into memory. We do not know how
16901 the instructions used to build constants modify the upper 64 bits
16902 of the register; once we have that information we may be able
16903 to handle some of them more efficiently. */
16904 if (can_create_pseudo_p ()
16905 && register_operand (op0, mode)
16906 && (CONSTANT_P (op1)
16907 || (GET_CODE (op1) == SUBREG
16908 && CONSTANT_P (SUBREG_REG (op1))))
16909 && !standard_sse_constant_p (op1))
16910 op1 = validize_mem (force_const_mem (mode, op1));
16911
16912 /* We need to check memory alignment for SSE mode since the attribute
16913 can make operands unaligned. */
16914 if (can_create_pseudo_p ()
16915 && SSE_REG_MODE_P (mode)
16916 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
16917 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
16918 {
16919 rtx tmp[2];
16920
16921 /* ix86_expand_vector_move_misalign() does not like constants ... */
16922 if (CONSTANT_P (op1)
16923 || (GET_CODE (op1) == SUBREG
16924 && CONSTANT_P (SUBREG_REG (op1))))
16925 op1 = validize_mem (force_const_mem (mode, op1));
16926
16927 /* ... nor both arguments in memory. */
16928 if (!register_operand (op0, mode)
16929 && !register_operand (op1, mode))
16930 op1 = force_reg (mode, op1);
16931
16932 tmp[0] = op0; tmp[1] = op1;
16933 ix86_expand_vector_move_misalign (mode, tmp);
16934 return;
16935 }
16936
16937 /* Make operand1 a register if it isn't already. */
16938 if (can_create_pseudo_p ()
16939 && !register_operand (op0, mode)
16940 && !register_operand (op1, mode))
16941 {
16942 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
16943 return;
16944 }
16945
16946 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
16947 }
16948
16949 /* Split 32-byte AVX unaligned load and store if needed. */
16950
16951 static void
16952 ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1)
16953 {
16954 rtx m;
16955 rtx (*extract) (rtx, rtx, rtx);
16956 rtx (*load_unaligned) (rtx, rtx);
16957 rtx (*store_unaligned) (rtx, rtx);
16958 enum machine_mode mode;
16959
16960 switch (GET_MODE (op0))
16961 {
16962 default:
16963 gcc_unreachable ();
16964 case V32QImode:
16965 extract = gen_avx_vextractf128v32qi;
16966 load_unaligned = gen_avx_loaddquv32qi;
16967 store_unaligned = gen_avx_storedquv32qi;
16968 mode = V16QImode;
16969 break;
16970 case V8SFmode:
16971 extract = gen_avx_vextractf128v8sf;
16972 load_unaligned = gen_avx_loadups256;
16973 store_unaligned = gen_avx_storeups256;
16974 mode = V4SFmode;
16975 break;
16976 case V4DFmode:
16977 extract = gen_avx_vextractf128v4df;
16978 load_unaligned = gen_avx_loadupd256;
16979 store_unaligned = gen_avx_storeupd256;
16980 mode = V2DFmode;
16981 break;
16982 }
16983
16984 if (MEM_P (op1))
16985 {
16986 if (TARGET_AVX256_SPLIT_UNALIGNED_LOAD)
16987 {
16988 rtx r = gen_reg_rtx (mode);
16989 m = adjust_address (op1, mode, 0);
16990 emit_move_insn (r, m);
16991 m = adjust_address (op1, mode, 16);
16992 r = gen_rtx_VEC_CONCAT (GET_MODE (op0), r, m);
16993 emit_move_insn (op0, r);
16994 }
16995 /* Normal *mov<mode>_internal pattern will handle
16996 unaligned loads just fine if misaligned_operand
16997 is true, and without the UNSPEC it can be combined
16998 with arithmetic instructions. */
16999 else if (misaligned_operand (op1, GET_MODE (op1)))
17000 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
17001 else
17002 emit_insn (load_unaligned (op0, op1));
17003 }
17004 else if (MEM_P (op0))
17005 {
17006 if (TARGET_AVX256_SPLIT_UNALIGNED_STORE)
17007 {
17008 m = adjust_address (op0, mode, 0);
17009 emit_insn (extract (m, op1, const0_rtx));
17010 m = adjust_address (op0, mode, 16);
17011 emit_insn (extract (m, op1, const1_rtx));
17012 }
17013 else
17014 emit_insn (store_unaligned (op0, op1));
17015 }
17016 else
17017 gcc_unreachable ();
17018 }
17019
17020 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
17021 straight to ix86_expand_vector_move. */
17022 /* Code generation for scalar reg-reg moves of single and double precision data:
17023 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
17024 movaps reg, reg
17025 else
17026 movss reg, reg
17027 if (x86_sse_partial_reg_dependency == true)
17028 movapd reg, reg
17029 else
17030 movsd reg, reg
17031
17032 Code generation for scalar loads of double precision data:
17033 if (x86_sse_split_regs == true)
17034 movlpd mem, reg (gas syntax)
17035 else
17036 movsd mem, reg
17037
17038 Code generation for unaligned packed loads of single precision data
17039 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
17040 if (x86_sse_unaligned_move_optimal)
17041 movups mem, reg
17042
17043 if (x86_sse_partial_reg_dependency == true)
17044 {
17045 xorps reg, reg
17046 movlps mem, reg
17047 movhps mem+8, reg
17048 }
17049 else
17050 {
17051 movlps mem, reg
17052 movhps mem+8, reg
17053 }
17054
17055 Code generation for unaligned packed loads of double precision data
17056 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
17057 if (x86_sse_unaligned_move_optimal)
17058 movupd mem, reg
17059
17060 if (x86_sse_split_regs == true)
17061 {
17062 movlpd mem, reg
17063 movhpd mem+8, reg
17064 }
17065 else
17066 {
17067 movsd mem, reg
17068 movhpd mem+8, reg
17069 }
17070 */
17071
17072 void
17073 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
17074 {
17075 rtx op0, op1, orig_op0 = NULL_RTX, m;
17076 rtx (*load_unaligned) (rtx, rtx);
17077 rtx (*store_unaligned) (rtx, rtx);
17078
17079 op0 = operands[0];
17080 op1 = operands[1];
17081
17082 if (GET_MODE_SIZE (mode) == 64)
17083 {
17084 switch (GET_MODE_CLASS (mode))
17085 {
17086 case MODE_VECTOR_INT:
17087 case MODE_INT:
17088 if (GET_MODE (op0) != V16SImode)
17089 {
17090 if (!MEM_P (op0))
17091 {
17092 orig_op0 = op0;
17093 op0 = gen_reg_rtx (V16SImode);
17094 }
17095 else
17096 op0 = gen_lowpart (V16SImode, op0);
17097 }
17098 op1 = gen_lowpart (V16SImode, op1);
17099 /* FALLTHRU */
17100
17101 case MODE_VECTOR_FLOAT:
17102 switch (GET_MODE (op0))
17103 {
17104 default:
17105 gcc_unreachable ();
17106 case V16SImode:
17107 load_unaligned = gen_avx512f_loaddquv16si;
17108 store_unaligned = gen_avx512f_storedquv16si;
17109 break;
17110 case V16SFmode:
17111 load_unaligned = gen_avx512f_loadups512;
17112 store_unaligned = gen_avx512f_storeups512;
17113 break;
17114 case V8DFmode:
17115 load_unaligned = gen_avx512f_loadupd512;
17116 store_unaligned = gen_avx512f_storeupd512;
17117 break;
17118 }
17119
17120 if (MEM_P (op1))
17121 emit_insn (load_unaligned (op0, op1));
17122 else if (MEM_P (op0))
17123 emit_insn (store_unaligned (op0, op1));
17124 else
17125 gcc_unreachable ();
17126 if (orig_op0)
17127 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17128 break;
17129
17130 default:
17131 gcc_unreachable ();
17132 }
17133
17134 return;
17135 }
17136
17137 if (TARGET_AVX
17138 && GET_MODE_SIZE (mode) == 32)
17139 {
17140 switch (GET_MODE_CLASS (mode))
17141 {
17142 case MODE_VECTOR_INT:
17143 case MODE_INT:
17144 if (GET_MODE (op0) != V32QImode)
17145 {
17146 if (!MEM_P (op0))
17147 {
17148 orig_op0 = op0;
17149 op0 = gen_reg_rtx (V32QImode);
17150 }
17151 else
17152 op0 = gen_lowpart (V32QImode, op0);
17153 }
17154 op1 = gen_lowpart (V32QImode, op1);
17155 /* FALLTHRU */
17156
17157 case MODE_VECTOR_FLOAT:
17158 ix86_avx256_split_vector_move_misalign (op0, op1);
17159 if (orig_op0)
17160 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17161 break;
17162
17163 default:
17164 gcc_unreachable ();
17165 }
17166
17167 return;
17168 }
17169
17170 if (MEM_P (op1))
17171 {
17172 /* Normal *mov<mode>_internal pattern will handle
17173 unaligned loads just fine if misaligned_operand
17174 is true, and without the UNSPEC it can be combined
17175 with arithmetic instructions. */
17176 if (TARGET_AVX
17177 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
17178 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
17179 && misaligned_operand (op1, GET_MODE (op1)))
17180 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
17181 /* ??? If we have typed data, then it would appear that using
17182 movdqu is the only way to get unaligned data loaded with
17183 integer type. */
17184 else if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
17185 {
17186 if (GET_MODE (op0) != V16QImode)
17187 {
17188 orig_op0 = op0;
17189 op0 = gen_reg_rtx (V16QImode);
17190 }
17191 op1 = gen_lowpart (V16QImode, op1);
17192 /* We will eventually emit movups based on insn attributes. */
17193 emit_insn (gen_sse2_loaddquv16qi (op0, op1));
17194 if (orig_op0)
17195 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17196 }
17197 else if (TARGET_SSE2 && mode == V2DFmode)
17198 {
17199 rtx zero;
17200
17201 if (TARGET_AVX
17202 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
17203 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17204 || optimize_insn_for_size_p ())
17205 {
17206 /* We will eventually emit movups based on insn attributes. */
17207 emit_insn (gen_sse2_loadupd (op0, op1));
17208 return;
17209 }
17210
17211 /* When SSE registers are split into halves, we can avoid
17212 writing to the top half twice. */
17213 if (TARGET_SSE_SPLIT_REGS)
17214 {
17215 emit_clobber (op0);
17216 zero = op0;
17217 }
17218 else
17219 {
17220 /* ??? Not sure about the best option for the Intel chips.
17221 The following would seem to satisfy; the register is
17222 entirely cleared, breaking the dependency chain. We
17223 then store to the upper half, with a dependency depth
17224 of one. A rumor has it that Intel recommends two movsd
17225 followed by an unpacklpd, but this is unconfirmed. And
17226 given that the dependency depth of the unpacklpd would
17227 still be one, I'm not sure why this would be better. */
17228 zero = CONST0_RTX (V2DFmode);
17229 }
17230
17231 m = adjust_address (op1, DFmode, 0);
17232 emit_insn (gen_sse2_loadlpd (op0, zero, m));
17233 m = adjust_address (op1, DFmode, 8);
17234 emit_insn (gen_sse2_loadhpd (op0, op0, m));
17235 }
17236 else
17237 {
17238 rtx t;
17239
17240 if (TARGET_AVX
17241 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
17242 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17243 || optimize_insn_for_size_p ())
17244 {
17245 if (GET_MODE (op0) != V4SFmode)
17246 {
17247 orig_op0 = op0;
17248 op0 = gen_reg_rtx (V4SFmode);
17249 }
17250 op1 = gen_lowpart (V4SFmode, op1);
17251 emit_insn (gen_sse_loadups (op0, op1));
17252 if (orig_op0)
17253 emit_move_insn (orig_op0,
17254 gen_lowpart (GET_MODE (orig_op0), op0));
17255 return;
17256 }
17257
17258 if (mode != V4SFmode)
17259 t = gen_reg_rtx (V4SFmode);
17260 else
17261 t = op0;
17262
17263 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
17264 emit_move_insn (t, CONST0_RTX (V4SFmode));
17265 else
17266 emit_clobber (t);
17267
17268 m = adjust_address (op1, V2SFmode, 0);
17269 emit_insn (gen_sse_loadlps (t, t, m));
17270 m = adjust_address (op1, V2SFmode, 8);
17271 emit_insn (gen_sse_loadhps (t, t, m));
17272 if (mode != V4SFmode)
17273 emit_move_insn (op0, gen_lowpart (mode, t));
17274 }
17275 }
17276 else if (MEM_P (op0))
17277 {
17278 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
17279 {
17280 op0 = gen_lowpart (V16QImode, op0);
17281 op1 = gen_lowpart (V16QImode, op1);
17282 /* We will eventually emit movups based on insn attributes. */
17283 emit_insn (gen_sse2_storedquv16qi (op0, op1));
17284 }
17285 else if (TARGET_SSE2 && mode == V2DFmode)
17286 {
17287 if (TARGET_AVX
17288 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
17289 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17290 || optimize_insn_for_size_p ())
17291 /* We will eventually emit movups based on insn attributes. */
17292 emit_insn (gen_sse2_storeupd (op0, op1));
17293 else
17294 {
17295 m = adjust_address (op0, DFmode, 0);
17296 emit_insn (gen_sse2_storelpd (m, op1));
17297 m = adjust_address (op0, DFmode, 8);
17298 emit_insn (gen_sse2_storehpd (m, op1));
17299 }
17300 }
17301 else
17302 {
17303 if (mode != V4SFmode)
17304 op1 = gen_lowpart (V4SFmode, op1);
17305
17306 if (TARGET_AVX
17307 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
17308 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17309 || optimize_insn_for_size_p ())
17310 {
17311 op0 = gen_lowpart (V4SFmode, op0);
17312 emit_insn (gen_sse_storeups (op0, op1));
17313 }
17314 else
17315 {
17316 m = adjust_address (op0, V2SFmode, 0);
17317 emit_insn (gen_sse_storelps (m, op1));
17318 m = adjust_address (op0, V2SFmode, 8);
17319 emit_insn (gen_sse_storehps (m, op1));
17320 }
17321 }
17322 }
17323 else
17324 gcc_unreachable ();
17325 }
17326
17327 /* Helper function of ix86_fixup_binary_operands to canonicalize
17328 operand order. Returns true if the operands should be swapped. */
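/* For example (illustrative): for a commutative PLUS where operands[2]
   is the same register as the destination, returning true swaps the
   sources so that src1 matches dst, fitting the two-address x86 form;
   immediates and memory references are likewise pushed toward src2.  */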
17329
17330 static bool
17331 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
17332 rtx operands[])
17333 {
17334 rtx dst = operands[0];
17335 rtx src1 = operands[1];
17336 rtx src2 = operands[2];
17337
17338 /* If the operation is not commutative, we can't do anything. */
17339 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
17340 return false;
17341
17342 /* Highest priority is that src1 should match dst. */
17343 if (rtx_equal_p (dst, src1))
17344 return false;
17345 if (rtx_equal_p (dst, src2))
17346 return true;
17347
17348 /* Next highest priority is that immediate constants come second. */
17349 if (immediate_operand (src2, mode))
17350 return false;
17351 if (immediate_operand (src1, mode))
17352 return true;
17353
17354 /* Lowest priority is that memory references should come second. */
17355 if (MEM_P (src2))
17356 return false;
17357 if (MEM_P (src1))
17358 return true;
17359
17360 return false;
17361 }
17362
17363
17364 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
17365 destination to use for the operation. If different from the true
17366 destination in operands[0], a copy operation will be required. */
17367
17368 rtx
17369 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
17370 rtx operands[])
17371 {
17372 rtx dst = operands[0];
17373 rtx src1 = operands[1];
17374 rtx src2 = operands[2];
17375
17376 /* Canonicalize operand order. */
17377 if (ix86_swap_binary_operands_p (code, mode, operands))
17378 {
17379 rtx temp;
17380
17381 /* It is invalid to swap operands of different modes. */
17382 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
17383
17384 temp = src1;
17385 src1 = src2;
17386 src2 = temp;
17387 }
17388
17389 /* Both source operands cannot be in memory. */
17390 if (MEM_P (src1) && MEM_P (src2))
17391 {
17392 /* Optimization: Only read from memory once. */
17393 if (rtx_equal_p (src1, src2))
17394 {
17395 src2 = force_reg (mode, src2);
17396 src1 = src2;
17397 }
17398 else if (rtx_equal_p (dst, src1))
17399 src2 = force_reg (mode, src2);
17400 else
17401 src1 = force_reg (mode, src1);
17402 }
17403
17404 /* If the destination is memory, and we do not have matching source
17405 operands, do things in registers. */
17406 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
17407 dst = gen_reg_rtx (mode);
17408
17409 /* Source 1 cannot be a constant. */
17410 if (CONSTANT_P (src1))
17411 src1 = force_reg (mode, src1);
17412
17413 /* Source 1 cannot be a non-matching memory. */
17414 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
17415 src1 = force_reg (mode, src1);
17416
17417 /* Improve address combine. */
17418 if (code == PLUS
17419 && GET_MODE_CLASS (mode) == MODE_INT
17420 && MEM_P (src2))
17421 src2 = force_reg (mode, src2);
17422
17423 operands[1] = src1;
17424 operands[2] = src2;
17425 return dst;
17426 }
17427
17428 /* Similarly, but assume that the destination has already been
17429 set up properly. */
17430
17431 void
17432 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
17433 enum machine_mode mode, rtx operands[])
17434 {
17435 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
17436 gcc_assert (dst == operands[0]);
17437 }
17438
17439 /* Attempt to expand a binary operator. Make the expansion closer to the
17440 actual machine than just general_operand, which would allow 3 separate
17441 memory references (one output, two inputs) in a single insn. */
17442
17443 void
17444 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
17445 rtx operands[])
17446 {
17447 rtx src1, src2, dst, op, clob;
17448
17449 dst = ix86_fixup_binary_operands (code, mode, operands);
17450 src1 = operands[1];
17451 src2 = operands[2];
17452
17453 /* Emit the instruction. */
17454
17455 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
17456 if (reload_in_progress)
17457 {
17458 /* Reload doesn't know about the flags register, and doesn't know that
17459 it doesn't want to clobber it. We can only do this with PLUS. */
17460 gcc_assert (code == PLUS);
17461 emit_insn (op);
17462 }
17463 else if (reload_completed
17464 && code == PLUS
17465 && !rtx_equal_p (dst, src1))
17466 {
17467 /* This is going to be an LEA; avoid splitting it later. */
17468 emit_insn (op);
17469 }
17470 else
17471 {
17472 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
17473 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
17474 }
17475
17476 /* Fix up the destination if needed. */
17477 if (dst != operands[0])
17478 emit_move_insn (operands[0], dst);
17479 }
17480
17481 /* Expand vector logical operation CODE (AND, IOR, XOR) in MODE with
17482 the given OPERANDS. */
17483
17484 void
17485 ix86_expand_vector_logical_operator (enum rtx_code code, enum machine_mode mode,
17486 rtx operands[])
17487 {
17488 rtx op1 = NULL_RTX, op2 = NULL_RTX;
17489 if (GET_CODE (operands[1]) == SUBREG)
17490 {
17491 op1 = operands[1];
17492 op2 = operands[2];
17493 }
17494 else if (GET_CODE (operands[2]) == SUBREG)
17495 {
17496 op1 = operands[2];
17497 op2 = operands[1];
17498 }
17499 /* Optimize (__m128i) d | (__m128i) e and similar code
17500 when d and e are float vectors into float vector logical
17501 insn. In C/C++ without using intrinsics there is no other way
17502 to express vector logical operation on float vectors than
17503 to cast them temporarily to integer vectors. */
17504 if (op1
17505 && !TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17506 && ((GET_CODE (op2) == SUBREG || GET_CODE (op2) == CONST_VECTOR))
17507 && GET_MODE_CLASS (GET_MODE (SUBREG_REG (op1))) == MODE_VECTOR_FLOAT
17508 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op1))) == GET_MODE_SIZE (mode)
17509 && SUBREG_BYTE (op1) == 0
17510 && (GET_CODE (op2) == CONST_VECTOR
17511 || (GET_MODE (SUBREG_REG (op1)) == GET_MODE (SUBREG_REG (op2))
17512 && SUBREG_BYTE (op2) == 0))
17513 && can_create_pseudo_p ())
17514 {
17515 rtx dst;
17516 switch (GET_MODE (SUBREG_REG (op1)))
17517 {
17518 case V4SFmode:
17519 case V8SFmode:
17520 case V2DFmode:
17521 case V4DFmode:
17522 dst = gen_reg_rtx (GET_MODE (SUBREG_REG (op1)));
17523 if (GET_CODE (op2) == CONST_VECTOR)
17524 {
17525 op2 = gen_lowpart (GET_MODE (dst), op2);
17526 op2 = force_reg (GET_MODE (dst), op2);
17527 }
17528 else
17529 {
17530 op1 = operands[1];
17531 op2 = SUBREG_REG (operands[2]);
17532 if (!nonimmediate_operand (op2, GET_MODE (dst)))
17533 op2 = force_reg (GET_MODE (dst), op2);
17534 }
17535 op1 = SUBREG_REG (op1);
17536 if (!nonimmediate_operand (op1, GET_MODE (dst)))
17537 op1 = force_reg (GET_MODE (dst), op1);
17538 emit_insn (gen_rtx_SET (VOIDmode, dst,
17539 gen_rtx_fmt_ee (code, GET_MODE (dst),
17540 op1, op2)));
17541 emit_move_insn (operands[0], gen_lowpart (mode, dst));
17542 return;
17543 default:
17544 break;
17545 }
17546 }
17547 if (!nonimmediate_operand (operands[1], mode))
17548 operands[1] = force_reg (mode, operands[1]);
17549 if (!nonimmediate_operand (operands[2], mode))
17550 operands[2] = force_reg (mode, operands[2]);
17551 ix86_fixup_binary_operands_no_copy (code, mode, operands);
17552 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
17553 gen_rtx_fmt_ee (code, mode, operands[1],
17554 operands[2])));
17555 }
17556
17557 /* Return TRUE or FALSE depending on whether the binary operator meets the
17558 appropriate constraints. */
17559
17560 bool
17561 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
17562 rtx operands[3])
17563 {
17564 rtx dst = operands[0];
17565 rtx src1 = operands[1];
17566 rtx src2 = operands[2];
17567
17568 /* Both source operands cannot be in memory. */
17569 if (MEM_P (src1) && MEM_P (src2))
17570 return false;
17571
17572 /* Canonicalize operand order for commutative operators. */
17573 if (ix86_swap_binary_operands_p (code, mode, operands))
17574 {
17575 rtx temp = src1;
17576 src1 = src2;
17577 src2 = temp;
17578 }
17579
17580 /* If the destination is memory, we must have a matching source operand. */
17581 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
17582 return false;
17583
17584 /* Source 1 cannot be a constant. */
17585 if (CONSTANT_P (src1))
17586 return false;
17587
17588 /* Source 1 cannot be a non-matching memory. */
17589 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
17590 /* Support "andhi/andsi/anddi" as a zero-extending move. */
17591 return (code == AND
17592 && (mode == HImode
17593 || mode == SImode
17594 || (TARGET_64BIT && mode == DImode))
17595 && satisfies_constraint_L (src2));
17596
17597 return true;
17598 }
17599
17600 /* Attempt to expand a unary operator. Make the expansion closer to the
17601 actual machine than just general_operand, which would allow 2 separate
17602 memory references (one output, one input) in a single insn. */
17603
17604 void
17605 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
17606 rtx operands[])
17607 {
17608 int matching_memory;
17609 rtx src, dst, op, clob;
17610
17611 dst = operands[0];
17612 src = operands[1];
17613
17614 /* If the destination is memory, and we do not have matching source
17615 operands, do things in registers. */
17616 matching_memory = 0;
17617 if (MEM_P (dst))
17618 {
17619 if (rtx_equal_p (dst, src))
17620 matching_memory = 1;
17621 else
17622 dst = gen_reg_rtx (mode);
17623 }
17624
17625 /* When source operand is memory, destination must match. */
17626 if (MEM_P (src) && !matching_memory)
17627 src = force_reg (mode, src);
17628
17629 /* Emit the instruction. */
17630
17631 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
17632 if (reload_in_progress || code == NOT)
17633 {
17634 /* Reload doesn't know about the flags register, and doesn't know that
17635 it doesn't want to clobber it. */
17636 gcc_assert (code == NOT);
17637 emit_insn (op);
17638 }
17639 else
17640 {
17641 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
17642 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
17643 }
17644
17645 /* Fix up the destination if needed. */
17646 if (dst != operands[0])
17647 emit_move_insn (operands[0], dst);
17648 }
17649
17650 /* Split 32bit/64bit divmod with 8bit unsigned divmod if dividend and
17651 divisor are within the range [0-255]. */
17652
17653 void
17654 ix86_split_idivmod (enum machine_mode mode, rtx operands[],
17655 bool signed_p)
17656 {
17657 rtx end_label, qimode_label;
17658 rtx insn, div, mod;
17659 rtx scratch, tmp0, tmp1, tmp2;
17660 rtx (*gen_divmod4_1) (rtx, rtx, rtx, rtx);
17661 rtx (*gen_zero_extend) (rtx, rtx);
17662 rtx (*gen_test_ccno_1) (rtx, rtx);
17663
17664 switch (mode)
17665 {
17666 case SImode:
17667 gen_divmod4_1 = signed_p ? gen_divmodsi4_1 : gen_udivmodsi4_1;
17668 gen_test_ccno_1 = gen_testsi_ccno_1;
17669 gen_zero_extend = gen_zero_extendqisi2;
17670 break;
17671 case DImode:
17672 gen_divmod4_1 = signed_p ? gen_divmoddi4_1 : gen_udivmoddi4_1;
17673 gen_test_ccno_1 = gen_testdi_ccno_1;
17674 gen_zero_extend = gen_zero_extendqidi2;
17675 break;
17676 default:
17677 gcc_unreachable ();
17678 }
17679
17680 end_label = gen_label_rtx ();
17681 qimode_label = gen_label_rtx ();
17682
17683 scratch = gen_reg_rtx (mode);
17684
17685 /* Use 8bit unsigned divmod if dividend and divisor are within
17686 the range [0-255]. */
17687 emit_move_insn (scratch, operands[2]);
17688 scratch = expand_simple_binop (mode, IOR, scratch, operands[3],
17689 scratch, 1, OPTAB_DIRECT);
17690 emit_insn (gen_test_ccno_1 (scratch, GEN_INT (-0x100)));
17691 tmp0 = gen_rtx_REG (CCNOmode, FLAGS_REG);
17692 tmp0 = gen_rtx_EQ (VOIDmode, tmp0, const0_rtx);
17693 tmp0 = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp0,
17694 gen_rtx_LABEL_REF (VOIDmode, qimode_label),
17695 pc_rtx);
17696 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp0));
17697 predict_jump (REG_BR_PROB_BASE * 50 / 100);
17698 JUMP_LABEL (insn) = qimode_label;
17699
17700 /* Generate original signed/unsigned divmod. */
17701 div = gen_divmod4_1 (operands[0], operands[1],
17702 operands[2], operands[3]);
17703 emit_insn (div);
17704
17705 /* Branch to the end. */
17706 emit_jump_insn (gen_jump (end_label));
17707 emit_barrier ();
17708
17709 /* Generate 8bit unsigned divide. */
17710 emit_label (qimode_label);
17711 /* Don't use operands[0] for result of 8bit divide since not all
17712 registers support QImode ZERO_EXTRACT. */
17713 tmp0 = simplify_gen_subreg (HImode, scratch, mode, 0);
17714 tmp1 = simplify_gen_subreg (HImode, operands[2], mode, 0);
17715 tmp2 = simplify_gen_subreg (QImode, operands[3], mode, 0);
17716 emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, tmp2));
17717
17718 if (signed_p)
17719 {
17720 div = gen_rtx_DIV (mode, operands[2], operands[3]);
17721 mod = gen_rtx_MOD (mode, operands[2], operands[3]);
17722 }
17723 else
17724 {
17725 div = gen_rtx_UDIV (mode, operands[2], operands[3]);
17726 mod = gen_rtx_UMOD (mode, operands[2], operands[3]);
17727 }
17728
17729 /* Extract remainder from AH. */
17730 tmp1 = gen_rtx_ZERO_EXTRACT (mode, tmp0, GEN_INT (8), GEN_INT (8));
17731 if (REG_P (operands[1]))
17732 insn = emit_move_insn (operands[1], tmp1);
17733 else
17734 {
17735 /* Need a new scratch register since the old one has result
17736 of 8bit divide. */
17737 scratch = gen_reg_rtx (mode);
17738 emit_move_insn (scratch, tmp1);
17739 insn = emit_move_insn (operands[1], scratch);
17740 }
17741 set_unique_reg_note (insn, REG_EQUAL, mod);
17742
17743 /* Zero extend quotient from AL. */
17744 tmp1 = gen_lowpart (QImode, tmp0);
17745 insn = emit_insn (gen_zero_extend (operands[0], tmp1));
17746 set_unique_reg_note (insn, REG_EQUAL, div);
17747
17748 emit_label (end_label);
17749 }
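
/* For illustration only, the expansion above corresponds roughly to the
   following sequence for a 32-bit unsigned division (register names and
   exact instruction selection are a sketch, not the emitted pattern):

        mov     dividend, scratch
        or      divisor, scratch
        test    $-0x100, scratch        ; any bit above the low 8 set?
        je      .Lqimode
        (full 32-bit div/idiv)          ; quotient and remainder as usual
        jmp     .Lend
   .Lqimode:
        (8-bit unsigned divide)         ; AL = quotient, AH = remainder
        (zero-extend AL into the quotient, copy AH into the remainder)
   .Lend:                                                                 */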
17750
17751 /* Whether it is OK to emit CFI directives when emitting asm code. */
17752
17753 bool
17754 ix86_emit_cfi ()
17755 {
17756 return dwarf2out_do_cfi_asm ();
17757 }
17758
17759 #define LEA_MAX_STALL (3)
17760 #define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1)
17761
17762 /* Increase given DISTANCE in half-cycles according to
17763 dependencies between PREV and NEXT instructions.
17764 Add 1 half-cycle if there is no dependency and
17765 go to the next cycle if there is some dependency. */
17766
17767 static unsigned int
17768 increase_distance (rtx prev, rtx next, unsigned int distance)
17769 {
17770 df_ref def, use;
17771
17772 if (!prev || !next)
17773 return distance + (distance & 1) + 2;
17774
17775 if (!DF_INSN_USES (next) || !DF_INSN_DEFS (prev))
17776 return distance + 1;
17777
17778 FOR_EACH_INSN_USE (use, next)
17779 FOR_EACH_INSN_DEF (def, prev)
17780 if (!DF_REF_IS_ARTIFICIAL (def)
17781 && DF_REF_REGNO (use) == DF_REF_REGNO (def))
17782 return distance + (distance & 1) + 2;
17783
17784 return distance + 1;
17785 }
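
/* A small worked example of the half-cycle accounting above: with a current
   DISTANCE of 3 half-cycles, an independent PREV/NEXT pair yields 4, while a
   pair where NEXT reads a register defined by PREV is rounded up to the next
   full cycle and advanced by one more cycle, i.e. 3 + (3 & 1) + 2 = 6.  */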
17786
17787 /* Check whether instruction INSN defines register number
17788 REGNO1 or REGNO2. */
17789
17790 static bool
17791 insn_defines_reg (unsigned int regno1, unsigned int regno2,
17792 rtx insn)
17793 {
17794 df_ref def;
17795
17796 FOR_EACH_INSN_DEF (def, insn)
17797 if (DF_REF_REG_DEF_P (def)
17798 && !DF_REF_IS_ARTIFICIAL (def)
17799 && (regno1 == DF_REF_REGNO (def)
17800 || regno2 == DF_REF_REGNO (def)))
17801 return true;
17802
17803 return false;
17804 }
17805
17806 /* Check whether instruction INSN uses register number
17807 REGNO as part of an address expression. */
17808
17809 static bool
17810 insn_uses_reg_mem (unsigned int regno, rtx insn)
17811 {
17812 df_ref use;
17813
17814 FOR_EACH_INSN_USE (use, insn)
17815 if (DF_REF_REG_MEM_P (use) && regno == DF_REF_REGNO (use))
17816 return true;
17817
17818 return false;
17819 }
17820
17821 /* Search backward for non-agu definition of register number REGNO1
17822 or register number REGNO2 in basic block starting from instruction
17823 START up to head of basic block or instruction INSN.
17824
17825 Put true into *FOUND if the definition was found
17826 and false otherwise.
17827
17828 Distance in half-cycles between START and found instruction or head
17829 of BB is added to DISTANCE and returned. */
17830
17831 static int
17832 distance_non_agu_define_in_bb (unsigned int regno1, unsigned int regno2,
17833 rtx insn, int distance,
17834 rtx start, bool *found)
17835 {
17836 basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL;
17837 rtx prev = start;
17838 rtx next = NULL;
17839
17840 *found = false;
17841
17842 while (prev
17843 && prev != insn
17844 && distance < LEA_SEARCH_THRESHOLD)
17845 {
17846 if (NONDEBUG_INSN_P (prev) && NONJUMP_INSN_P (prev))
17847 {
17848 distance = increase_distance (prev, next, distance);
17849 if (insn_defines_reg (regno1, regno2, prev))
17850 {
17851 if (recog_memoized (prev) < 0
17852 || get_attr_type (prev) != TYPE_LEA)
17853 {
17854 *found = true;
17855 return distance;
17856 }
17857 }
17858
17859 next = prev;
17860 }
17861 if (prev == BB_HEAD (bb))
17862 break;
17863
17864 prev = PREV_INSN (prev);
17865 }
17866
17867 return distance;
17868 }
17869
17870 /* Search backward for non-agu definition of register number REGNO1
17871 or register number REGNO2 in INSN's basic block until
17872 1. Pass LEA_SEARCH_THRESHOLD instructions, or
17873 2. Reach a neighbouring BB's boundary, or
17874 3. Reach agu definition.
17875 Returns the distance between the non-agu definition point and INSN.
17876 If no definition point, returns -1. */
17877
17878 static int
17879 distance_non_agu_define (unsigned int regno1, unsigned int regno2,
17880 rtx insn)
17881 {
17882 basic_block bb = BLOCK_FOR_INSN (insn);
17883 int distance = 0;
17884 bool found = false;
17885
17886 if (insn != BB_HEAD (bb))
17887 distance = distance_non_agu_define_in_bb (regno1, regno2, insn,
17888 distance, PREV_INSN (insn),
17889 &found);
17890
17891 if (!found && distance < LEA_SEARCH_THRESHOLD)
17892 {
17893 edge e;
17894 edge_iterator ei;
17895 bool simple_loop = false;
17896
17897 FOR_EACH_EDGE (e, ei, bb->preds)
17898 if (e->src == bb)
17899 {
17900 simple_loop = true;
17901 break;
17902 }
17903
17904 if (simple_loop)
17905 distance = distance_non_agu_define_in_bb (regno1, regno2,
17906 insn, distance,
17907 BB_END (bb), &found);
17908 else
17909 {
17910 int shortest_dist = -1;
17911 bool found_in_bb = false;
17912
17913 FOR_EACH_EDGE (e, ei, bb->preds)
17914 {
17915 int bb_dist
17916 = distance_non_agu_define_in_bb (regno1, regno2,
17917 insn, distance,
17918 BB_END (e->src),
17919 &found_in_bb);
17920 if (found_in_bb)
17921 {
17922 if (shortest_dist < 0)
17923 shortest_dist = bb_dist;
17924 else if (bb_dist > 0)
17925 shortest_dist = MIN (bb_dist, shortest_dist);
17926
17927 found = true;
17928 }
17929 }
17930
17931 distance = shortest_dist;
17932 }
17933 }
17934
17935 /* get_attr_type may modify recog data. We want to make sure
17936 that recog data is valid for instruction INSN, on which
17937 distance_non_agu_define is called. INSN is unchanged here. */
17938 extract_insn_cached (insn);
17939
17940 if (!found)
17941 return -1;
17942
17943 return distance >> 1;
17944 }
17945
17946 /* Return the distance in half-cycles between INSN and the next
17947 insn that uses register number REGNO in a memory address, added
17948 to DISTANCE. Return -1 if REGNO is set.
17949
17950 Put true value into *FOUND if register usage was found and
17951 false otherwise.
17952 Put true value into *REDEFINED if register redefinition was
17953 found and false otherwise. */
17954
17955 static int
17956 distance_agu_use_in_bb (unsigned int regno,
17957 rtx insn, int distance, rtx start,
17958 bool *found, bool *redefined)
17959 {
17960 basic_block bb = NULL;
17961 rtx next = start;
17962 rtx prev = NULL;
17963
17964 *found = false;
17965 *redefined = false;
17966
17967 if (start != NULL_RTX)
17968 {
17969 bb = BLOCK_FOR_INSN (start);
17970 if (start != BB_HEAD (bb))
17971 /* If insn and start belong to the same bb, set prev to insn,
17972 so the call to increase_distance will increase the distance
17973 between insns by 1. */
17974 prev = insn;
17975 }
17976
17977 while (next
17978 && next != insn
17979 && distance < LEA_SEARCH_THRESHOLD)
17980 {
17981 if (NONDEBUG_INSN_P (next) && NONJUMP_INSN_P (next))
17982 {
17983 distance = increase_distance (prev, next, distance);
17984 if (insn_uses_reg_mem (regno, next))
17985 {
17986 /* Return DISTANCE if OP0 is used in memory
17987 address in NEXT. */
17988 *found = true;
17989 return distance;
17990 }
17991
17992 if (insn_defines_reg (regno, INVALID_REGNUM, next))
17993 {
17994 /* Return -1 if OP0 is set in NEXT. */
17995 *redefined = true;
17996 return -1;
17997 }
17998
17999 prev = next;
18000 }
18001
18002 if (next == BB_END (bb))
18003 break;
18004
18005 next = NEXT_INSN (next);
18006 }
18007
18008 return distance;
18009 }
18010
18011 /* Return the distance between INSN and the next insn that uses
18012 register number REGNO0 in a memory address. Return -1 if no such
18013 use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set. */
18014
18015 static int
18016 distance_agu_use (unsigned int regno0, rtx insn)
18017 {
18018 basic_block bb = BLOCK_FOR_INSN (insn);
18019 int distance = 0;
18020 bool found = false;
18021 bool redefined = false;
18022
18023 if (insn != BB_END (bb))
18024 distance = distance_agu_use_in_bb (regno0, insn, distance,
18025 NEXT_INSN (insn),
18026 &found, &redefined);
18027
18028 if (!found && !redefined && distance < LEA_SEARCH_THRESHOLD)
18029 {
18030 edge e;
18031 edge_iterator ei;
18032 bool simple_loop = false;
18033
18034 FOR_EACH_EDGE (e, ei, bb->succs)
18035 if (e->dest == bb)
18036 {
18037 simple_loop = true;
18038 break;
18039 }
18040
18041 if (simple_loop)
18042 distance = distance_agu_use_in_bb (regno0, insn,
18043 distance, BB_HEAD (bb),
18044 &found, &redefined);
18045 else
18046 {
18047 int shortest_dist = -1;
18048 bool found_in_bb = false;
18049 bool redefined_in_bb = false;
18050
18051 FOR_EACH_EDGE (e, ei, bb->succs)
18052 {
18053 int bb_dist
18054 = distance_agu_use_in_bb (regno0, insn,
18055 distance, BB_HEAD (e->dest),
18056 &found_in_bb, &redefined_in_bb);
18057 if (found_in_bb)
18058 {
18059 if (shortest_dist < 0)
18060 shortest_dist = bb_dist;
18061 else if (bb_dist > 0)
18062 shortest_dist = MIN (bb_dist, shortest_dist);
18063
18064 found = true;
18065 }
18066 }
18067
18068 distance = shortest_dist;
18069 }
18070 }
18071
18072 if (!found || redefined)
18073 return -1;
18074
18075 return distance >> 1;
18076 }
18077
18078 /* Define this macro to tune LEA priority vs ADD; it takes effect when
18079 there is a dilemma of choosing LEA or ADD.
18080 Negative value: ADD is preferred over LEA
18081 Zero: Neutral
18082 Positive value: LEA is preferred over ADD. */
18083 #define IX86_LEA_PRIORITY 0
18084
18085 /* Return true if using the lea INSN has a performance advantage
18086 over a sequence of instructions. The instruction sequence has
18087 SPLIT_COST cycles higher latency than the lea latency. */
18088
18089 static bool
18090 ix86_lea_outperforms (rtx insn, unsigned int regno0, unsigned int regno1,
18091 unsigned int regno2, int split_cost, bool has_scale)
18092 {
18093 int dist_define, dist_use;
18094
18095 /* For Silvermont, if a 2-source or 3-source LEA is used for a
18096 non-destructive destination, or to gain the ability to use
18097 SCALE, the use of LEA is justified. */
18098 if (TARGET_SILVERMONT || TARGET_INTEL)
18099 {
18100 if (has_scale)
18101 return true;
18102 if (split_cost < 1)
18103 return false;
18104 if (regno0 == regno1 || regno0 == regno2)
18105 return false;
18106 return true;
18107 }
18108
18109 dist_define = distance_non_agu_define (regno1, regno2, insn);
18110 dist_use = distance_agu_use (regno0, insn);
18111
18112 if (dist_define < 0 || dist_define >= LEA_MAX_STALL)
18113 {
18114 /* If there is no non-AGU operand definition, no AGU
18115 operand usage and the split cost is 0, then both the lea
18116 and non-lea variants have the same priority. Currently
18117 we prefer lea for 64-bit code and non-lea for 32-bit
18118 code. */
18119 if (dist_use < 0 && split_cost == 0)
18120 return TARGET_64BIT || IX86_LEA_PRIORITY;
18121 else
18122 return true;
18123 }
18124
18125 /* With a longer definition distance, lea is preferable.
18126 Here we adjust the distance to take into account the splitting
18127 cost and the lea priority. */
18128 dist_define += split_cost + IX86_LEA_PRIORITY;
18129
18130 /* If there is no use in a memory address then we just check
18131 that the split cost exceeds the AGU stall. */
18132 if (dist_use < 0)
18133 return dist_define > LEA_MAX_STALL;
18134
18135 /* If this insn has both backward non-agu dependence and forward
18136 agu dependence, the one with short distance takes effect. */
18137 return dist_define >= dist_use;
18138 }
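
/* A rough example of the heuristic above (generic tuning, not Silvermont):
   with dist_define = 2 half-cycles and split_cost = 1, the adjusted define
   distance is 2 + 1 + IX86_LEA_PRIORITY = 3.  If the first AGU use is only
   dist_use = 1 away, 3 >= 1 holds and the lea is kept.  If there is no AGU
   use at all (dist_use < 0), we instead require 3 > LEA_MAX_STALL, which
   fails, so the lea is split into ALU instructions.  */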
18139
18140 /* Return true if it is legal to clobber flags by INSN and
18141 false otherwise. */
18142
18143 static bool
18144 ix86_ok_to_clobber_flags (rtx insn)
18145 {
18146 basic_block bb = BLOCK_FOR_INSN (insn);
18147 df_ref use;
18148 bitmap live;
18149
18150 while (insn)
18151 {
18152 if (NONDEBUG_INSN_P (insn))
18153 {
18154 FOR_EACH_INSN_USE (use, insn)
18155 if (DF_REF_REG_USE_P (use) && DF_REF_REGNO (use) == FLAGS_REG)
18156 return false;
18157
18158 if (insn_defines_reg (FLAGS_REG, INVALID_REGNUM, insn))
18159 return true;
18160 }
18161
18162 if (insn == BB_END (bb))
18163 break;
18164
18165 insn = NEXT_INSN (insn);
18166 }
18167
18168 live = df_get_live_out (bb);
18169 return !REGNO_REG_SET_P (live, FLAGS_REG);
18170 }
18171
18172 /* Return true if we need to split op0 = op1 + op2 into a sequence of
18173 move and add to avoid AGU stalls. */
18174
18175 bool
18176 ix86_avoid_lea_for_add (rtx insn, rtx operands[])
18177 {
18178 unsigned int regno0, regno1, regno2;
18179
18180 /* Check if we need to optimize. */
18181 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18182 return false;
18183
18184 /* Check that it is correct to split here. */
18185 if (!ix86_ok_to_clobber_flags (insn))
18186 return false;
18187
18188 regno0 = true_regnum (operands[0]);
18189 regno1 = true_regnum (operands[1]);
18190 regno2 = true_regnum (operands[2]);
18191
18192 /* We need to split only adds with a non-destructive
18193 destination operand. */
18194 if (regno0 == regno1 || regno0 == regno2)
18195 return false;
18196 else
18197 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, 1, false);
18198 }
18199
18200 /* Return true if we should emit lea instruction instead of mov
18201 instruction. */
18202
18203 bool
18204 ix86_use_lea_for_mov (rtx insn, rtx operands[])
18205 {
18206 unsigned int regno0, regno1;
18207
18208 /* Check if we need to optimize. */
18209 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18210 return false;
18211
18212 /* Use lea for reg to reg moves only. */
18213 if (!REG_P (operands[0]) || !REG_P (operands[1]))
18214 return false;
18215
18216 regno0 = true_regnum (operands[0]);
18217 regno1 = true_regnum (operands[1]);
18218
18219 return ix86_lea_outperforms (insn, regno0, regno1, INVALID_REGNUM, 0, false);
18220 }
18221
18222 /* Return true if we need to split lea into a sequence of
18223 instructions to avoid AGU stalls. */
18224
18225 bool
18226 ix86_avoid_lea_for_addr (rtx insn, rtx operands[])
18227 {
18228 unsigned int regno0, regno1, regno2;
18229 int split_cost;
18230 struct ix86_address parts;
18231 int ok;
18232
18233 /* Check whether we need to optimize. */
18234 if (!TARGET_AVOID_LEA_FOR_ADDR || optimize_function_for_size_p (cfun))
18235 return false;
18236
18237 /* The "at least two components" test below might not catch simple
18238 move or zero extension insns if parts.base is non-NULL and parts.disp
18239 is const0_rtx as the only components in the address, e.g. if the
18240 register is %rbp or %r13. As this test is much cheaper and moves or
18241 zero extensions are the common case, do this check first. */
18242 if (REG_P (operands[1])
18243 || (SImode_address_operand (operands[1], VOIDmode)
18244 && REG_P (XEXP (operands[1], 0))))
18245 return false;
18246
18247 /* Check if it is OK to split here. */
18248 if (!ix86_ok_to_clobber_flags (insn))
18249 return false;
18250
18251 ok = ix86_decompose_address (operands[1], &parts);
18252 gcc_assert (ok);
18253
18254 /* There should be at least two components in the address. */
18255 if ((parts.base != NULL_RTX) + (parts.index != NULL_RTX)
18256 + (parts.disp != NULL_RTX) + (parts.scale > 1) < 2)
18257 return false;
18258
18259 /* We should not split into add if a non-legitimate pic
18260 operand is used as the displacement. */
18261 if (parts.disp && flag_pic && !LEGITIMATE_PIC_OPERAND_P (parts.disp))
18262 return false;
18263
18264 regno0 = true_regnum (operands[0]);
18265 regno1 = INVALID_REGNUM;
18266 regno2 = INVALID_REGNUM;
18267
18268 if (parts.base)
18269 regno1 = true_regnum (parts.base);
18270 if (parts.index)
18271 regno2 = true_regnum (parts.index);
18272
18273 split_cost = 0;
18274
18275 /* Compute how many cycles we will add to the execution time
18276 if we split the lea into a sequence of instructions. */
18277 if (parts.base || parts.index)
18278 {
18279 /* Have to use a mov instruction if the non-destructive
18280 destination form is used. */
18281 if (regno1 != regno0 && regno2 != regno0)
18282 split_cost += 1;
18283
18284 /* Have to add index to base if both exist. */
18285 if (parts.base && parts.index)
18286 split_cost += 1;
18287
18288 /* Have to use shift and adds if scale is 2 or greater. */
18289 if (parts.scale > 1)
18290 {
18291 if (regno0 != regno1)
18292 split_cost += 1;
18293 else if (regno2 == regno0)
18294 split_cost += 4;
18295 else
18296 split_cost += parts.scale;
18297 }
18298
18299 /* Have to use an add instruction with an immediate if
18300 disp is non-zero. */
18301 if (parts.disp && parts.disp != const0_rtx)
18302 split_cost += 1;
18303
18304 /* Subtract the price of lea. */
18305 split_cost -= 1;
18306 }
18307
18308 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost,
18309 parts.scale > 1);
18310 }
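
/* As a rough illustration of the cost computed above: for an address like
   disp(base,index,2) with all three registers distinct from the destination,
   the split needs a mov (+1), an add of base and index (+1), a shift for the
   scale (+1) and an add of the displacement (+1), minus the lea itself (-1),
   giving split_cost = 3.  */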
18311
18312 /* Emit x86 binary operator CODE in mode MODE, where the first operand
18313 matches the destination. The RTX includes a clobber of FLAGS_REG. */
18314
18315 static void
18316 ix86_emit_binop (enum rtx_code code, enum machine_mode mode,
18317 rtx dst, rtx src)
18318 {
18319 rtx op, clob;
18320
18321 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, dst, src));
18322 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
18323
18324 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
18325 }
18326
18327 /* Return true if regno1 def is nearest to the insn. */
18328
18329 static bool
18330 find_nearest_reg_def (rtx insn, int regno1, int regno2)
18331 {
18332 rtx prev = insn;
18333 rtx start = BB_HEAD (BLOCK_FOR_INSN (insn));
18334
18335 if (insn == start)
18336 return false;
18337 while (prev && prev != start)
18338 {
18339 if (!INSN_P (prev) || !NONDEBUG_INSN_P (prev))
18340 {
18341 prev = PREV_INSN (prev);
18342 continue;
18343 }
18344 if (insn_defines_reg (regno1, INVALID_REGNUM, prev))
18345 return true;
18346 else if (insn_defines_reg (regno2, INVALID_REGNUM, prev))
18347 return false;
18348 prev = PREV_INSN (prev);
18349 }
18350
18351 /* None of the regs is defined in the bb. */
18352 return false;
18353 }
18354
18355 /* Split lea instructions into a sequence of instructions
18356 which are executed on the ALU to avoid AGU stalls.
18357 It is assumed that it is allowed to clobber the flags register
18358 at the lea position. */
18359
18360 void
18361 ix86_split_lea_for_addr (rtx insn, rtx operands[], enum machine_mode mode)
18362 {
18363 unsigned int regno0, regno1, regno2;
18364 struct ix86_address parts;
18365 rtx target, tmp;
18366 int ok, adds;
18367
18368 ok = ix86_decompose_address (operands[1], &parts);
18369 gcc_assert (ok);
18370
18371 target = gen_lowpart (mode, operands[0]);
18372
18373 regno0 = true_regnum (target);
18374 regno1 = INVALID_REGNUM;
18375 regno2 = INVALID_REGNUM;
18376
18377 if (parts.base)
18378 {
18379 parts.base = gen_lowpart (mode, parts.base);
18380 regno1 = true_regnum (parts.base);
18381 }
18382
18383 if (parts.index)
18384 {
18385 parts.index = gen_lowpart (mode, parts.index);
18386 regno2 = true_regnum (parts.index);
18387 }
18388
18389 if (parts.disp)
18390 parts.disp = gen_lowpart (mode, parts.disp);
18391
18392 if (parts.scale > 1)
18393 {
18394 /* Case r1 = r1 + ... */
18395 if (regno1 == regno0)
18396 {
18397 /* If we have a case r1 = r1 + C * r2 then we
18398 should use multiplication which is very
18399 expensive. Assume the cost model is wrong if we
18400 have such a case here. */
18401 gcc_assert (regno2 != regno0);
18402
18403 for (adds = parts.scale; adds > 0; adds--)
18404 ix86_emit_binop (PLUS, mode, target, parts.index);
18405 }
18406 else
18407 {
18408 /* r1 = r2 + r3 * C case. Need to move r3 into r1. */
18409 if (regno0 != regno2)
18410 emit_insn (gen_rtx_SET (VOIDmode, target, parts.index));
18411
18412 /* Use shift for scaling. */
18413 ix86_emit_binop (ASHIFT, mode, target,
18414 GEN_INT (exact_log2 (parts.scale)));
18415
18416 if (parts.base)
18417 ix86_emit_binop (PLUS, mode, target, parts.base);
18418
18419 if (parts.disp && parts.disp != const0_rtx)
18420 ix86_emit_binop (PLUS, mode, target, parts.disp);
18421 }
18422 }
18423 else if (!parts.base && !parts.index)
18424 {
18425 gcc_assert (parts.disp);
18426 emit_insn (gen_rtx_SET (VOIDmode, target, parts.disp));
18427 }
18428 else
18429 {
18430 if (!parts.base)
18431 {
18432 if (regno0 != regno2)
18433 emit_insn (gen_rtx_SET (VOIDmode, target, parts.index));
18434 }
18435 else if (!parts.index)
18436 {
18437 if (regno0 != regno1)
18438 emit_insn (gen_rtx_SET (VOIDmode, target, parts.base));
18439 }
18440 else
18441 {
18442 if (regno0 == regno1)
18443 tmp = parts.index;
18444 else if (regno0 == regno2)
18445 tmp = parts.base;
18446 else
18447 {
18448 rtx tmp1;
18449
18450 /* Find the better operand for the SET instruction, depending
18451 on which definition is farther from the insn. */
18452 if (find_nearest_reg_def (insn, regno1, regno2))
18453 tmp = parts.index, tmp1 = parts.base;
18454 else
18455 tmp = parts.base, tmp1 = parts.index;
18456
18457 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
18458
18459 if (parts.disp && parts.disp != const0_rtx)
18460 ix86_emit_binop (PLUS, mode, target, parts.disp);
18461
18462 ix86_emit_binop (PLUS, mode, target, tmp1);
18463 return;
18464 }
18465
18466 ix86_emit_binop (PLUS, mode, target, tmp);
18467 }
18468
18469 if (parts.disp && parts.disp != const0_rtx)
18470 ix86_emit_binop (PLUS, mode, target, parts.disp);
18471 }
18472 }
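
/* For illustration, "lea disp(%base,%index,2), %dest" with all registers
   distinct is split by the code above roughly into:

        mov     %index, %dest
        sal     $1, %dest
        add     %base, %dest
        add     $disp, %dest

   Register reuse between the destination and the base/index changes which
   of these steps are emitted.  */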
18473
18474 /* Return true if it is ok to optimize an ADD operation to an LEA
18475 operation to avoid flag register consumption. For most processors,
18476 ADD is faster than LEA. For processors like BONNELL, if the
18477 destination register of the LEA holds an actual address which will be
18478 used soon, LEA is better, otherwise ADD is better. */
18479
18480 bool
18481 ix86_lea_for_add_ok (rtx insn, rtx operands[])
18482 {
18483 unsigned int regno0 = true_regnum (operands[0]);
18484 unsigned int regno1 = true_regnum (operands[1]);
18485 unsigned int regno2 = true_regnum (operands[2]);
18486
18487 /* If a = b + c, (a!=b && a!=c), we must use the lea form. */
18488 if (regno0 != regno1 && regno0 != regno2)
18489 return true;
18490
18491 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18492 return false;
18493
18494 return ix86_lea_outperforms (insn, regno0, regno1, regno2, 0, false);
18495 }
18496
18497 /* Return true if destination reg of SET_BODY is shift count of
18498 USE_BODY. */
18499
18500 static bool
18501 ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
18502 {
18503 rtx set_dest;
18504 rtx shift_rtx;
18505 int i;
18506
18507 /* Retrieve destination of SET_BODY. */
18508 switch (GET_CODE (set_body))
18509 {
18510 case SET:
18511 set_dest = SET_DEST (set_body);
18512 if (!set_dest || !REG_P (set_dest))
18513 return false;
18514 break;
18515 case PARALLEL:
18516 for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
18517 if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
18518 use_body))
18519 return true;
18520 default:
18521 return false;
18522 break;
18523 }
18524
18525 /* Retrieve shift count of USE_BODY. */
18526 switch (GET_CODE (use_body))
18527 {
18528 case SET:
18529 shift_rtx = XEXP (use_body, 1);
18530 break;
18531 case PARALLEL:
18532 for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
18533 if (ix86_dep_by_shift_count_body (set_body,
18534 XVECEXP (use_body, 0, i)))
18535 return true;
18536 default:
18537 return false;
18538 break;
18539 }
18540
18541 if (shift_rtx
18542 && (GET_CODE (shift_rtx) == ASHIFT
18543 || GET_CODE (shift_rtx) == LSHIFTRT
18544 || GET_CODE (shift_rtx) == ASHIFTRT
18545 || GET_CODE (shift_rtx) == ROTATE
18546 || GET_CODE (shift_rtx) == ROTATERT))
18547 {
18548 rtx shift_count = XEXP (shift_rtx, 1);
18549
18550 /* Return true if shift count is dest of SET_BODY. */
18551 if (REG_P (shift_count))
18552 {
18553 /* Add this check since it can be invoked before register
18554 allocation in the pre-reload scheduler. */
18555 if (reload_completed
18556 && true_regnum (set_dest) == true_regnum (shift_count))
18557 return true;
18558 else if (REGNO (set_dest) == REGNO (shift_count))
18559 return true;
18560 }
18561 }
18562
18563 return false;
18564 }
18565
18566 /* Return true if destination reg of SET_INSN is shift count of
18567 USE_INSN. */
18568
18569 bool
18570 ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
18571 {
18572 return ix86_dep_by_shift_count_body (PATTERN (set_insn),
18573 PATTERN (use_insn));
18574 }
18575
18576 /* Return TRUE or FALSE depending on whether the unary operator meets the
18577 appropriate constraints. */
18578
18579 bool
18580 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
18581 enum machine_mode mode ATTRIBUTE_UNUSED,
18582 rtx operands[2])
18583 {
18584 /* If one of the operands is in memory, source and destination must match. */
18585 if ((MEM_P (operands[0])
18586 || MEM_P (operands[1]))
18587 && ! rtx_equal_p (operands[0], operands[1]))
18588 return false;
18589 return true;
18590 }
18591
18592 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
18593 are ok, keeping in mind the possible movddup alternative. */
18594
18595 bool
18596 ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
18597 {
18598 if (MEM_P (operands[0]))
18599 return rtx_equal_p (operands[0], operands[1 + high]);
18600 if (MEM_P (operands[1]) && MEM_P (operands[2]))
18601 return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
18602 return true;
18603 }
18604
18605 /* Post-reload splitter for converting an SF or DFmode value in an
18606 SSE register into an unsigned SImode value. */
18607
18608 void
18609 ix86_split_convert_uns_si_sse (rtx operands[])
18610 {
18611 enum machine_mode vecmode;
18612 rtx value, large, zero_or_two31, input, two31, x;
18613
18614 large = operands[1];
18615 zero_or_two31 = operands[2];
18616 input = operands[3];
18617 two31 = operands[4];
18618 vecmode = GET_MODE (large);
18619 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
18620
18621 /* Load up the value into the low element. We must ensure that the other
18622 elements are valid floats -- zero is the easiest such value. */
18623 if (MEM_P (input))
18624 {
18625 if (vecmode == V4SFmode)
18626 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
18627 else
18628 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
18629 }
18630 else
18631 {
18632 input = gen_rtx_REG (vecmode, REGNO (input));
18633 emit_move_insn (value, CONST0_RTX (vecmode));
18634 if (vecmode == V4SFmode)
18635 emit_insn (gen_sse_movss (value, value, input));
18636 else
18637 emit_insn (gen_sse2_movsd (value, value, input));
18638 }
18639
18640 emit_move_insn (large, two31);
18641 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
18642
18643 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
18644 emit_insn (gen_rtx_SET (VOIDmode, large, x));
18645
18646 x = gen_rtx_AND (vecmode, zero_or_two31, large);
18647 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
18648
18649 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
18650 emit_insn (gen_rtx_SET (VOIDmode, value, x));
18651
18652 large = gen_rtx_REG (V4SImode, REGNO (large));
18653 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
18654
18655 x = gen_rtx_REG (V4SImode, REGNO (value));
18656 if (vecmode == V4SFmode)
18657 emit_insn (gen_fix_truncv4sfv4si2 (x, value));
18658 else
18659 emit_insn (gen_sse2_cvttpd2dq (x, value));
18660 value = x;
18661
18662 emit_insn (gen_xorv4si3 (value, value, large));
18663 }
18664
18665 /* Convert an unsigned DImode value into a DFmode, using only SSE.
18666 Expects the 64-bit DImode to be supplied in a pair of integral
18667 registers. Requires SSE2; will use SSE3 if available. For x86_32,
18668 -mfpmath=sse, !optimize_size only. */
18669
18670 void
18671 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
18672 {
18673 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
18674 rtx int_xmm, fp_xmm;
18675 rtx biases, exponents;
18676 rtx x;
18677
18678 int_xmm = gen_reg_rtx (V4SImode);
18679 if (TARGET_INTER_UNIT_MOVES_TO_VEC)
18680 emit_insn (gen_movdi_to_sse (int_xmm, input));
18681 else if (TARGET_SSE_SPLIT_REGS)
18682 {
18683 emit_clobber (int_xmm);
18684 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
18685 }
18686 else
18687 {
18688 x = gen_reg_rtx (V2DImode);
18689 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
18690 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
18691 }
18692
18693 x = gen_rtx_CONST_VECTOR (V4SImode,
18694 gen_rtvec (4, GEN_INT (0x43300000UL),
18695 GEN_INT (0x45300000UL),
18696 const0_rtx, const0_rtx));
18697 exponents = validize_mem (force_const_mem (V4SImode, x));
18698
18699 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
18700 emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents));
18701
18702 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_lo_xmm)
18703 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
18704 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
18705 (0x1.0p84 + double(fp_value_hi_xmm)).
18706 Note these exponents differ by 32. */
18707
18708 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
18709
18710 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
18711 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
18712 real_ldexp (&bias_lo_rvt, &dconst1, 52);
18713 real_ldexp (&bias_hi_rvt, &dconst1, 84);
18714 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
18715 x = const_double_from_real_value (bias_hi_rvt, DFmode);
18716 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
18717 biases = validize_mem (force_const_mem (V2DFmode, biases));
18718 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
18719
18720 /* Add the upper and lower DFmode values together. */
18721 if (TARGET_SSE3)
18722 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
18723 else
18724 {
18725 x = copy_to_mode_reg (V2DFmode, fp_xmm);
18726 emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm));
18727 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
18728 }
18729
18730 ix86_expand_vector_extract (false, target, fp_xmm, 0);
18731 }
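
/* Putting the arithmetic of the comment above in one place: with the 64-bit
   input split into 32-bit halves lo and hi, the two biased doubles are
   (0x1.0p52 + lo) and (0x1.0p84 + hi * 0x1.0p32); after subtracting the
   biases and adding the halves the result is hi * 2**32 + lo, i.e. the
   original unsigned value, with a single rounding in the final addition.  */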
18732
18733 /* Not used, but eases macroization of patterns. */
18734 void
18735 ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED,
18736 rtx input ATTRIBUTE_UNUSED)
18737 {
18738 gcc_unreachable ();
18739 }
18740
18741 /* Convert an unsigned SImode value into a DFmode. Only currently used
18742 for SSE, but applicable anywhere. */
18743
18744 void
18745 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
18746 {
18747 REAL_VALUE_TYPE TWO31r;
18748 rtx x, fp;
18749
18750 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
18751 NULL, 1, OPTAB_DIRECT);
18752
18753 fp = gen_reg_rtx (DFmode);
18754 emit_insn (gen_floatsidf2 (fp, x));
18755
18756 real_ldexp (&TWO31r, &dconst1, 31);
18757 x = const_double_from_real_value (TWO31r, DFmode);
18758
18759 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
18760 if (x != target)
18761 emit_move_insn (target, x);
18762 }
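
/* A worked example of the trick above: for input 0xC0000000 (3221225472),
   the PLUS with -2**31 flips the sign bit, giving the signed value
   0x40000000 = 1073741824; converting that and adding back 0x1.0p31
   (2147483648.0) yields 3221225472.0.  The same identity holds for inputs
   below 2**31, where the intermediate signed value is negative.  */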
18763
18764 /* Convert a signed DImode value into a DFmode. Only used for SSE in
18765 32-bit mode; otherwise we have a direct convert instruction. */
18766
18767 void
18768 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
18769 {
18770 REAL_VALUE_TYPE TWO32r;
18771 rtx fp_lo, fp_hi, x;
18772
18773 fp_lo = gen_reg_rtx (DFmode);
18774 fp_hi = gen_reg_rtx (DFmode);
18775
18776 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
18777
18778 real_ldexp (&TWO32r, &dconst1, 32);
18779 x = const_double_from_real_value (TWO32r, DFmode);
18780 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
18781
18782 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
18783
18784 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
18785 0, OPTAB_DIRECT);
18786 if (x != target)
18787 emit_move_insn (target, x);
18788 }
18789
18790 /* Convert an unsigned SImode value into a SFmode, using only SSE.
18791 For x86_32, -mfpmath=sse, !optimize_size only. */
18792 void
18793 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
18794 {
18795 REAL_VALUE_TYPE ONE16r;
18796 rtx fp_hi, fp_lo, int_hi, int_lo, x;
18797
18798 real_ldexp (&ONE16r, &dconst1, 16);
18799 x = const_double_from_real_value (ONE16r, SFmode);
18800 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT (0xffff),
18801 NULL, 0, OPTAB_DIRECT);
18802 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT (16),
18803 NULL, 0, OPTAB_DIRECT);
18804 fp_hi = gen_reg_rtx (SFmode);
18805 fp_lo = gen_reg_rtx (SFmode);
18806 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
18807 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
18808 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
18809 0, OPTAB_DIRECT);
18810 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
18811 0, OPTAB_DIRECT);
18812 if (!rtx_equal_p (target, fp_hi))
18813 emit_move_insn (target, fp_hi);
18814 }
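
/* The identity used above: input = (input >> 16) * 0x10000 + (input & 0xffff).
   Both halves fit in 16 bits, so each signed conversion and the multiply by
   0x1.0p16 are exact in SFmode; only the final addition rounds.  For example,
   0x12345678 becomes 4660.0 * 65536.0 + 22136.0 = 305419896.0 before that
   final SFmode rounding.  */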
18815
18816 /* floatunsv{4,8}siv{4,8}sf2 expander. Expand code to convert
18817 a vector of unsigned ints VAL to vector of floats TARGET. */
18818
18819 void
18820 ix86_expand_vector_convert_uns_vsivsf (rtx target, rtx val)
18821 {
18822 rtx tmp[8];
18823 REAL_VALUE_TYPE TWO16r;
18824 enum machine_mode intmode = GET_MODE (val);
18825 enum machine_mode fltmode = GET_MODE (target);
18826 rtx (*cvt) (rtx, rtx);
18827
18828 if (intmode == V4SImode)
18829 cvt = gen_floatv4siv4sf2;
18830 else
18831 cvt = gen_floatv8siv8sf2;
18832 tmp[0] = ix86_build_const_vector (intmode, 1, GEN_INT (0xffff));
18833 tmp[0] = force_reg (intmode, tmp[0]);
18834 tmp[1] = expand_simple_binop (intmode, AND, val, tmp[0], NULL_RTX, 1,
18835 OPTAB_DIRECT);
18836 tmp[2] = expand_simple_binop (intmode, LSHIFTRT, val, GEN_INT (16),
18837 NULL_RTX, 1, OPTAB_DIRECT);
18838 tmp[3] = gen_reg_rtx (fltmode);
18839 emit_insn (cvt (tmp[3], tmp[1]));
18840 tmp[4] = gen_reg_rtx (fltmode);
18841 emit_insn (cvt (tmp[4], tmp[2]));
18842 real_ldexp (&TWO16r, &dconst1, 16);
18843 tmp[5] = const_double_from_real_value (TWO16r, SFmode);
18844 tmp[5] = force_reg (fltmode, ix86_build_const_vector (fltmode, 1, tmp[5]));
18845 tmp[6] = expand_simple_binop (fltmode, MULT, tmp[4], tmp[5], NULL_RTX, 1,
18846 OPTAB_DIRECT);
18847 tmp[7] = expand_simple_binop (fltmode, PLUS, tmp[3], tmp[6], target, 1,
18848 OPTAB_DIRECT);
18849 if (tmp[7] != target)
18850 emit_move_insn (target, tmp[7]);
18851 }
18852
18853 /* Adjust a V*SFmode/V*DFmode value VAL so that *sfix_trunc* resp. fix_trunc*
18854 pattern can be used on it instead of *ufix_trunc* resp. fixuns_trunc*.
18855 This is done by doing just signed conversion if < 0x1p31, and otherwise by
18856 subtracting 0x1p31 first and xoring in 0x80000000 from *XORP afterwards. */
18857
18858 rtx
18859 ix86_expand_adjust_ufix_to_sfix_si (rtx val, rtx *xorp)
18860 {
18861 REAL_VALUE_TYPE TWO31r;
18862 rtx two31r, tmp[4];
18863 enum machine_mode mode = GET_MODE (val);
18864 enum machine_mode scalarmode = GET_MODE_INNER (mode);
18865 enum machine_mode intmode = GET_MODE_SIZE (mode) == 32 ? V8SImode : V4SImode;
18866 rtx (*cmp) (rtx, rtx, rtx, rtx);
18867 int i;
18868
18869 for (i = 0; i < 3; i++)
18870 tmp[i] = gen_reg_rtx (mode);
18871 real_ldexp (&TWO31r, &dconst1, 31);
18872 two31r = const_double_from_real_value (TWO31r, scalarmode);
18873 two31r = ix86_build_const_vector (mode, 1, two31r);
18874 two31r = force_reg (mode, two31r);
18875 switch (mode)
18876 {
18877 case V8SFmode: cmp = gen_avx_maskcmpv8sf3; break;
18878 case V4SFmode: cmp = gen_sse_maskcmpv4sf3; break;
18879 case V4DFmode: cmp = gen_avx_maskcmpv4df3; break;
18880 case V2DFmode: cmp = gen_sse2_maskcmpv2df3; break;
18881 default: gcc_unreachable ();
18882 }
18883 tmp[3] = gen_rtx_LE (mode, two31r, val);
18884 emit_insn (cmp (tmp[0], two31r, val, tmp[3]));
18885 tmp[1] = expand_simple_binop (mode, AND, tmp[0], two31r, tmp[1],
18886 0, OPTAB_DIRECT);
18887 if (intmode == V4SImode || TARGET_AVX2)
18888 *xorp = expand_simple_binop (intmode, ASHIFT,
18889 gen_lowpart (intmode, tmp[0]),
18890 GEN_INT (31), NULL_RTX, 0,
18891 OPTAB_DIRECT);
18892 else
18893 {
18894 rtx two31 = GEN_INT ((unsigned HOST_WIDE_INT) 1 << 31);
18895 two31 = ix86_build_const_vector (intmode, 1, two31);
18896 *xorp = expand_simple_binop (intmode, AND,
18897 gen_lowpart (intmode, tmp[0]),
18898 two31, NULL_RTX, 0,
18899 OPTAB_DIRECT);
18900 }
18901 return expand_simple_binop (mode, MINUS, val, tmp[1], tmp[2],
18902 0, OPTAB_DIRECT);
18903 }
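
/* A worked example of the adjustment above: for a lane holding 3e9 (which is
   >= 0x1.0p31), VAL is reduced to 3e9 - 2**31 = 852516352.0 and the matching
   lane of *XORP becomes 0x80000000.  The signed fix_trunc then produces
   0x32d05e00, and xoring in 0x80000000 afterwards restores 0xb2d05e00, i.e.
   3000000000.  Lanes below 0x1.0p31 are left unchanged with a zero xor.  */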
18904
18905 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
18906 then replicate the value for all elements of the vector
18907 register. */
18908
18909 rtx
18910 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
18911 {
18912 int i, n_elt;
18913 rtvec v;
18914 enum machine_mode scalar_mode;
18915
18916 switch (mode)
18917 {
18918 case V64QImode:
18919 case V32QImode:
18920 case V16QImode:
18921 case V32HImode:
18922 case V16HImode:
18923 case V8HImode:
18924 case V16SImode:
18925 case V8SImode:
18926 case V4SImode:
18927 case V8DImode:
18928 case V4DImode:
18929 case V2DImode:
18930 gcc_assert (vect);
18931 case V16SFmode:
18932 case V8SFmode:
18933 case V4SFmode:
18934 case V8DFmode:
18935 case V4DFmode:
18936 case V2DFmode:
18937 n_elt = GET_MODE_NUNITS (mode);
18938 v = rtvec_alloc (n_elt);
18939 scalar_mode = GET_MODE_INNER (mode);
18940
18941 RTVEC_ELT (v, 0) = value;
18942
18943 for (i = 1; i < n_elt; ++i)
18944 RTVEC_ELT (v, i) = vect ? value : CONST0_RTX (scalar_mode);
18945
18946 return gen_rtx_CONST_VECTOR (mode, v);
18947
18948 default:
18949 gcc_unreachable ();
18950 }
18951 }
18952
18953 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
18954 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
18955 for an SSE register. If VECT is true, then replicate the mask for
18956 all elements of the vector register. If INVERT is true, then create
18957 a mask excluding the sign bit. */
18958
18959 rtx
18960 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
18961 {
18962 enum machine_mode vec_mode, imode;
18963 HOST_WIDE_INT hi, lo;
18964 int shift = 63;
18965 rtx v;
18966 rtx mask;
18967
18968 /* Find the sign bit, sign extended to 2*HWI. */
18969 switch (mode)
18970 {
18971 case V16SImode:
18972 case V16SFmode:
18973 case V8SImode:
18974 case V4SImode:
18975 case V8SFmode:
18976 case V4SFmode:
18977 vec_mode = mode;
18978 mode = GET_MODE_INNER (mode);
18979 imode = SImode;
18980 lo = 0x80000000, hi = lo < 0;
18981 break;
18982
18983 case V8DImode:
18984 case V4DImode:
18985 case V2DImode:
18986 case V8DFmode:
18987 case V4DFmode:
18988 case V2DFmode:
18989 vec_mode = mode;
18990 mode = GET_MODE_INNER (mode);
18991 imode = DImode;
18992 if (HOST_BITS_PER_WIDE_INT >= 64)
18993 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
18994 else
18995 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
18996 break;
18997
18998 case TImode:
18999 case TFmode:
19000 vec_mode = VOIDmode;
19001 if (HOST_BITS_PER_WIDE_INT >= 64)
19002 {
19003 imode = TImode;
19004 lo = 0, hi = (HOST_WIDE_INT)1 << shift;
19005 }
19006 else
19007 {
19008 rtvec vec;
19009
19010 imode = DImode;
19011 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
19012
19013 if (invert)
19014 {
19015 lo = ~lo, hi = ~hi;
19016 v = constm1_rtx;
19017 }
19018 else
19019 v = const0_rtx;
19020
19021 mask = immed_double_const (lo, hi, imode);
19022
19023 vec = gen_rtvec (2, v, mask);
19024 v = gen_rtx_CONST_VECTOR (V2DImode, vec);
19025 v = copy_to_mode_reg (mode, gen_lowpart (mode, v));
19026
19027 return v;
19028 }
19029 break;
19030
19031 default:
19032 gcc_unreachable ();
19033 }
19034
19035 if (invert)
19036 lo = ~lo, hi = ~hi;
19037
19038 /* Force this value into the low part of a fp vector constant. */
19039 mask = immed_double_const (lo, hi, imode);
19040 mask = gen_lowpart (mode, mask);
19041
19042 if (vec_mode == VOIDmode)
19043 return force_reg (mode, mask);
19044
19045 v = ix86_build_const_vector (vec_mode, vect, mask);
19046 return force_reg (vec_mode, v);
19047 }
19048
19049 /* Generate code for floating point ABS or NEG. */
19050
19051 void
19052 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
19053 rtx operands[])
19054 {
19055 rtx mask, set, dst, src;
19056 bool use_sse = false;
19057 bool vector_mode = VECTOR_MODE_P (mode);
19058 enum machine_mode vmode = mode;
19059
19060 if (vector_mode)
19061 use_sse = true;
19062 else if (mode == TFmode)
19063 use_sse = true;
19064 else if (TARGET_SSE_MATH)
19065 {
19066 use_sse = SSE_FLOAT_MODE_P (mode);
19067 if (mode == SFmode)
19068 vmode = V4SFmode;
19069 else if (mode == DFmode)
19070 vmode = V2DFmode;
19071 }
19072
19073 /* NEG and ABS performed with SSE use bitwise mask operations.
19074 Create the appropriate mask now. */
19075 if (use_sse)
19076 mask = ix86_build_signbit_mask (vmode, vector_mode, code == ABS);
19077 else
19078 mask = NULL_RTX;
19079
19080 dst = operands[0];
19081 src = operands[1];
19082
19083 set = gen_rtx_fmt_e (code, mode, src);
19084 set = gen_rtx_SET (VOIDmode, dst, set);
19085
19086 if (mask)
19087 {
19088 rtx use, clob;
19089 rtvec par;
19090
19091 use = gen_rtx_USE (VOIDmode, mask);
19092 if (vector_mode)
19093 par = gen_rtvec (2, set, use);
19094 else
19095 {
19096 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
19097 par = gen_rtvec (3, set, use, clob);
19098 }
19099 emit_insn (gen_rtx_PARALLEL (VOIDmode, par));
19100 }
19101 else
19102 emit_insn (set);
19103 }
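
/* In terms of bit operations, the SSE path above computes NEG as an XOR with
   the sign-bit mask and ABS as an AND with its complement; e.g. for DFmode,
   neg(x) = x ^ 0x8000000000000000 and abs(x) = x & 0x7fffffffffffffff.
   The mask is attached through a USE so it remains available when the insn
   is later split into the actual logical operation.  */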
19104
19105 /* Expand a copysign operation. Special case operand 0 being a constant. */
19106
19107 void
19108 ix86_expand_copysign (rtx operands[])
19109 {
19110 enum machine_mode mode, vmode;
19111 rtx dest, op0, op1, mask, nmask;
19112
19113 dest = operands[0];
19114 op0 = operands[1];
19115 op1 = operands[2];
19116
19117 mode = GET_MODE (dest);
19118
19119 if (mode == SFmode)
19120 vmode = V4SFmode;
19121 else if (mode == DFmode)
19122 vmode = V2DFmode;
19123 else
19124 vmode = mode;
19125
19126 if (GET_CODE (op0) == CONST_DOUBLE)
19127 {
19128 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
19129
19130 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
19131 op0 = simplify_unary_operation (ABS, mode, op0, mode);
19132
19133 if (mode == SFmode || mode == DFmode)
19134 {
19135 if (op0 == CONST0_RTX (mode))
19136 op0 = CONST0_RTX (vmode);
19137 else
19138 {
19139 rtx v = ix86_build_const_vector (vmode, false, op0);
19140
19141 op0 = force_reg (vmode, v);
19142 }
19143 }
19144 else if (op0 != CONST0_RTX (mode))
19145 op0 = force_reg (mode, op0);
19146
19147 mask = ix86_build_signbit_mask (vmode, 0, 0);
19148
19149 if (mode == SFmode)
19150 copysign_insn = gen_copysignsf3_const;
19151 else if (mode == DFmode)
19152 copysign_insn = gen_copysigndf3_const;
19153 else
19154 copysign_insn = gen_copysigntf3_const;
19155
19156 emit_insn (copysign_insn (dest, op0, op1, mask));
19157 }
19158 else
19159 {
19160 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
19161
19162 nmask = ix86_build_signbit_mask (vmode, 0, 1);
19163 mask = ix86_build_signbit_mask (vmode, 0, 0);
19164
19165 if (mode == SFmode)
19166 copysign_insn = gen_copysignsf3_var;
19167 else if (mode == DFmode)
19168 copysign_insn = gen_copysigndf3_var;
19169 else
19170 copysign_insn = gen_copysigntf3_var;
19171
19172 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
19173 }
19174 }
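
/* Bit-level sketch of the expansion above: copysign (x, y) is computed as
   (x & ~SIGNMASK) | (y & SIGNMASK), keeping the magnitude bits of x and the
   sign bit of y.  When x is a constant, its absolute value is folded here at
   expand time, so the split form only has to mask the sign out of y and OR
   the constant back in.  */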
19175
19176 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
19177 be a constant, and so has already been expanded into a vector constant. */
19178
19179 void
19180 ix86_split_copysign_const (rtx operands[])
19181 {
19182 enum machine_mode mode, vmode;
19183 rtx dest, op0, mask, x;
19184
19185 dest = operands[0];
19186 op0 = operands[1];
19187 mask = operands[3];
19188
19189 mode = GET_MODE (dest);
19190 vmode = GET_MODE (mask);
19191
19192 dest = simplify_gen_subreg (vmode, dest, mode, 0);
19193 x = gen_rtx_AND (vmode, dest, mask);
19194 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19195
19196 if (op0 != CONST0_RTX (vmode))
19197 {
19198 x = gen_rtx_IOR (vmode, dest, op0);
19199 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19200 }
19201 }
19202
19203 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
19204 so we have to do two masks. */
19205
19206 void
19207 ix86_split_copysign_var (rtx operands[])
19208 {
19209 enum machine_mode mode, vmode;
19210 rtx dest, scratch, op0, op1, mask, nmask, x;
19211
19212 dest = operands[0];
19213 scratch = operands[1];
19214 op0 = operands[2];
19215 op1 = operands[3];
19216 nmask = operands[4];
19217 mask = operands[5];
19218
19219 mode = GET_MODE (dest);
19220 vmode = GET_MODE (mask);
19221
19222 if (rtx_equal_p (op0, op1))
19223 {
19224 /* Shouldn't happen often (it's useless, obviously), but when it does
19225 we'd generate incorrect code if we continue below. */
19226 emit_move_insn (dest, op0);
19227 return;
19228 }
19229
19230 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
19231 {
19232 gcc_assert (REGNO (op1) == REGNO (scratch));
19233
19234 x = gen_rtx_AND (vmode, scratch, mask);
19235 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
19236
19237 dest = mask;
19238 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
19239 x = gen_rtx_NOT (vmode, dest);
19240 x = gen_rtx_AND (vmode, x, op0);
19241 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19242 }
19243 else
19244 {
19245 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
19246 {
19247 x = gen_rtx_AND (vmode, scratch, mask);
19248 }
19249 else /* alternative 2,4 */
19250 {
19251 gcc_assert (REGNO (mask) == REGNO (scratch));
19252 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
19253 x = gen_rtx_AND (vmode, scratch, op1);
19254 }
19255 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
19256
19257 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
19258 {
19259 dest = simplify_gen_subreg (vmode, op0, mode, 0);
19260 x = gen_rtx_AND (vmode, dest, nmask);
19261 }
19262 else /* alternative 3,4 */
19263 {
19264 gcc_assert (REGNO (nmask) == REGNO (dest));
19265 dest = nmask;
19266 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
19267 x = gen_rtx_AND (vmode, dest, op0);
19268 }
19269 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19270 }
19271
19272 x = gen_rtx_IOR (vmode, dest, scratch);
19273 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19274 }
19275
19276 /* Return TRUE or FALSE depending on whether the first SET in INSN
19277 has source and destination with matching CC modes, and whether the
19278 CC mode is at least as constrained as REQ_MODE. */
19279
19280 bool
19281 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
19282 {
19283 rtx set;
19284 enum machine_mode set_mode;
19285
19286 set = PATTERN (insn);
19287 if (GET_CODE (set) == PARALLEL)
19288 set = XVECEXP (set, 0, 0);
19289 gcc_assert (GET_CODE (set) == SET);
19290 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
19291
19292 set_mode = GET_MODE (SET_DEST (set));
19293 switch (set_mode)
19294 {
19295 case CCNOmode:
19296 if (req_mode != CCNOmode
19297 && (req_mode != CCmode
19298 || XEXP (SET_SRC (set), 1) != const0_rtx))
19299 return false;
19300 break;
19301 case CCmode:
19302 if (req_mode == CCGCmode)
19303 return false;
19304 /* FALLTHRU */
19305 case CCGCmode:
19306 if (req_mode == CCGOCmode || req_mode == CCNOmode)
19307 return false;
19308 /* FALLTHRU */
19309 case CCGOCmode:
19310 if (req_mode == CCZmode)
19311 return false;
19312 /* FALLTHRU */
19313 case CCZmode:
19314 break;
19315
19316 case CCAmode:
19317 case CCCmode:
19318 case CCOmode:
19319 case CCSmode:
19320 if (set_mode != req_mode)
19321 return false;
19322 break;
19323
19324 default:
19325 gcc_unreachable ();
19326 }
19327
19328 return GET_MODE (SET_SRC (set)) == set_mode;
19329 }
19330
19331 /* Generate insn patterns to do an integer compare of OPERANDS. */
19332
19333 static rtx
19334 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
19335 {
19336 enum machine_mode cmpmode;
19337 rtx tmp, flags;
19338
19339 cmpmode = SELECT_CC_MODE (code, op0, op1);
19340 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
19341
19342 /* This is very simple, but making the interface the same as in the
19343 FP case makes the rest of the code easier. */
19344 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
19345 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
19346
19347 /* Return the test that should be put into the flags user, i.e.
19348 the bcc, scc, or cmov instruction. */
19349 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
19350 }
19351
19352 /* Figure out whether to use ordered or unordered fp comparisons.
19353 Return the appropriate mode to use. */
19354
19355 enum machine_mode
19356 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
19357 {
19358 /* ??? In order to make all comparisons reversible, we do all comparisons
19359 non-trapping when compiling for IEEE. Once gcc is able to distinguish
19360 the trapping and nontrapping forms of all comparisons, we can make inequality
19361 comparisons trapping again, since it results in better code when using
19362 FCOM based compares. */
19363 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
19364 }
19365
19366 enum machine_mode
19367 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
19368 {
19369 enum machine_mode mode = GET_MODE (op0);
19370
19371 if (SCALAR_FLOAT_MODE_P (mode))
19372 {
19373 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
19374 return ix86_fp_compare_mode (code);
19375 }
19376
19377 switch (code)
19378 {
19379 /* Only zero flag is needed. */
19380 case EQ: /* ZF=0 */
19381 case NE: /* ZF!=0 */
19382 return CCZmode;
19383 /* Codes needing carry flag. */
19384 case GEU: /* CF=0 */
19385 case LTU: /* CF=1 */
19386 /* Detect overflow checks. They need just the carry flag. */
19387 if (GET_CODE (op0) == PLUS
19388 && rtx_equal_p (op1, XEXP (op0, 0)))
19389 return CCCmode;
19390 else
19391 return CCmode;
19392 case GTU: /* CF=0 & ZF=0 */
19393 case LEU: /* CF=1 | ZF=1 */
19394 return CCmode;
19395 /* Codes possibly doable only with sign flag when
19396 comparing against zero. */
19397 case GE: /* SF=OF or SF=0 */
19398 case LT: /* SF<>OF or SF=1 */
19399 if (op1 == const0_rtx)
19400 return CCGOCmode;
19401 else
19402 /* For other cases Carry flag is not required. */
19403 return CCGCmode;
19404 /* Codes doable only with the sign flag when comparing
19405 against zero, but we miss the jump instruction for it,
19406 so we need to use relational tests against the overflow flag,
19407 which thus needs to be zero. */
19408 case GT: /* ZF=0 & SF=OF */
19409 case LE: /* ZF=1 | SF<>OF */
19410 if (op1 == const0_rtx)
19411 return CCNOmode;
19412 else
19413 return CCGCmode;
19414 /* The strcmp pattern does (use flags) and combine may ask us for the
19415 proper mode. */
19416 case USE:
19417 return CCmode;
19418 default:
19419 gcc_unreachable ();
19420 }
19421 }
19422
19423 /* Return the fixed registers used for condition codes. */
19424
19425 static bool
19426 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
19427 {
19428 *p1 = FLAGS_REG;
19429 *p2 = FPSR_REG;
19430 return true;
19431 }
19432
19433 /* If two condition code modes are compatible, return a condition code
19434 mode which is compatible with both. Otherwise, return
19435 VOIDmode. */
19436
19437 static enum machine_mode
19438 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
19439 {
19440 if (m1 == m2)
19441 return m1;
19442
19443 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
19444 return VOIDmode;
19445
19446 if ((m1 == CCGCmode && m2 == CCGOCmode)
19447 || (m1 == CCGOCmode && m2 == CCGCmode))
19448 return CCGCmode;
19449
19450 if (m1 == CCZmode && (m2 == CCGCmode || m2 == CCGOCmode))
19451 return m2;
19452 else if (m2 == CCZmode && (m1 == CCGCmode || m1 == CCGOCmode))
19453 return m1;
19454
19455 switch (m1)
19456 {
19457 default:
19458 gcc_unreachable ();
19459
19460 case CCmode:
19461 case CCGCmode:
19462 case CCGOCmode:
19463 case CCNOmode:
19464 case CCAmode:
19465 case CCCmode:
19466 case CCOmode:
19467 case CCSmode:
19468 case CCZmode:
19469 switch (m2)
19470 {
19471 default:
19472 return VOIDmode;
19473
19474 case CCmode:
19475 case CCGCmode:
19476 case CCGOCmode:
19477 case CCNOmode:
19478 case CCAmode:
19479 case CCCmode:
19480 case CCOmode:
19481 case CCSmode:
19482 case CCZmode:
19483 return CCmode;
19484 }
19485
19486 case CCFPmode:
19487 case CCFPUmode:
19488 /* These are only compatible with themselves, which we already
19489 checked above. */
19490 return VOIDmode;
19491 }
19492 }
19493
19494
19495 /* Return a comparison we can do that is equivalent to
19496 swap_condition (code) apart possibly from orderedness.
19497 But, never change orderedness if TARGET_IEEE_FP, returning
19498 UNKNOWN in that case if necessary. */
19499
19500 static enum rtx_code
19501 ix86_fp_swap_condition (enum rtx_code code)
19502 {
19503 switch (code)
19504 {
19505 case GT: /* GTU - CF=0 & ZF=0 */
19506 return TARGET_IEEE_FP ? UNKNOWN : UNLT;
19507 case GE: /* GEU - CF=0 */
19508 return TARGET_IEEE_FP ? UNKNOWN : UNLE;
19509 case UNLT: /* LTU - CF=1 */
19510 return TARGET_IEEE_FP ? UNKNOWN : GT;
19511 case UNLE: /* LEU - CF=1 | ZF=1 */
19512 return TARGET_IEEE_FP ? UNKNOWN : GE;
19513 default:
19514 return swap_condition (code);
19515 }
19516 }
19517
19518 /* Return the cost of comparison CODE using the best strategy for performance.
19519 All following functions use the number of instructions as the cost metric.
19520 In the future this should be tweaked to compute bytes for optimize_size and
19521 take into account the performance of various instructions on various CPUs. */
19522
19523 static int
19524 ix86_fp_comparison_cost (enum rtx_code code)
19525 {
19526 int arith_cost;
19527
19528 /* The cost of code using bit-twiddling on %ah. */
19529 switch (code)
19530 {
19531 case UNLE:
19532 case UNLT:
19533 case LTGT:
19534 case GT:
19535 case GE:
19536 case UNORDERED:
19537 case ORDERED:
19538 case UNEQ:
19539 arith_cost = 4;
19540 break;
19541 case LT:
19542 case NE:
19543 case EQ:
19544 case UNGE:
19545 arith_cost = TARGET_IEEE_FP ? 5 : 4;
19546 break;
19547 case LE:
19548 case UNGT:
19549 arith_cost = TARGET_IEEE_FP ? 6 : 4;
19550 break;
19551 default:
19552 gcc_unreachable ();
19553 }
19554
19555 switch (ix86_fp_comparison_strategy (code))
19556 {
19557 case IX86_FPCMP_COMI:
19558 return arith_cost > 4 ? 3 : 2;
19559 case IX86_FPCMP_SAHF:
19560 return arith_cost > 4 ? 4 : 3;
19561 default:
19562 return arith_cost;
19563 }
19564 }
19565
19566 /* Return the strategy to use for a floating-point comparison. We assume that
19567 fcomi is always preferable where available, since that is also true when
19568 looking at size (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */
19569
19570 enum ix86_fpcmp_strategy
19571 ix86_fp_comparison_strategy (enum rtx_code code ATTRIBUTE_UNUSED)
19572 {
19573 /* Do fcomi/sahf based test when profitable. */
19574
19575 if (TARGET_CMOVE)
19576 return IX86_FPCMP_COMI;
19577
19578 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
19579 return IX86_FPCMP_SAHF;
19580
19581 return IX86_FPCMP_ARITH;
19582 }
19583
19584 /* Swap, force into registers, or otherwise massage the two operands
19585 to a fp comparison. The operands are updated in place; the new
19586 comparison code is returned. */
19587
19588 static enum rtx_code
19589 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
19590 {
19591 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
19592 rtx op0 = *pop0, op1 = *pop1;
19593 enum machine_mode op_mode = GET_MODE (op0);
19594 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
19595
19596 /* All of the unordered compare instructions only work on registers.
19597 The same is true of the fcomi compare instructions. The XFmode
19598 compare instructions require registers except when comparing
19599 against zero or when converting operand 1 from fixed point to
19600 floating point. */
19601
19602 if (!is_sse
19603 && (fpcmp_mode == CCFPUmode
19604 || (op_mode == XFmode
19605 && ! (standard_80387_constant_p (op0) == 1
19606 || standard_80387_constant_p (op1) == 1)
19607 && GET_CODE (op1) != FLOAT)
19608 || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI))
19609 {
19610 op0 = force_reg (op_mode, op0);
19611 op1 = force_reg (op_mode, op1);
19612 }
19613 else
19614 {
19615 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
19616 things around if they appear profitable, otherwise force op0
19617 into a register. */
19618
19619 if (standard_80387_constant_p (op0) == 0
19620 || (MEM_P (op0)
19621 && ! (standard_80387_constant_p (op1) == 0
19622 || MEM_P (op1))))
19623 {
19624 enum rtx_code new_code = ix86_fp_swap_condition (code);
19625 if (new_code != UNKNOWN)
19626 {
19627 rtx tmp;
19628 tmp = op0, op0 = op1, op1 = tmp;
19629 code = new_code;
19630 }
19631 }
19632
19633 if (!REG_P (op0))
19634 op0 = force_reg (op_mode, op0);
19635
19636 if (CONSTANT_P (op1))
19637 {
19638 int tmp = standard_80387_constant_p (op1);
19639 if (tmp == 0)
19640 op1 = validize_mem (force_const_mem (op_mode, op1));
19641 else if (tmp == 1)
19642 {
19643 if (TARGET_CMOVE)
19644 op1 = force_reg (op_mode, op1);
19645 }
19646 else
19647 op1 = force_reg (op_mode, op1);
19648 }
19649 }
19650
19651 /* Try to rearrange the comparison to make it cheaper. */
19652 if (ix86_fp_comparison_cost (code)
19653 > ix86_fp_comparison_cost (swap_condition (code))
19654 && (REG_P (op1) || can_create_pseudo_p ()))
19655 {
19656 rtx tmp;
19657 tmp = op0, op0 = op1, op1 = tmp;
19658 code = swap_condition (code);
19659 if (!REG_P (op0))
19660 op0 = force_reg (op_mode, op0);
19661 }
19662
19663 *pop0 = op0;
19664 *pop1 = op1;
19665 return code;
19666 }
19667
19668 /* Convert the comparison codes we use to represent FP comparisons to the
19669 integer code that will result in a proper branch. Return UNKNOWN if no
19670 such code is available. */
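/* The mapping below is to unsigned codes because the flag-setting FP
   compares (fcomi and fnstsw + sahf) set CF and ZF the way an unsigned
   integer compare does: CF means "less than" and ZF means "equal".  */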
19671
19672 enum rtx_code
19673 ix86_fp_compare_code_to_integer (enum rtx_code code)
19674 {
19675 switch (code)
19676 {
19677 case GT:
19678 return GTU;
19679 case GE:
19680 return GEU;
19681 case ORDERED:
19682 case UNORDERED:
19683 return code;
19684 break;
19685 case UNEQ:
19686 return EQ;
19687 break;
19688 case UNLT:
19689 return LTU;
19690 break;
19691 case UNLE:
19692 return LEU;
19693 break;
19694 case LTGT:
19695 return NE;
19696 break;
19697 default:
19698 return UNKNOWN;
19699 }
19700 }
19701
19702 /* Generate insn patterns to do a floating point compare of OPERANDS. */
19703
19704 static rtx
19705 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch)
19706 {
19707 enum machine_mode fpcmp_mode, intcmp_mode;
19708 rtx tmp, tmp2;
19709
19710 fpcmp_mode = ix86_fp_compare_mode (code);
19711 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
19712
19713 /* Do fcomi/sahf based test when profitable. */
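  /* IX86_FPCMP_COMI uses fcomi, which sets ZF/PF/CF in EFLAGS directly;
     IX86_FPCMP_SAHF stores the FPU status word with fnstsw and copies AH
     into the flags with sahf; IX86_FPCMP_ARITH tests bits of the stored
     status word with integer instructions.  */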
19714 switch (ix86_fp_comparison_strategy (code))
19715 {
19716 case IX86_FPCMP_COMI:
19717 intcmp_mode = fpcmp_mode;
19718 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
19719 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
19720 tmp);
19721 emit_insn (tmp);
19722 break;
19723
19724 case IX86_FPCMP_SAHF:
19725 intcmp_mode = fpcmp_mode;
19726 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
19727 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
19728 tmp);
19729
19730 if (!scratch)
19731 scratch = gen_reg_rtx (HImode);
19732 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
19733 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
19734 break;
19735
19736 case IX86_FPCMP_ARITH:
19737 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
19738 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
19739 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
19740 if (!scratch)
19741 scratch = gen_reg_rtx (HImode);
19742 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
19743
19744 /* In the unordered case, we have to check C2 for NaNs, which
19745 doesn't work out to anything nice combination-wise.
19746 So do some bit twiddling on the value we've got in AH to come
19747 up with an appropriate set of condition codes. */
19748
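      /* After fnstsw the x87 condition codes sit in AH as C0 = 0x01,
	 C2 = 0x04 and C3 = 0x40.  fcom sets C0 for "less than", C3 for
	 "equal" and all of C0, C2 and C3 for "unordered", which is why the
	 masks below are 0x45 (C0|C2|C3), 0x05 (C0|C2), 0x40 (C3),
	 0x04 (C2) and 0x01 (C0).  */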
19749 intcmp_mode = CCNOmode;
19750 switch (code)
19751 {
19752 case GT:
19753 case UNGT:
19754 if (code == GT || !TARGET_IEEE_FP)
19755 {
19756 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
19757 code = EQ;
19758 }
19759 else
19760 {
19761 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19762 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
19763 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
19764 intcmp_mode = CCmode;
19765 code = GEU;
19766 }
19767 break;
19768 case LT:
19769 case UNLT:
19770 if (code == LT && TARGET_IEEE_FP)
19771 {
19772 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19773 emit_insn (gen_cmpqi_ext_3 (scratch, const1_rtx));
19774 intcmp_mode = CCmode;
19775 code = EQ;
19776 }
19777 else
19778 {
19779 emit_insn (gen_testqi_ext_ccno_0 (scratch, const1_rtx));
19780 code = NE;
19781 }
19782 break;
19783 case GE:
19784 case UNGE:
19785 if (code == GE || !TARGET_IEEE_FP)
19786 {
19787 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
19788 code = EQ;
19789 }
19790 else
19791 {
19792 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19793 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, const1_rtx));
19794 code = NE;
19795 }
19796 break;
19797 case LE:
19798 case UNLE:
19799 if (code == LE && TARGET_IEEE_FP)
19800 {
19801 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19802 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
19803 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
19804 intcmp_mode = CCmode;
19805 code = LTU;
19806 }
19807 else
19808 {
19809 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
19810 code = NE;
19811 }
19812 break;
19813 case EQ:
19814 case UNEQ:
19815 if (code == EQ && TARGET_IEEE_FP)
19816 {
19817 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19818 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
19819 intcmp_mode = CCmode;
19820 code = EQ;
19821 }
19822 else
19823 {
19824 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
19825 code = NE;
19826 }
19827 break;
19828 case NE:
19829 case LTGT:
19830 if (code == NE && TARGET_IEEE_FP)
19831 {
19832 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19833 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
19834 GEN_INT (0x40)));
19835 code = NE;
19836 }
19837 else
19838 {
19839 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
19840 code = EQ;
19841 }
19842 break;
19843
19844 case UNORDERED:
19845 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
19846 code = NE;
19847 break;
19848 case ORDERED:
19849 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
19850 code = EQ;
19851 break;
19852
19853 default:
19854 gcc_unreachable ();
19855 }
19856 break;
19857
19858 default:
19859 gcc_unreachable ();
19860 }
19861
19862 /* Return the test that should be put into the flags user, i.e.
19863 the bcc, scc, or cmov instruction. */
19864 return gen_rtx_fmt_ee (code, VOIDmode,
19865 gen_rtx_REG (intcmp_mode, FLAGS_REG),
19866 const0_rtx);
19867 }
19868
19869 static rtx
19870 ix86_expand_compare (enum rtx_code code, rtx op0, rtx op1)
19871 {
19872 rtx ret;
19873
19874 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
19875 ret = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
19876
19877 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
19878 {
19879 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
19880 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
19881 }
19882 else
19883 ret = ix86_expand_int_compare (code, op0, op1);
19884
19885 return ret;
19886 }
19887
19888 void
19889 ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label)
19890 {
19891 enum machine_mode mode = GET_MODE (op0);
19892 rtx tmp;
19893
19894 switch (mode)
19895 {
19896 case SFmode:
19897 case DFmode:
19898 case XFmode:
19899 case QImode:
19900 case HImode:
19901 case SImode:
19902 simple:
19903 tmp = ix86_expand_compare (code, op0, op1);
19904 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
19905 gen_rtx_LABEL_REF (VOIDmode, label),
19906 pc_rtx);
19907 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
19908 return;
19909
19910 case DImode:
19911 if (TARGET_64BIT)
19912 goto simple;
19913 case TImode:
19914 /* Expand DImode branch into multiple compare+branch. */
19915 {
19916 rtx lo[2], hi[2], label2;
19917 enum rtx_code code1, code2, code3;
19918 enum machine_mode submode;
19919
19920 if (CONSTANT_P (op0) && !CONSTANT_P (op1))
19921 {
19922 tmp = op0, op0 = op1, op1 = tmp;
19923 code = swap_condition (code);
19924 }
19925
19926 split_double_mode (mode, &op0, 1, lo+0, hi+0);
19927 split_double_mode (mode, &op1, 1, lo+1, hi+1);
19928
19929 submode = mode == DImode ? SImode : DImode;
19930
19931 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
19932 avoid two branches. This costs one extra insn, so disable when
19933 optimizing for size. */
19934
19935 if ((code == EQ || code == NE)
19936 && (!optimize_insn_for_size_p ()
19937 || hi[1] == const0_rtx || lo[1] == const0_rtx))
19938 {
19939 rtx xor0, xor1;
19940
19941 xor1 = hi[0];
19942 if (hi[1] != const0_rtx)
19943 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
19944 NULL_RTX, 0, OPTAB_WIDEN);
19945
19946 xor0 = lo[0];
19947 if (lo[1] != const0_rtx)
19948 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
19949 NULL_RTX, 0, OPTAB_WIDEN);
19950
19951 tmp = expand_binop (submode, ior_optab, xor1, xor0,
19952 NULL_RTX, 0, OPTAB_WIDEN);
19953
19954 ix86_expand_branch (code, tmp, const0_rtx, label);
19955 return;
19956 }
19957
19958 /* Otherwise, if we are doing less-than or greater-than-or-equal,
19959 op1 is a constant and the low word is zero, then we can just
19960 examine the high word. Similarly for low word -1 and
19961 less-than-or-equal or greater-than. */
19962
19963 if (CONST_INT_P (hi[1]))
19964 switch (code)
19965 {
19966 case LT: case LTU: case GE: case GEU:
19967 if (lo[1] == const0_rtx)
19968 {
19969 ix86_expand_branch (code, hi[0], hi[1], label);
19970 return;
19971 }
19972 break;
19973 case LE: case LEU: case GT: case GTU:
19974 if (lo[1] == constm1_rtx)
19975 {
19976 ix86_expand_branch (code, hi[0], hi[1], label);
19977 return;
19978 }
19979 break;
19980 default:
19981 break;
19982 }
19983
19984 /* Otherwise, we need two or three jumps. */
19985
19986 label2 = gen_label_rtx ();
19987
19988 code1 = code;
19989 code2 = swap_condition (code);
19990 code3 = unsigned_condition (code);
19991
19992 switch (code)
19993 {
19994 case LT: case GT: case LTU: case GTU:
19995 break;
19996
19997 case LE: code1 = LT; code2 = GT; break;
19998 case GE: code1 = GT; code2 = LT; break;
19999 case LEU: code1 = LTU; code2 = GTU; break;
20000 case GEU: code1 = GTU; code2 = LTU; break;
20001
20002 case EQ: code1 = UNKNOWN; code2 = NE; break;
20003 case NE: code2 = UNKNOWN; break;
20004
20005 default:
20006 gcc_unreachable ();
20007 }
20008
20009 /*
20010 * a < b =>
20011 * if (hi(a) < hi(b)) goto true;
20012 * if (hi(a) > hi(b)) goto false;
20013 * if (lo(a) < lo(b)) goto true;
20014 * false:
20015 */
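      /* For example, a signed "a <= b" uses code1 = LT, code2 = GT and
	 code3 = LEU: two signed branches on the high words followed by an
	 unsigned branch on the low words.  */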
20016
20017 if (code1 != UNKNOWN)
20018 ix86_expand_branch (code1, hi[0], hi[1], label);
20019 if (code2 != UNKNOWN)
20020 ix86_expand_branch (code2, hi[0], hi[1], label2);
20021
20022 ix86_expand_branch (code3, lo[0], lo[1], label);
20023
20024 if (code2 != UNKNOWN)
20025 emit_label (label2);
20026 return;
20027 }
20028
20029 default:
20030 gcc_assert (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC);
20031 goto simple;
20032 }
20033 }
20034
20035 /* Split branch based on floating point condition. */
20036 void
20037 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
20038 rtx target1, rtx target2, rtx tmp)
20039 {
20040 rtx condition;
20041 rtx i;
20042
20043 if (target2 != pc_rtx)
20044 {
20045 rtx tmp = target2;
20046 code = reverse_condition_maybe_unordered (code);
20047 target2 = target1;
20048 target1 = tmp;
20049 }
20050
20051 condition = ix86_expand_fp_compare (code, op1, op2,
20052 tmp);
20053
20054 i = emit_jump_insn (gen_rtx_SET
20055 (VOIDmode, pc_rtx,
20056 gen_rtx_IF_THEN_ELSE (VOIDmode,
20057 condition, target1, target2)));
20058 if (split_branch_probability >= 0)
20059 add_int_reg_note (i, REG_BR_PROB, split_branch_probability);
20060 }
20061
20062 void
20063 ix86_expand_setcc (rtx dest, enum rtx_code code, rtx op0, rtx op1)
20064 {
20065 rtx ret;
20066
20067 gcc_assert (GET_MODE (dest) == QImode);
20068
20069 ret = ix86_expand_compare (code, op0, op1);
20070 PUT_MODE (ret, QImode);
20071 emit_insn (gen_rtx_SET (VOIDmode, dest, ret));
20072 }
20073
20074 /* Expand a comparison setting or clearing the carry flag. Return true when
20075 successful and set *POP for the operation. */
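/* The conversions below rely on identities that hold for the carry flag:
     a == 0             <=>  (unsigned) a < 1
     (unsigned) a > b   <=>  (unsigned) a >= b + 1    (constant b, no overflow)
     a >= 0             <=>  (unsigned) a < SIGN_BIT  (SIGN_BIT = 1 << (bits - 1))
   so each of them can be answered by CF alone.  */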
20076 static bool
20077 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
20078 {
20079 enum machine_mode mode =
20080 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
20081
20082 /* Do not handle double-mode compares that go through the special path. */
20083 if (mode == (TARGET_64BIT ? TImode : DImode))
20084 return false;
20085
20086 if (SCALAR_FLOAT_MODE_P (mode))
20087 {
20088 rtx compare_op, compare_seq;
20089
20090 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
20091
20092 /* Shortcut: the following common codes never translate
20093 into carry flag compares. */
20094 if (code == EQ || code == NE || code == UNEQ || code == LTGT
20095 || code == ORDERED || code == UNORDERED)
20096 return false;
20097
20098 /* These comparisons require the zero flag; swap the operands so they won't. */
20099 if ((code == GT || code == UNLE || code == LE || code == UNGT)
20100 && !TARGET_IEEE_FP)
20101 {
20102 rtx tmp = op0;
20103 op0 = op1;
20104 op1 = tmp;
20105 code = swap_condition (code);
20106 }
20107
20108 /* Try to expand the comparison and verify that we end up with
20109 a carry flag based comparison. This fails only when we decide
20110 to expand the comparison using arithmetic, which is not a
20111 common scenario. */
20112 start_sequence ();
20113 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
20114 compare_seq = get_insns ();
20115 end_sequence ();
20116
20117 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
20118 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
20119 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
20120 else
20121 code = GET_CODE (compare_op);
20122
20123 if (code != LTU && code != GEU)
20124 return false;
20125
20126 emit_insn (compare_seq);
20127 *pop = compare_op;
20128 return true;
20129 }
20130
20131 if (!INTEGRAL_MODE_P (mode))
20132 return false;
20133
20134 switch (code)
20135 {
20136 case LTU:
20137 case GEU:
20138 break;
20139
20140 /* Convert a==0 into (unsigned)a<1. */
20141 case EQ:
20142 case NE:
20143 if (op1 != const0_rtx)
20144 return false;
20145 op1 = const1_rtx;
20146 code = (code == EQ ? LTU : GEU);
20147 break;
20148
20149 /* Convert a>b into b<a or a>=b+1. */
20150 case GTU:
20151 case LEU:
20152 if (CONST_INT_P (op1))
20153 {
20154 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
20155 /* Bail out on overflow. We could still swap the operands, but that
20156 would force loading the constant into a register. */
20157 if (op1 == const0_rtx
20158 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
20159 return false;
20160 code = (code == GTU ? GEU : LTU);
20161 }
20162 else
20163 {
20164 rtx tmp = op1;
20165 op1 = op0;
20166 op0 = tmp;
20167 code = (code == GTU ? LTU : GEU);
20168 }
20169 break;
20170
20171 /* Convert a>=0 into (unsigned)a<0x80000000. */
20172 case LT:
20173 case GE:
20174 if (mode == DImode || op1 != const0_rtx)
20175 return false;
20176 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
20177 code = (code == LT ? GEU : LTU);
20178 break;
20179 case LE:
20180 case GT:
20181 if (mode == DImode || op1 != constm1_rtx)
20182 return false;
20183 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
20184 code = (code == LE ? GEU : LTU);
20185 break;
20186
20187 default:
20188 return false;
20189 }
20190 /* Swapping operands may cause a constant to appear as the first operand. */
20191 if (!nonimmediate_operand (op0, VOIDmode))
20192 {
20193 if (!can_create_pseudo_p ())
20194 return false;
20195 op0 = force_reg (mode, op0);
20196 }
20197 *pop = ix86_expand_compare (code, op0, op1);
20198 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
20199 return true;
20200 }
20201
20202 bool
20203 ix86_expand_int_movcc (rtx operands[])
20204 {
20205 enum rtx_code code = GET_CODE (operands[1]), compare_code;
20206 rtx compare_seq, compare_op;
20207 enum machine_mode mode = GET_MODE (operands[0]);
20208 bool sign_bit_compare_p = false;
20209 rtx op0 = XEXP (operands[1], 0);
20210 rtx op1 = XEXP (operands[1], 1);
20211
20212 if (GET_MODE (op0) == TImode
20213 || (GET_MODE (op0) == DImode
20214 && !TARGET_64BIT))
20215 return false;
20216
20217 start_sequence ();
20218 compare_op = ix86_expand_compare (code, op0, op1);
20219 compare_seq = get_insns ();
20220 end_sequence ();
20221
20222 compare_code = GET_CODE (compare_op);
20223
20224 if ((op1 == const0_rtx && (code == GE || code == LT))
20225 || (op1 == constm1_rtx && (code == GT || code == LE)))
20226 sign_bit_compare_p = true;
20227
20228 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
20229 HImode insns, we'd be swallowed in word prefix ops. */
20230
20231 if ((mode != HImode || TARGET_FAST_PREFIX)
20232 && (mode != (TARGET_64BIT ? TImode : DImode))
20233 && CONST_INT_P (operands[2])
20234 && CONST_INT_P (operands[3]))
20235 {
20236 rtx out = operands[0];
20237 HOST_WIDE_INT ct = INTVAL (operands[2]);
20238 HOST_WIDE_INT cf = INTVAL (operands[3]);
20239 HOST_WIDE_INT diff;
20240
20241 diff = ct - cf;
20242 /* Sign bit compares are better done using shifts than by using
20243 sbb. */
20244 if (sign_bit_compare_p
20245 || ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
20246 {
20247 /* Detect overlap between destination and compare sources. */
20248 rtx tmp = out;
20249
20250 if (!sign_bit_compare_p)
20251 {
20252 rtx flags;
20253 bool fpcmp = false;
20254
20255 compare_code = GET_CODE (compare_op);
20256
20257 flags = XEXP (compare_op, 0);
20258
20259 if (GET_MODE (flags) == CCFPmode
20260 || GET_MODE (flags) == CCFPUmode)
20261 {
20262 fpcmp = true;
20263 compare_code
20264 = ix86_fp_compare_code_to_integer (compare_code);
20265 }
20266
20267 /* To simplify the rest of the code, restrict to the GEU case. */
20268 if (compare_code == LTU)
20269 {
20270 HOST_WIDE_INT tmp = ct;
20271 ct = cf;
20272 cf = tmp;
20273 compare_code = reverse_condition (compare_code);
20274 code = reverse_condition (code);
20275 }
20276 else
20277 {
20278 if (fpcmp)
20279 PUT_CODE (compare_op,
20280 reverse_condition_maybe_unordered
20281 (GET_CODE (compare_op)));
20282 else
20283 PUT_CODE (compare_op,
20284 reverse_condition (GET_CODE (compare_op)));
20285 }
20286 diff = ct - cf;
20287
20288 if (reg_overlap_mentioned_p (out, op0)
20289 || reg_overlap_mentioned_p (out, op1))
20290 tmp = gen_reg_rtx (mode);
20291
20292 if (mode == DImode)
20293 emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op));
20294 else
20295 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp),
20296 flags, compare_op));
20297 }
20298 else
20299 {
20300 if (code == GT || code == GE)
20301 code = reverse_condition (code);
20302 else
20303 {
20304 HOST_WIDE_INT tmp = ct;
20305 ct = cf;
20306 cf = tmp;
20307 diff = ct - cf;
20308 }
20309 tmp = emit_store_flag (tmp, code, op0, op1, VOIDmode, 0, -1);
20310 }
20311
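	  /* At this point TMP is all ones or zero: all ones will end up
	     selecting CF and zero will end up selecting CT.  The branchless
	     sequences below turn that mask into the final constant.  */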
20312 if (diff == 1)
20313 {
20314 /*
20315 * cmpl op0,op1
20316 * sbbl dest,dest
20317 * [addl dest, ct]
20318 *
20319 * Size 5 - 8.
20320 */
20321 if (ct)
20322 tmp = expand_simple_binop (mode, PLUS,
20323 tmp, GEN_INT (ct),
20324 copy_rtx (tmp), 1, OPTAB_DIRECT);
20325 }
20326 else if (cf == -1)
20327 {
20328 /*
20329 * cmpl op0,op1
20330 * sbbl dest,dest
20331 * orl $ct, dest
20332 *
20333 * Size 8.
20334 */
20335 tmp = expand_simple_binop (mode, IOR,
20336 tmp, GEN_INT (ct),
20337 copy_rtx (tmp), 1, OPTAB_DIRECT);
20338 }
20339 else if (diff == -1 && ct)
20340 {
20341 /*
20342 * cmpl op0,op1
20343 * sbbl dest,dest
20344 * notl dest
20345 * [addl dest, cf]
20346 *
20347 * Size 8 - 11.
20348 */
20349 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
20350 if (cf)
20351 tmp = expand_simple_binop (mode, PLUS,
20352 copy_rtx (tmp), GEN_INT (cf),
20353 copy_rtx (tmp), 1, OPTAB_DIRECT);
20354 }
20355 else
20356 {
20357 /*
20358 * cmpl op0,op1
20359 * sbbl dest,dest
20360 * [notl dest]
20361 * andl cf - ct, dest
20362 * [addl dest, ct]
20363 *
20364 * Size 8 - 11.
20365 */
20366
20367 if (cf == 0)
20368 {
20369 cf = ct;
20370 ct = 0;
20371 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
20372 }
20373
20374 tmp = expand_simple_binop (mode, AND,
20375 copy_rtx (tmp),
20376 gen_int_mode (cf - ct, mode),
20377 copy_rtx (tmp), 1, OPTAB_DIRECT);
20378 if (ct)
20379 tmp = expand_simple_binop (mode, PLUS,
20380 copy_rtx (tmp), GEN_INT (ct),
20381 copy_rtx (tmp), 1, OPTAB_DIRECT);
20382 }
20383
20384 if (!rtx_equal_p (tmp, out))
20385 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
20386
20387 return true;
20388 }
20389
20390 if (diff < 0)
20391 {
20392 enum machine_mode cmp_mode = GET_MODE (op0);
20393
20394 HOST_WIDE_INT tmp;
20395 tmp = ct, ct = cf, cf = tmp;
20396 diff = -diff;
20397
20398 if (SCALAR_FLOAT_MODE_P (cmp_mode))
20399 {
20400 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
20401
20402 /* We may be reversing an unordered compare to a normal compare, which
20403 is not valid in general (we may convert a non-trapping condition
20404 into a trapping one); however, on i386 we currently emit all
20405 comparisons unordered. */
20406 compare_code = reverse_condition_maybe_unordered (compare_code);
20407 code = reverse_condition_maybe_unordered (code);
20408 }
20409 else
20410 {
20411 compare_code = reverse_condition (compare_code);
20412 code = reverse_condition (code);
20413 }
20414 }
20415
20416 compare_code = UNKNOWN;
20417 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT
20418 && CONST_INT_P (op1))
20419 {
20420 if (op1 == const0_rtx
20421 && (code == LT || code == GE))
20422 compare_code = code;
20423 else if (op1 == constm1_rtx)
20424 {
20425 if (code == LE)
20426 compare_code = LT;
20427 else if (code == GT)
20428 compare_code = GE;
20429 }
20430 }
20431
20432 /* Optimize dest = (op0 < 0) ? -1 : cf. */
20433 if (compare_code != UNKNOWN
20434 && GET_MODE (op0) == GET_MODE (out)
20435 && (cf == -1 || ct == -1))
20436 {
20437 /* If the lea code below could be used, only optimize
20438 if it results in a 2-insn sequence. */
20439
20440 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
20441 || diff == 3 || diff == 5 || diff == 9)
20442 || (compare_code == LT && ct == -1)
20443 || (compare_code == GE && cf == -1))
20444 {
20445 /*
20446 * notl op1 (if necessary)
20447 * sarl $31, op1
20448 * orl cf, op1
20449 */
20450 if (ct != -1)
20451 {
20452 cf = ct;
20453 ct = -1;
20454 code = reverse_condition (code);
20455 }
20456
20457 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
20458
20459 out = expand_simple_binop (mode, IOR,
20460 out, GEN_INT (cf),
20461 out, 1, OPTAB_DIRECT);
20462 if (out != operands[0])
20463 emit_move_insn (operands[0], out);
20464
20465 return true;
20466 }
20467 }
20468
20469
20470 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
20471 || diff == 3 || diff == 5 || diff == 9)
20472 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
20473 && (mode != DImode
20474 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
20475 {
20476 /*
20477 * xorl dest,dest
20478 * cmpl op1,op2
20479 * setcc dest
20480 * lea cf(dest*(ct-cf)),dest
20481 *
20482 * Size 14.
20483 *
20484 * This also catches the degenerate setcc-only case.
20485 */
20486
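	  /* OUT will be 0 or 1 after emit_store_flag, so
	     cf + out * (ct - cf) evaluates to CF or CT.  The MULT/PLUS rtx
	     built below can usually be emitted as a single lea, since
	     ct - cf is restricted to 1, 2, 3, 4, 5, 8 or 9 here.  */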
20487 rtx tmp;
20488 int nops;
20489
20490 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
20491
20492 nops = 0;
20493 /* On x86_64 the lea instruction operates on Pmode, so we need
20494 to get the arithmetic done in the proper mode to match. */
20495 if (diff == 1)
20496 tmp = copy_rtx (out);
20497 else
20498 {
20499 rtx out1;
20500 out1 = copy_rtx (out);
20501 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
20502 nops++;
20503 if (diff & 1)
20504 {
20505 tmp = gen_rtx_PLUS (mode, tmp, out1);
20506 nops++;
20507 }
20508 }
20509 if (cf != 0)
20510 {
20511 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
20512 nops++;
20513 }
20514 if (!rtx_equal_p (tmp, out))
20515 {
20516 if (nops == 1)
20517 out = force_operand (tmp, copy_rtx (out));
20518 else
20519 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
20520 }
20521 if (!rtx_equal_p (out, operands[0]))
20522 emit_move_insn (operands[0], copy_rtx (out));
20523
20524 return true;
20525 }
20526
20527 /*
20528 * General case: Jumpful:
20529 * xorl dest,dest cmpl op1, op2
20530 * cmpl op1, op2 movl ct, dest
20531 * setcc dest jcc 1f
20532 * decl dest movl cf, dest
20533 * andl (cf-ct),dest 1:
20534 * addl ct,dest
20535 *
20536 * Size 20. Size 14.
20537 *
20538 * This is reasonably steep, but branch mispredict costs are
20539 * high on modern cpus, so consider failing only if optimizing
20540 * for space.
20541 */
20542
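      /* In the branchless sequence above: setcc gives 0 or 1, the decrement
	 turns that into -1 or 0, ANDing with (cf - ct) leaves 0 when the
	 condition is true and cf - ct when it is false, and adding ct then
	 yields ct or cf respectively.  */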
20543 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
20544 && BRANCH_COST (optimize_insn_for_speed_p (),
20545 false) >= 2)
20546 {
20547 if (cf == 0)
20548 {
20549 enum machine_mode cmp_mode = GET_MODE (op0);
20550
20551 cf = ct;
20552 ct = 0;
20553
20554 if (SCALAR_FLOAT_MODE_P (cmp_mode))
20555 {
20556 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
20557
20558 /* We may be reversing an unordered compare to a normal compare,
20559 which is not valid in general (we may convert a non-trapping
20560 condition into a trapping one); however, on i386 we currently
20561 emit all comparisons unordered. */
20562 code = reverse_condition_maybe_unordered (code);
20563 }
20564 else
20565 {
20566 code = reverse_condition (code);
20567 if (compare_code != UNKNOWN)
20568 compare_code = reverse_condition (compare_code);
20569 }
20570 }
20571
20572 if (compare_code != UNKNOWN)
20573 {
20574 /* notl op1 (if needed)
20575 sarl $31, op1
20576 andl (cf-ct), op1
20577 addl ct, op1
20578
20579 For x < 0 (resp. x <= -1) there will be no notl,
20580 so if possible swap the constants to get rid of the
20581 complement.
20582 True/false will be -1/0 while code below (store flag
20583 followed by decrement) is 0/-1, so the constants need
20584 to be exchanged once more. */
20585
20586 if (compare_code == GE || !cf)
20587 {
20588 code = reverse_condition (code);
20589 compare_code = LT;
20590 }
20591 else
20592 {
20593 HOST_WIDE_INT tmp = cf;
20594 cf = ct;
20595 ct = tmp;
20596 }
20597
20598 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
20599 }
20600 else
20601 {
20602 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
20603
20604 out = expand_simple_binop (mode, PLUS, copy_rtx (out),
20605 constm1_rtx,
20606 copy_rtx (out), 1, OPTAB_DIRECT);
20607 }
20608
20609 out = expand_simple_binop (mode, AND, copy_rtx (out),
20610 gen_int_mode (cf - ct, mode),
20611 copy_rtx (out), 1, OPTAB_DIRECT);
20612 if (ct)
20613 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
20614 copy_rtx (out), 1, OPTAB_DIRECT);
20615 if (!rtx_equal_p (out, operands[0]))
20616 emit_move_insn (operands[0], copy_rtx (out));
20617
20618 return true;
20619 }
20620 }
20621
20622 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
20623 {
20624 /* Try a few things more with specific constants and a variable. */
20625
20626 optab op;
20627 rtx var, orig_out, out, tmp;
20628
20629 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
20630 return false;
20631
20632 /* If one of the two operands is an interesting constant, load a
20633 constant using the code above and mask the variable in with a logical operation. */
20634
20635 if (CONST_INT_P (operands[2]))
20636 {
20637 var = operands[3];
20638 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
20639 operands[3] = constm1_rtx, op = and_optab;
20640 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
20641 operands[3] = const0_rtx, op = ior_optab;
20642 else
20643 return false;
20644 }
20645 else if (CONST_INT_P (operands[3]))
20646 {
20647 var = operands[2];
20648 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
20649 operands[2] = constm1_rtx, op = and_optab;
20650 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
20651 operands[2] = const0_rtx, op = ior_optab;
20652 else
20653 return false;
20654 }
20655 else
20656 return false;
20657
20658 orig_out = operands[0];
20659 tmp = gen_reg_rtx (mode);
20660 operands[0] = tmp;
20661
20662 /* Recurse to get the constant loaded. */
20663 if (ix86_expand_int_movcc (operands) == 0)
20664 return false;
20665
20666 /* Mask in the interesting variable. */
20667 out = expand_binop (mode, op, var, tmp, orig_out, 0,
20668 OPTAB_WIDEN);
20669 if (!rtx_equal_p (out, orig_out))
20670 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
20671
20672 return true;
20673 }
20674
20675 /*
20676 * For comparison with above,
20677 *
20678 * movl cf,dest
20679 * movl ct,tmp
20680 * cmpl op1,op2
20681 * cmovcc tmp,dest
20682 *
20683 * Size 15.
20684 */
20685
20686 if (! nonimmediate_operand (operands[2], mode))
20687 operands[2] = force_reg (mode, operands[2]);
20688 if (! nonimmediate_operand (operands[3], mode))
20689 operands[3] = force_reg (mode, operands[3]);
20690
20691 if (! register_operand (operands[2], VOIDmode)
20692 && (mode == QImode
20693 || ! register_operand (operands[3], VOIDmode)))
20694 operands[2] = force_reg (mode, operands[2]);
20695
20696 if (mode == QImode
20697 && ! register_operand (operands[3], VOIDmode))
20698 operands[3] = force_reg (mode, operands[3]);
20699
20700 emit_insn (compare_seq);
20701 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
20702 gen_rtx_IF_THEN_ELSE (mode,
20703 compare_op, operands[2],
20704 operands[3])));
20705 return true;
20706 }
20707
20708 /* Swap, force into registers, or otherwise massage the two operands
20709 to an sse comparison with a mask result. Thus we differ a bit from
20710 ix86_prepare_fp_compare_args which expects to produce a flags result.
20711
20712 The DEST operand exists to help determine whether to commute commutative
20713 operators. The POP0/POP1 operands are updated in place. The new
20714 comparison code is returned, or UNKNOWN if not implementable. */
20715
20716 static enum rtx_code
20717 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
20718 rtx *pop0, rtx *pop1)
20719 {
20720 rtx tmp;
20721
20722 switch (code)
20723 {
20724 case LTGT:
20725 case UNEQ:
20726 /* AVX supports all the needed comparisons. */
20727 if (TARGET_AVX)
20728 break;
20729 /* We have no LTGT as an operator. We could implement it with
20730 NE & ORDERED, but this requires an extra temporary. It's
20731 not clear that it's worth it. */
20732 return UNKNOWN;
20733
20734 case LT:
20735 case LE:
20736 case UNGT:
20737 case UNGE:
20738 /* These are supported directly. */
20739 break;
20740
20741 case EQ:
20742 case NE:
20743 case UNORDERED:
20744 case ORDERED:
20745 /* AVX has 3-operand comparisons; no need to swap anything. */
20746 if (TARGET_AVX)
20747 break;
20748 /* For commutative operators, try to canonicalize the destination
20749 operand to be first in the comparison - this helps reload to
20750 avoid extra moves. */
20751 if (!dest || !rtx_equal_p (dest, *pop1))
20752 break;
20753 /* FALLTHRU */
20754
20755 case GE:
20756 case GT:
20757 case UNLE:
20758 case UNLT:
20759 /* These are not supported directly before AVX, and furthermore
20760 ix86_expand_sse_fp_minmax only optimizes LT/UNGE. Swap the
20761 comparison operands to transform into something that is
20762 supported. */
20763 tmp = *pop0;
20764 *pop0 = *pop1;
20765 *pop1 = tmp;
20766 code = swap_condition (code);
20767 break;
20768
20769 default:
20770 gcc_unreachable ();
20771 }
20772
20773 return code;
20774 }
20775
20776 /* Detect conditional moves that exactly match min/max operational
20777 semantics. Note that this is IEEE safe, as long as we don't
20778 interchange the operands.
20779
20780 Returns FALSE if this conditional move doesn't match a MIN/MAX,
20781 and TRUE if the operation is successful and instructions are emitted. */
20782
20783 static bool
20784 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
20785 rtx cmp_op1, rtx if_true, rtx if_false)
20786 {
20787 enum machine_mode mode;
20788 bool is_min;
20789 rtx tmp;
20790
20791 if (code == LT)
20792 ;
20793 else if (code == UNGE)
20794 {
20795 tmp = if_true;
20796 if_true = if_false;
20797 if_false = tmp;
20798 }
20799 else
20800 return false;
20801
20802 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
20803 is_min = true;
20804 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
20805 is_min = false;
20806 else
20807 return false;
20808
20809 mode = GET_MODE (dest);
20810
20811 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
20812 but MODE may be a vector mode and thus not appropriate. */
20813 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
20814 {
20815 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
20816 rtvec v;
20817
20818 if_true = force_reg (mode, if_true);
20819 v = gen_rtvec (2, if_true, if_false);
20820 tmp = gen_rtx_UNSPEC (mode, v, u);
20821 }
20822 else
20823 {
20824 code = is_min ? SMIN : SMAX;
20825 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
20826 }
20827
20828 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
20829 return true;
20830 }
20831
20832 /* Expand an sse vector comparison. Return the register with the result. */
20833
20834 static rtx
20835 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
20836 rtx op_true, rtx op_false)
20837 {
20838 enum machine_mode mode = GET_MODE (dest);
20839 enum machine_mode cmp_ops_mode = GET_MODE (cmp_op0);
20840
20841 /* In the general case the result of the comparison can differ from the operands' type. */
20842 enum machine_mode cmp_mode;
20843
20844 /* In AVX512F the result of comparison is an integer mask. */
20845 bool maskcmp = false;
20846 rtx x;
20847
20848 if (GET_MODE_SIZE (cmp_ops_mode) == 64)
20849 {
20850 cmp_mode = mode_for_size (GET_MODE_NUNITS (cmp_ops_mode), MODE_INT, 0);
20851 gcc_assert (cmp_mode != BLKmode);
20852
20853 maskcmp = true;
20854 }
20855 else
20856 cmp_mode = cmp_ops_mode;
20857
20858
20859 cmp_op0 = force_reg (cmp_ops_mode, cmp_op0);
20860 if (!nonimmediate_operand (cmp_op1, cmp_ops_mode))
20861 cmp_op1 = force_reg (cmp_ops_mode, cmp_op1);
20862
20863 if (optimize
20864 || reg_overlap_mentioned_p (dest, op_true)
20865 || reg_overlap_mentioned_p (dest, op_false))
20866 dest = gen_reg_rtx (maskcmp ? cmp_mode : mode);
20867
20868 /* Compare patterns for int modes are unspec in AVX512F only. */
20869 if (maskcmp && (code == GT || code == EQ))
20870 {
20871 rtx (*gen)(rtx, rtx, rtx);
20872
20873 switch (cmp_ops_mode)
20874 {
20875 case V16SImode:
20876 gen = code == GT ? gen_avx512f_gtv16si3 : gen_avx512f_eqv16si3_1;
20877 break;
20878 case V8DImode:
20879 gen = code == GT ? gen_avx512f_gtv8di3 : gen_avx512f_eqv8di3_1;
20880 break;
20881 default:
20882 gen = NULL;
20883 }
20884
20885 if (gen)
20886 {
20887 emit_insn (gen (dest, cmp_op0, cmp_op1));
20888 return dest;
20889 }
20890 }
20891 x = gen_rtx_fmt_ee (code, cmp_mode, cmp_op0, cmp_op1);
20892
20893 if (cmp_mode != mode && !maskcmp)
20894 {
20895 x = force_reg (cmp_ops_mode, x);
20896 convert_move (dest, x, false);
20897 }
20898 else
20899 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
20900
20901 return dest;
20902 }
20903
20904 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
20905 operations. This is used for both scalar and vector conditional moves. */
20906
20907 static void
20908 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
20909 {
20910 enum machine_mode mode = GET_MODE (dest);
20911 enum machine_mode cmpmode = GET_MODE (cmp);
20912
20913 /* In AVX512F the result of comparison is an integer mask. */
20914 bool maskcmp = (mode != cmpmode && TARGET_AVX512F);
20915
20916 rtx t2, t3, x;
20917
20918 if (vector_all_ones_operand (op_true, mode)
20919 && rtx_equal_p (op_false, CONST0_RTX (mode))
20920 && !maskcmp)
20921 {
20922 emit_insn (gen_rtx_SET (VOIDmode, dest, cmp));
20923 }
20924 else if (op_false == CONST0_RTX (mode)
20925 && !maskcmp)
20926 {
20927 op_true = force_reg (mode, op_true);
20928 x = gen_rtx_AND (mode, cmp, op_true);
20929 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
20930 }
20931 else if (op_true == CONST0_RTX (mode)
20932 && !maskcmp)
20933 {
20934 op_false = force_reg (mode, op_false);
20935 x = gen_rtx_NOT (mode, cmp);
20936 x = gen_rtx_AND (mode, x, op_false);
20937 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
20938 }
20939 else if (INTEGRAL_MODE_P (mode) && op_true == CONSTM1_RTX (mode)
20940 && !maskcmp)
20941 {
20942 op_false = force_reg (mode, op_false);
20943 x = gen_rtx_IOR (mode, cmp, op_false);
20944 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
20945 }
20946 else if (TARGET_XOP
20947 && !maskcmp)
20948 {
20949 op_true = force_reg (mode, op_true);
20950
20951 if (!nonimmediate_operand (op_false, mode))
20952 op_false = force_reg (mode, op_false);
20953
20954 emit_insn (gen_rtx_SET (mode, dest,
20955 gen_rtx_IF_THEN_ELSE (mode, cmp,
20956 op_true,
20957 op_false)));
20958 }
20959 else
20960 {
20961 rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;
20962 rtx d = dest;
20963
20964 if (!nonimmediate_operand (op_true, mode))
20965 op_true = force_reg (mode, op_true);
20966
20967 op_false = force_reg (mode, op_false);
20968
20969 switch (mode)
20970 {
20971 case V4SFmode:
20972 if (TARGET_SSE4_1)
20973 gen = gen_sse4_1_blendvps;
20974 break;
20975 case V2DFmode:
20976 if (TARGET_SSE4_1)
20977 gen = gen_sse4_1_blendvpd;
20978 break;
20979 case V16QImode:
20980 case V8HImode:
20981 case V4SImode:
20982 case V2DImode:
20983 if (TARGET_SSE4_1)
20984 {
20985 gen = gen_sse4_1_pblendvb;
20986 if (mode != V16QImode)
20987 d = gen_reg_rtx (V16QImode);
20988 op_false = gen_lowpart (V16QImode, op_false);
20989 op_true = gen_lowpart (V16QImode, op_true);
20990 cmp = gen_lowpart (V16QImode, cmp);
20991 }
20992 break;
20993 case V8SFmode:
20994 if (TARGET_AVX)
20995 gen = gen_avx_blendvps256;
20996 break;
20997 case V4DFmode:
20998 if (TARGET_AVX)
20999 gen = gen_avx_blendvpd256;
21000 break;
21001 case V32QImode:
21002 case V16HImode:
21003 case V8SImode:
21004 case V4DImode:
21005 if (TARGET_AVX2)
21006 {
21007 gen = gen_avx2_pblendvb;
21008 if (mode != V32QImode)
21009 d = gen_reg_rtx (V32QImode);
21010 op_false = gen_lowpart (V32QImode, op_false);
21011 op_true = gen_lowpart (V32QImode, op_true);
21012 cmp = gen_lowpart (V32QImode, cmp);
21013 }
21014 break;
21015
21016 case V16SImode:
21017 gen = gen_avx512f_blendmv16si;
21018 break;
21019 case V8DImode:
21020 gen = gen_avx512f_blendmv8di;
21021 break;
21022 case V8DFmode:
21023 gen = gen_avx512f_blendmv8df;
21024 break;
21025 case V16SFmode:
21026 gen = gen_avx512f_blendmv16sf;
21027 break;
21028
21029 default:
21030 break;
21031 }
21032
21033 if (gen != NULL)
21034 {
21035 emit_insn (gen (d, op_false, op_true, cmp));
21036 if (d != dest)
21037 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
21038 }
21039 else
21040 {
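	  /* No blend instruction is available here; emulate the conditional
	     move with the classic mask sequence
	     dest = (cmp & op_true) | (~cmp & op_false).  */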
21041 op_true = force_reg (mode, op_true);
21042
21043 t2 = gen_reg_rtx (mode);
21044 if (optimize)
21045 t3 = gen_reg_rtx (mode);
21046 else
21047 t3 = dest;
21048
21049 x = gen_rtx_AND (mode, op_true, cmp);
21050 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
21051
21052 x = gen_rtx_NOT (mode, cmp);
21053 x = gen_rtx_AND (mode, x, op_false);
21054 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
21055
21056 x = gen_rtx_IOR (mode, t3, t2);
21057 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
21058 }
21059 }
21060 }
21061
21062 /* Expand a floating-point conditional move. Return true if successful. */
21063
21064 bool
21065 ix86_expand_fp_movcc (rtx operands[])
21066 {
21067 enum machine_mode mode = GET_MODE (operands[0]);
21068 enum rtx_code code = GET_CODE (operands[1]);
21069 rtx tmp, compare_op;
21070 rtx op0 = XEXP (operands[1], 0);
21071 rtx op1 = XEXP (operands[1], 1);
21072
21073 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
21074 {
21075 enum machine_mode cmode;
21076
21077 /* Since we've no cmove for sse registers, don't force bad register
21078 allocation just to gain access to it. Deny movcc when the
21079 comparison mode doesn't match the move mode. */
21080 cmode = GET_MODE (op0);
21081 if (cmode == VOIDmode)
21082 cmode = GET_MODE (op1);
21083 if (cmode != mode)
21084 return false;
21085
21086 code = ix86_prepare_sse_fp_compare_args (operands[0], code, &op0, &op1);
21087 if (code == UNKNOWN)
21088 return false;
21089
21090 if (ix86_expand_sse_fp_minmax (operands[0], code, op0, op1,
21091 operands[2], operands[3]))
21092 return true;
21093
21094 tmp = ix86_expand_sse_cmp (operands[0], code, op0, op1,
21095 operands[2], operands[3]);
21096 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
21097 return true;
21098 }
21099
21100 if (GET_MODE (op0) == TImode
21101 || (GET_MODE (op0) == DImode
21102 && !TARGET_64BIT))
21103 return false;
21104
21105 /* The floating point conditional move instructions don't directly
21106 support conditions resulting from a signed integer comparison. */
21107
21108 compare_op = ix86_expand_compare (code, op0, op1);
21109 if (!fcmov_comparison_operator (compare_op, VOIDmode))
21110 {
21111 tmp = gen_reg_rtx (QImode);
21112 ix86_expand_setcc (tmp, code, op0, op1);
21113
21114 compare_op = ix86_expand_compare (NE, tmp, const0_rtx);
21115 }
21116
21117 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
21118 gen_rtx_IF_THEN_ELSE (mode, compare_op,
21119 operands[2], operands[3])));
21120
21121 return true;
21122 }
21123
21124 /* Expand a floating-point vector conditional move; a vcond operation
21125 rather than a movcc operation. */
21126
21127 bool
21128 ix86_expand_fp_vcond (rtx operands[])
21129 {
21130 enum rtx_code code = GET_CODE (operands[3]);
21131 rtx cmp;
21132
21133 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
21134 &operands[4], &operands[5]);
21135 if (code == UNKNOWN)
21136 {
21137 rtx temp;
21138 switch (GET_CODE (operands[3]))
21139 {
21140 case LTGT:
21141 temp = ix86_expand_sse_cmp (operands[0], ORDERED, operands[4],
21142 operands[5], operands[0], operands[0]);
21143 cmp = ix86_expand_sse_cmp (operands[0], NE, operands[4],
21144 operands[5], operands[1], operands[2]);
21145 code = AND;
21146 break;
21147 case UNEQ:
21148 temp = ix86_expand_sse_cmp (operands[0], UNORDERED, operands[4],
21149 operands[5], operands[0], operands[0]);
21150 cmp = ix86_expand_sse_cmp (operands[0], EQ, operands[4],
21151 operands[5], operands[1], operands[2]);
21152 code = IOR;
21153 break;
21154 default:
21155 gcc_unreachable ();
21156 }
21157 cmp = expand_simple_binop (GET_MODE (cmp), code, temp, cmp, cmp, 1,
21158 OPTAB_DIRECT);
21159 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
21160 return true;
21161 }
21162
21163 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
21164 operands[5], operands[1], operands[2]))
21165 return true;
21166
21167 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
21168 operands[1], operands[2]);
21169 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
21170 return true;
21171 }
21172
21173 /* Expand a signed/unsigned integral vector conditional move. */
21174
21175 bool
21176 ix86_expand_int_vcond (rtx operands[])
21177 {
21178 enum machine_mode data_mode = GET_MODE (operands[0]);
21179 enum machine_mode mode = GET_MODE (operands[4]);
21180 enum rtx_code code = GET_CODE (operands[3]);
21181 bool negate = false;
21182 rtx x, cop0, cop1;
21183
21184 cop0 = operands[4];
21185 cop1 = operands[5];
21186
21187 /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31
21188 and x < 0 ? 1 : 0 into (unsigned) x >> 31. */
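  /* An arithmetic right shift by the element width minus one replicates the
     sign bit into every bit (yielding -1 or 0), while a logical right shift
     leaves just that bit (yielding 1 or 0), so no comparison is needed.  */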
21189 if ((code == LT || code == GE)
21190 && data_mode == mode
21191 && cop1 == CONST0_RTX (mode)
21192 && operands[1 + (code == LT)] == CONST0_RTX (data_mode)
21193 && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) > 1
21194 && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) <= 8
21195 && (GET_MODE_SIZE (data_mode) == 16
21196 || (TARGET_AVX2 && GET_MODE_SIZE (data_mode) == 32)))
21197 {
21198 rtx negop = operands[2 - (code == LT)];
21199 int shift = GET_MODE_BITSIZE (GET_MODE_INNER (data_mode)) - 1;
21200 if (negop == CONST1_RTX (data_mode))
21201 {
21202 rtx res = expand_simple_binop (mode, LSHIFTRT, cop0, GEN_INT (shift),
21203 operands[0], 1, OPTAB_DIRECT);
21204 if (res != operands[0])
21205 emit_move_insn (operands[0], res);
21206 return true;
21207 }
21208 else if (GET_MODE_INNER (data_mode) != DImode
21209 && vector_all_ones_operand (negop, data_mode))
21210 {
21211 rtx res = expand_simple_binop (mode, ASHIFTRT, cop0, GEN_INT (shift),
21212 operands[0], 0, OPTAB_DIRECT);
21213 if (res != operands[0])
21214 emit_move_insn (operands[0], res);
21215 return true;
21216 }
21217 }
21218
21219 if (!nonimmediate_operand (cop1, mode))
21220 cop1 = force_reg (mode, cop1);
21221 if (!general_operand (operands[1], data_mode))
21222 operands[1] = force_reg (data_mode, operands[1]);
21223 if (!general_operand (operands[2], data_mode))
21224 operands[2] = force_reg (data_mode, operands[2]);
21225
21226 /* XOP supports all of the comparisons on all 128-bit vector int types. */
21227 if (TARGET_XOP
21228 && (mode == V16QImode || mode == V8HImode
21229 || mode == V4SImode || mode == V2DImode))
21230 ;
21231 else
21232 {
21233 /* Canonicalize the comparison to EQ, GT, GTU. */
21234 switch (code)
21235 {
21236 case EQ:
21237 case GT:
21238 case GTU:
21239 break;
21240
21241 case NE:
21242 case LE:
21243 case LEU:
21244 code = reverse_condition (code);
21245 negate = true;
21246 break;
21247
21248 case GE:
21249 case GEU:
21250 code = reverse_condition (code);
21251 negate = true;
21252 /* FALLTHRU */
21253
21254 case LT:
21255 case LTU:
21256 code = swap_condition (code);
21257 x = cop0, cop0 = cop1, cop1 = x;
21258 break;
21259
21260 default:
21261 gcc_unreachable ();
21262 }
21263
21264 /* Only SSE4.1/SSE4.2 supports V2DImode. */
21265 if (mode == V2DImode)
21266 {
21267 switch (code)
21268 {
21269 case EQ:
21270 /* SSE4.1 supports EQ. */
21271 if (!TARGET_SSE4_1)
21272 return false;
21273 break;
21274
21275 case GT:
21276 case GTU:
21277 /* SSE4.2 supports GT/GTU. */
21278 if (!TARGET_SSE4_2)
21279 return false;
21280 break;
21281
21282 default:
21283 gcc_unreachable ();
21284 }
21285 }
21286
21287 /* Unsigned parallel compare is not supported by the hardware.
21288 Play some tricks to turn this into a signed comparison
21289 against 0. */
21290 if (code == GTU)
21291 {
21292 cop0 = force_reg (mode, cop0);
21293
21294 switch (mode)
21295 {
21296 case V16SImode:
21297 case V8DImode:
21298 case V8SImode:
21299 case V4DImode:
21300 case V4SImode:
21301 case V2DImode:
21302 {
21303 rtx t1, t2, mask;
21304 rtx (*gen_sub3) (rtx, rtx, rtx);
21305
21306 switch (mode)
21307 {
21308 case V16SImode: gen_sub3 = gen_subv16si3; break;
21309 case V8DImode: gen_sub3 = gen_subv8di3; break;
21310 case V8SImode: gen_sub3 = gen_subv8si3; break;
21311 case V4DImode: gen_sub3 = gen_subv4di3; break;
21312 case V4SImode: gen_sub3 = gen_subv4si3; break;
21313 case V2DImode: gen_sub3 = gen_subv2di3; break;
21314 default:
21315 gcc_unreachable ();
21316 }
21317 /* Subtract (-(INT MAX) - 1) from both operands to make
21318 them signed. */
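		   /* Subtracting the sign-bit constant is the same as
		      flipping the sign bit, which maps unsigned order onto
		      signed order: (unsigned) a > (unsigned) b is equivalent
		      to (signed) (a ^ MSB) > (signed) (b ^ MSB).  */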
21319 mask = ix86_build_signbit_mask (mode, true, false);
21320 t1 = gen_reg_rtx (mode);
21321 emit_insn (gen_sub3 (t1, cop0, mask));
21322
21323 t2 = gen_reg_rtx (mode);
21324 emit_insn (gen_sub3 (t2, cop1, mask));
21325
21326 cop0 = t1;
21327 cop1 = t2;
21328 code = GT;
21329 }
21330 break;
21331
21332 case V32QImode:
21333 case V16HImode:
21334 case V16QImode:
21335 case V8HImode:
21336 /* Perform a parallel unsigned saturating subtraction. */
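		/* cop0 <=u cop1 exactly when the saturating difference
		   cop0 -us cop1 is zero, so compare that difference against
		   zero and flip NEGATE to recover the original GTU result.  */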
21337 x = gen_reg_rtx (mode);
21338 emit_insn (gen_rtx_SET (VOIDmode, x,
21339 gen_rtx_US_MINUS (mode, cop0, cop1)));
21340
21341 cop0 = x;
21342 cop1 = CONST0_RTX (mode);
21343 code = EQ;
21344 negate = !negate;
21345 break;
21346
21347 default:
21348 gcc_unreachable ();
21349 }
21350 }
21351 }
21352
21353 /* Allow the comparison to be done in one mode, but the movcc to
21354 happen in another mode. */
21355 if (data_mode == mode)
21356 {
21357 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
21358 operands[1+negate], operands[2-negate]);
21359 }
21360 else
21361 {
21362 gcc_assert (GET_MODE_SIZE (data_mode) == GET_MODE_SIZE (mode));
21363 x = ix86_expand_sse_cmp (gen_reg_rtx (mode), code, cop0, cop1,
21364 operands[1+negate], operands[2-negate]);
21365 if (GET_MODE (x) == mode)
21366 x = gen_lowpart (data_mode, x);
21367 }
21368
21369 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
21370 operands[2-negate]);
21371 return true;
21372 }
21373
21374 static bool
21375 ix86_expand_vec_perm_vpermi2 (rtx target, rtx op0, rtx mask, rtx op1)
21376 {
21377 enum machine_mode mode = GET_MODE (op0);
21378 switch (mode)
21379 {
21380 case V16SImode:
21381 emit_insn (gen_avx512f_vpermi2varv16si3 (target, op0,
21382 force_reg (V16SImode, mask),
21383 op1));
21384 return true;
21385 case V16SFmode:
21386 emit_insn (gen_avx512f_vpermi2varv16sf3 (target, op0,
21387 force_reg (V16SImode, mask),
21388 op1));
21389 return true;
21390 case V8DImode:
21391 emit_insn (gen_avx512f_vpermi2varv8di3 (target, op0,
21392 force_reg (V8DImode, mask), op1));
21393 return true;
21394 case V8DFmode:
21395 emit_insn (gen_avx512f_vpermi2varv8df3 (target, op0,
21396 force_reg (V8DImode, mask), op1));
21397 return true;
21398 default:
21399 return false;
21400 }
21401 }
21402
21403 /* Expand a variable vector permutation. */
21404
21405 void
21406 ix86_expand_vec_perm (rtx operands[])
21407 {
21408 rtx target = operands[0];
21409 rtx op0 = operands[1];
21410 rtx op1 = operands[2];
21411 rtx mask = operands[3];
21412 rtx t1, t2, t3, t4, t5, t6, t7, t8, vt, vt2, vec[32];
21413 enum machine_mode mode = GET_MODE (op0);
21414 enum machine_mode maskmode = GET_MODE (mask);
21415 int w, e, i;
21416 bool one_operand_shuffle = rtx_equal_p (op0, op1);
21417
21418 /* Number of elements in the vector. */
21419 w = GET_MODE_NUNITS (mode);
21420 e = GET_MODE_UNIT_SIZE (mode);
21421 gcc_assert (w <= 64);
21422
21423 if (ix86_expand_vec_perm_vpermi2 (target, op0, mask, op1))
21424 return;
21425
21426 if (TARGET_AVX2)
21427 {
21428 if (mode == V4DImode || mode == V4DFmode || mode == V16HImode)
21429 {
21430 /* Unfortunately, the VPERMQ and VPERMPD instructions only support
21431 a constant shuffle operand. With a tiny bit of effort we can
21432 use VPERMD instead. A re-interpretation stall for V4DFmode is
21433 unfortunate but there's no avoiding it.
21434 Similarly for V16HImode we don't have instructions for variable
21435 shuffling, while for V32QImode, after preparing suitable masks,
21436 we can use vpshufb; vpshufb; vpermq; vpor. */
21437
21438 if (mode == V16HImode)
21439 {
21440 maskmode = mode = V32QImode;
21441 w = 32;
21442 e = 1;
21443 }
21444 else
21445 {
21446 maskmode = mode = V8SImode;
21447 w = 8;
21448 e = 4;
21449 }
21450 t1 = gen_reg_rtx (maskmode);
21451
21452 /* Replicate the low bits of the V4DImode mask into V8SImode:
21453 mask = { A B C D }
21454 t1 = { A A B B C C D D }. */
21455 for (i = 0; i < w / 2; ++i)
21456 vec[i*2 + 1] = vec[i*2] = GEN_INT (i * 2);
21457 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
21458 vt = force_reg (maskmode, vt);
21459 mask = gen_lowpart (maskmode, mask);
21460 if (maskmode == V8SImode)
21461 emit_insn (gen_avx2_permvarv8si (t1, mask, vt));
21462 else
21463 emit_insn (gen_avx2_pshufbv32qi3 (t1, mask, vt));
21464
21465 /* Multiply the shuffle indices by two. */
21466 t1 = expand_simple_binop (maskmode, PLUS, t1, t1, t1, 1,
21467 OPTAB_DIRECT);
21468
21469 /* Add one to the odd shuffle indices:
21470 t1 = { A*2, A*2+1, B*2, B*2+1, ... }. */
21471 for (i = 0; i < w / 2; ++i)
21472 {
21473 vec[i * 2] = const0_rtx;
21474 vec[i * 2 + 1] = const1_rtx;
21475 }
21476 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
21477 vt = validize_mem (force_const_mem (maskmode, vt));
21478 t1 = expand_simple_binop (maskmode, PLUS, t1, vt, t1, 1,
21479 OPTAB_DIRECT);
21480
21481 /* Continue as if V8SImode (resp. V32QImode) was used initially. */
21482 operands[3] = mask = t1;
21483 target = gen_reg_rtx (mode);
21484 op0 = gen_lowpart (mode, op0);
21485 op1 = gen_lowpart (mode, op1);
21486 }
21487
21488 switch (mode)
21489 {
21490 case V8SImode:
21491 /* The VPERMD and VPERMPS instructions already properly ignore
21492 the high bits of the shuffle elements. No need for us to
21493 perform an AND ourselves. */
21494 if (one_operand_shuffle)
21495 {
21496 emit_insn (gen_avx2_permvarv8si (target, op0, mask));
21497 if (target != operands[0])
21498 emit_move_insn (operands[0],
21499 gen_lowpart (GET_MODE (operands[0]), target));
21500 }
21501 else
21502 {
21503 t1 = gen_reg_rtx (V8SImode);
21504 t2 = gen_reg_rtx (V8SImode);
21505 emit_insn (gen_avx2_permvarv8si (t1, op0, mask));
21506 emit_insn (gen_avx2_permvarv8si (t2, op1, mask));
21507 goto merge_two;
21508 }
21509 return;
21510
21511 case V8SFmode:
21512 mask = gen_lowpart (V8SImode, mask);
21513 if (one_operand_shuffle)
21514 emit_insn (gen_avx2_permvarv8sf (target, op0, mask));
21515 else
21516 {
21517 t1 = gen_reg_rtx (V8SFmode);
21518 t2 = gen_reg_rtx (V8SFmode);
21519 emit_insn (gen_avx2_permvarv8sf (t1, op0, mask));
21520 emit_insn (gen_avx2_permvarv8sf (t2, op1, mask));
21521 goto merge_two;
21522 }
21523 return;
21524
21525 case V4SImode:
21526 /* By combining the two 128-bit input vectors into one 256-bit
21527 input vector, we can use VPERMD and VPERMPS for the full
21528 two-operand shuffle. */
21529 t1 = gen_reg_rtx (V8SImode);
21530 t2 = gen_reg_rtx (V8SImode);
21531 emit_insn (gen_avx_vec_concatv8si (t1, op0, op1));
21532 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
21533 emit_insn (gen_avx2_permvarv8si (t1, t1, t2));
21534 emit_insn (gen_avx_vextractf128v8si (target, t1, const0_rtx));
21535 return;
21536
21537 case V4SFmode:
21538 t1 = gen_reg_rtx (V8SFmode);
21539 t2 = gen_reg_rtx (V8SImode);
21540 mask = gen_lowpart (V4SImode, mask);
21541 emit_insn (gen_avx_vec_concatv8sf (t1, op0, op1));
21542 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
21543 emit_insn (gen_avx2_permvarv8sf (t1, t1, t2));
21544 emit_insn (gen_avx_vextractf128v8sf (target, t1, const0_rtx));
21545 return;
21546
21547 case V32QImode:
21548 t1 = gen_reg_rtx (V32QImode);
21549 t2 = gen_reg_rtx (V32QImode);
21550 t3 = gen_reg_rtx (V32QImode);
21551 vt2 = GEN_INT (-128);
21552 for (i = 0; i < 32; i++)
21553 vec[i] = vt2;
21554 vt = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
21555 vt = force_reg (V32QImode, vt);
21556 for (i = 0; i < 32; i++)
21557 vec[i] = i < 16 ? vt2 : const0_rtx;
21558 vt2 = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
21559 vt2 = force_reg (V32QImode, vt2);
21560 /* From mask create two adjusted masks, which contain the same
21561 bits as mask in the low 7 bits of each vector element.
21562 The first mask will have the most significant bit clear
21563 if it requests element from the same 128-bit lane
21564 and MSB set if it requests element from the other 128-bit lane.
21565 The second mask will have the opposite values of the MSB,
21566 and additionally will have its 128-bit lanes swapped.
21567 E.g. { 07 12 1e 09 ... | 17 19 05 1f ... } mask vector will have
21568 t1 { 07 92 9e 09 ... | 17 19 85 1f ... } and
21569 t3 { 97 99 05 9f ... | 87 12 1e 89 ... } where each ...
21570 stands for the other 12 bytes. */
21571 /* The bit telling whether an element is from the same lane or the other
21572 lane is bit 4, so shift it up by 3 to the MSB position. */
21573 t5 = gen_reg_rtx (V4DImode);
21574 emit_insn (gen_ashlv4di3 (t5, gen_lowpart (V4DImode, mask),
21575 GEN_INT (3)));
21576 /* Clear MSB bits from the mask just in case it had them set. */
21577 emit_insn (gen_avx2_andnotv32qi3 (t2, vt, mask));
21578 /* After this t1 will have MSB set for elements from other lane. */
21579 emit_insn (gen_xorv32qi3 (t1, gen_lowpart (V32QImode, t5), vt2));
21580 /* Clear bits other than MSB. */
21581 emit_insn (gen_andv32qi3 (t1, t1, vt));
21582 /* Or in the lower bits from mask into t3. */
21583 emit_insn (gen_iorv32qi3 (t3, t1, t2));
21584 /* And invert MSB bits in t1, so MSB is set for elements from the same
21585 lane. */
21586 emit_insn (gen_xorv32qi3 (t1, t1, vt));
21587 /* Swap 128-bit lanes in t3. */
21588 t6 = gen_reg_rtx (V4DImode);
21589 emit_insn (gen_avx2_permv4di_1 (t6, gen_lowpart (V4DImode, t3),
21590 const2_rtx, GEN_INT (3),
21591 const0_rtx, const1_rtx));
21592 /* And or in the lower bits from mask into t1. */
21593 emit_insn (gen_iorv32qi3 (t1, t1, t2));
21594 if (one_operand_shuffle)
21595 {
21596 /* Each of these shuffles will put 0s in places where an
21597 element from the other 128-bit lane is needed, and otherwise
21598 will shuffle in the requested value. */
21599 emit_insn (gen_avx2_pshufbv32qi3 (t3, op0,
21600 gen_lowpart (V32QImode, t6)));
21601 emit_insn (gen_avx2_pshufbv32qi3 (t1, op0, t1));
21602 /* For t3 the 128-bit lanes are swapped again. */
21603 t7 = gen_reg_rtx (V4DImode);
21604 emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t3),
21605 const2_rtx, GEN_INT (3),
21606 const0_rtx, const1_rtx));
21607 /* And oring both together leads to the result. */
21608 emit_insn (gen_iorv32qi3 (target, t1,
21609 gen_lowpart (V32QImode, t7)));
21610 if (target != operands[0])
21611 emit_move_insn (operands[0],
21612 gen_lowpart (GET_MODE (operands[0]), target));
21613 return;
21614 }
21615
21616 t4 = gen_reg_rtx (V32QImode);
21617 /* Similar to the one_operand_shuffle code above, just
21618 repeated for each of the two operands. The merge_two:
21619 code will merge the two results together. */
21620 emit_insn (gen_avx2_pshufbv32qi3 (t4, op0,
21621 gen_lowpart (V32QImode, t6)));
21622 emit_insn (gen_avx2_pshufbv32qi3 (t3, op1,
21623 gen_lowpart (V32QImode, t6)));
21624 emit_insn (gen_avx2_pshufbv32qi3 (t2, op0, t1));
21625 emit_insn (gen_avx2_pshufbv32qi3 (t1, op1, t1));
21626 t7 = gen_reg_rtx (V4DImode);
21627 emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t4),
21628 const2_rtx, GEN_INT (3),
21629 const0_rtx, const1_rtx));
21630 t8 = gen_reg_rtx (V4DImode);
21631 emit_insn (gen_avx2_permv4di_1 (t8, gen_lowpart (V4DImode, t3),
21632 const2_rtx, GEN_INT (3),
21633 const0_rtx, const1_rtx));
21634 emit_insn (gen_iorv32qi3 (t4, t2, gen_lowpart (V32QImode, t7)));
21635 emit_insn (gen_iorv32qi3 (t3, t1, gen_lowpart (V32QImode, t8)));
21636 t1 = t4;
21637 t2 = t3;
21638 goto merge_two;
21639
21640 default:
21641 gcc_assert (GET_MODE_SIZE (mode) <= 16);
21642 break;
21643 }
21644 }
21645
21646 if (TARGET_XOP)
21647 {
21648 /* The XOP VPPERM insn supports three inputs. By ignoring the
21649 one_operand_shuffle special case, we avoid creating another
21650 set of constant vectors in memory. */
21651 one_operand_shuffle = false;
21652
21653 /* mask = mask & {2*w-1, ...} */
21654 vt = GEN_INT (2*w - 1);
21655 }
21656 else
21657 {
21658 /* mask = mask & {w-1, ...} */
21659 vt = GEN_INT (w - 1);
21660 }
21661
21662 for (i = 0; i < w; i++)
21663 vec[i] = vt;
21664 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
21665 mask = expand_simple_binop (maskmode, AND, mask, vt,
21666 NULL_RTX, 0, OPTAB_DIRECT);
21667
21668 /* For non-QImode operations, convert the word permutation control
21669 into a byte permutation control. */
21670 if (mode != V16QImode)
21671 {
21672 mask = expand_simple_binop (maskmode, ASHIFT, mask,
21673 GEN_INT (exact_log2 (e)),
21674 NULL_RTX, 0, OPTAB_DIRECT);
21675
21676 /* Convert mask to vector of chars. */
21677 mask = force_reg (V16QImode, gen_lowpart (V16QImode, mask));
21678
21679 /* Replicate each of the input bytes into byte positions:
21680 (v2di) --> {0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8}
21681 (v4si) --> {0,0,0,0, 4,4,4,4, 8,8,8,8, 12,12,12,12}
21682 (v8hi) --> {0,0, 2,2, 4,4, 6,6, ...}. */
21683 for (i = 0; i < 16; ++i)
21684 vec[i] = GEN_INT (i/e * e);
21685 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
21686 vt = validize_mem (force_const_mem (V16QImode, vt));
21687 if (TARGET_XOP)
21688 emit_insn (gen_xop_pperm (mask, mask, mask, vt));
21689 else
21690 emit_insn (gen_ssse3_pshufbv16qi3 (mask, mask, vt));
21691
21692 /* Convert it into the byte positions by doing
21693 mask = mask + {0,1,..,16/w-1, 0,1,..,16/w-1, ...} */
21694 for (i = 0; i < 16; ++i)
21695 vec[i] = GEN_INT (i % e);
21696 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
21697 vt = validize_mem (force_const_mem (V16QImode, vt));
21698 emit_insn (gen_addv16qi3 (mask, mask, vt));
21699 }
21700
21701 /* The actual shuffle operations all operate on V16QImode. */
21702 op0 = gen_lowpart (V16QImode, op0);
21703 op1 = gen_lowpart (V16QImode, op1);
21704
21705 if (TARGET_XOP)
21706 {
21707 if (GET_MODE (target) != V16QImode)
21708 target = gen_reg_rtx (V16QImode);
21709 emit_insn (gen_xop_pperm (target, op0, op1, mask));
21710 if (target != operands[0])
21711 emit_move_insn (operands[0],
21712 gen_lowpart (GET_MODE (operands[0]), target));
21713 }
21714 else if (one_operand_shuffle)
21715 {
21716 if (GET_MODE (target) != V16QImode)
21717 target = gen_reg_rtx (V16QImode);
21718 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, mask));
21719 if (target != operands[0])
21720 emit_move_insn (operands[0],
21721 gen_lowpart (GET_MODE (operands[0]), target));
21722 }
21723 else
21724 {
21725 rtx xops[6];
21726 bool ok;
21727
21728 /* Shuffle the two input vectors independently. */
21729 t1 = gen_reg_rtx (V16QImode);
21730 t2 = gen_reg_rtx (V16QImode);
21731 emit_insn (gen_ssse3_pshufbv16qi3 (t1, op0, mask));
21732 emit_insn (gen_ssse3_pshufbv16qi3 (t2, op1, mask));
21733
21734 merge_two:
21735 /* Then merge them together. The key is whether any given control
21736 element contained a bit set that indicates the second word. */
21737 mask = operands[3];
21738 vt = GEN_INT (w);
21739 if (maskmode == V2DImode && !TARGET_SSE4_1)
21740 {
21741 /* Without SSE4.1, we don't have V2DImode EQ. Perform one
21742 more shuffle to convert the V2DI input mask into a V4SI
21743 input mask. At that point the masking that expand_int_vcond
21744 performs will work as desired. */
21745 rtx t3 = gen_reg_rtx (V4SImode);
21746 emit_insn (gen_sse2_pshufd_1 (t3, gen_lowpart (V4SImode, mask),
21747 const0_rtx, const0_rtx,
21748 const2_rtx, const2_rtx));
21749 mask = t3;
21750 maskmode = V4SImode;
21751 e = w = 4;
21752 }
21753
21754 for (i = 0; i < w; i++)
21755 vec[i] = vt;
21756 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
21757 vt = force_reg (maskmode, vt);
21758 mask = expand_simple_binop (maskmode, AND, mask, vt,
21759 NULL_RTX, 0, OPTAB_DIRECT);
21760
21761 if (GET_MODE (target) != mode)
21762 target = gen_reg_rtx (mode);
21763 xops[0] = target;
21764 xops[1] = gen_lowpart (mode, t2);
21765 xops[2] = gen_lowpart (mode, t1);
21766 xops[3] = gen_rtx_EQ (maskmode, mask, vt);
21767 xops[4] = mask;
21768 xops[5] = vt;
21769 ok = ix86_expand_int_vcond (xops);
21770 gcc_assert (ok);
21771 if (target != operands[0])
21772 emit_move_insn (operands[0],
21773 gen_lowpart (GET_MODE (operands[0]), target));
21774 }
21775 }
21776
21777 /* Unpack SRC into DEST as the next wider integer vector type. UNSIGNED_P is
21778 true if we should do zero extension, else sign extension. HIGH_P is
21779 true if we want the N/2 high elements, else the low elements. */
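/* For example (illustration only): with SRC in V16QImode, UNSIGNED_P true
   and HIGH_P false, DEST receives the low 8 bytes of SRC zero-extended
   into V8HImode; with HIGH_P true, the high 8 bytes are used instead. */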
21780
21781 void
21782 ix86_expand_sse_unpack (rtx dest, rtx src, bool unsigned_p, bool high_p)
21783 {
21784 enum machine_mode imode = GET_MODE (src);
21785 rtx tmp;
21786
21787 if (TARGET_SSE4_1)
21788 {
21789 rtx (*unpack)(rtx, rtx);
21790 rtx (*extract)(rtx, rtx) = NULL;
21791 enum machine_mode halfmode = BLKmode;
21792
21793 switch (imode)
21794 {
21795 case V32QImode:
21796 if (unsigned_p)
21797 unpack = gen_avx2_zero_extendv16qiv16hi2;
21798 else
21799 unpack = gen_avx2_sign_extendv16qiv16hi2;
21800 halfmode = V16QImode;
21801 extract
21802 = high_p ? gen_vec_extract_hi_v32qi : gen_vec_extract_lo_v32qi;
21803 break;
21804 case V32HImode:
21805 if (unsigned_p)
21806 unpack = gen_avx512f_zero_extendv16hiv16si2;
21807 else
21808 unpack = gen_avx512f_sign_extendv16hiv16si2;
21809 halfmode = V16HImode;
21810 extract
21811 = high_p ? gen_vec_extract_hi_v32hi : gen_vec_extract_lo_v32hi;
21812 break;
21813 case V16HImode:
21814 if (unsigned_p)
21815 unpack = gen_avx2_zero_extendv8hiv8si2;
21816 else
21817 unpack = gen_avx2_sign_extendv8hiv8si2;
21818 halfmode = V8HImode;
21819 extract
21820 = high_p ? gen_vec_extract_hi_v16hi : gen_vec_extract_lo_v16hi;
21821 break;
21822 case V16SImode:
21823 if (unsigned_p)
21824 unpack = gen_avx512f_zero_extendv8siv8di2;
21825 else
21826 unpack = gen_avx512f_sign_extendv8siv8di2;
21827 halfmode = V8SImode;
21828 extract
21829 = high_p ? gen_vec_extract_hi_v16si : gen_vec_extract_lo_v16si;
21830 break;
21831 case V8SImode:
21832 if (unsigned_p)
21833 unpack = gen_avx2_zero_extendv4siv4di2;
21834 else
21835 unpack = gen_avx2_sign_extendv4siv4di2;
21836 halfmode = V4SImode;
21837 extract
21838 = high_p ? gen_vec_extract_hi_v8si : gen_vec_extract_lo_v8si;
21839 break;
21840 case V16QImode:
21841 if (unsigned_p)
21842 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
21843 else
21844 unpack = gen_sse4_1_sign_extendv8qiv8hi2;
21845 break;
21846 case V8HImode:
21847 if (unsigned_p)
21848 unpack = gen_sse4_1_zero_extendv4hiv4si2;
21849 else
21850 unpack = gen_sse4_1_sign_extendv4hiv4si2;
21851 break;
21852 case V4SImode:
21853 if (unsigned_p)
21854 unpack = gen_sse4_1_zero_extendv2siv2di2;
21855 else
21856 unpack = gen_sse4_1_sign_extendv2siv2di2;
21857 break;
21858 default:
21859 gcc_unreachable ();
21860 }
21861
21862 if (GET_MODE_SIZE (imode) >= 32)
21863 {
21864 tmp = gen_reg_rtx (halfmode);
21865 emit_insn (extract (tmp, src));
21866 }
21867 else if (high_p)
21868 {
21869 /* Shift higher 8 bytes to lower 8 bytes. */
21870 tmp = gen_reg_rtx (V1TImode);
21871 emit_insn (gen_sse2_lshrv1ti3 (tmp, gen_lowpart (V1TImode, src),
21872 GEN_INT (64)));
21873 tmp = gen_lowpart (imode, tmp);
21874 }
21875 else
21876 tmp = src;
21877
21878 emit_insn (unpack (dest, tmp));
21879 }
21880 else
21881 {
21882 rtx (*unpack)(rtx, rtx, rtx);
21883
21884 switch (imode)
21885 {
21886 case V16QImode:
21887 if (high_p)
21888 unpack = gen_vec_interleave_highv16qi;
21889 else
21890 unpack = gen_vec_interleave_lowv16qi;
21891 break;
21892 case V8HImode:
21893 if (high_p)
21894 unpack = gen_vec_interleave_highv8hi;
21895 else
21896 unpack = gen_vec_interleave_lowv8hi;
21897 break;
21898 case V4SImode:
21899 if (high_p)
21900 unpack = gen_vec_interleave_highv4si;
21901 else
21902 unpack = gen_vec_interleave_lowv4si;
21903 break;
21904 default:
21905 gcc_unreachable ();
21906 }
21907
21908 if (unsigned_p)
21909 tmp = force_reg (imode, CONST0_RTX (imode));
21910 else
21911 tmp = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
21912 src, pc_rtx, pc_rtx);
21913
21914 rtx tmp2 = gen_reg_rtx (imode);
21915 emit_insn (unpack (tmp2, src, tmp));
21916 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), tmp2));
21917 }
21918 }
21919
21920 /* Expand conditional increment or decrement using adc/sbb instructions.
21921 The default case using setcc followed by the conditional move can be
21922 done by generic code. */
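/* For instance (illustrative sketch, not the exact RTL): an unsigned
   "if (a < b) x++;" can be emitted as "cmp a, b" followed by "adc x, 0",
   consuming the carry flag produced by the comparison directly. */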
21923 bool
21924 ix86_expand_int_addcc (rtx operands[])
21925 {
21926 enum rtx_code code = GET_CODE (operands[1]);
21927 rtx flags;
21928 rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
21929 rtx compare_op;
21930 rtx val = const0_rtx;
21931 bool fpcmp = false;
21932 enum machine_mode mode;
21933 rtx op0 = XEXP (operands[1], 0);
21934 rtx op1 = XEXP (operands[1], 1);
21935
21936 if (operands[3] != const1_rtx
21937 && operands[3] != constm1_rtx)
21938 return false;
21939 if (!ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
21940 return false;
21941 code = GET_CODE (compare_op);
21942
21943 flags = XEXP (compare_op, 0);
21944
21945 if (GET_MODE (flags) == CCFPmode
21946 || GET_MODE (flags) == CCFPUmode)
21947 {
21948 fpcmp = true;
21949 code = ix86_fp_compare_code_to_integer (code);
21950 }
21951
21952 if (code != LTU)
21953 {
21954 val = constm1_rtx;
21955 if (fpcmp)
21956 PUT_CODE (compare_op,
21957 reverse_condition_maybe_unordered
21958 (GET_CODE (compare_op)));
21959 else
21960 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
21961 }
21962
21963 mode = GET_MODE (operands[0]);
21964
21965 /* Construct either adc or sbb insn. */
21966 if ((code == LTU) == (operands[3] == constm1_rtx))
21967 {
21968 switch (mode)
21969 {
21970 case QImode:
21971 insn = gen_subqi3_carry;
21972 break;
21973 case HImode:
21974 insn = gen_subhi3_carry;
21975 break;
21976 case SImode:
21977 insn = gen_subsi3_carry;
21978 break;
21979 case DImode:
21980 insn = gen_subdi3_carry;
21981 break;
21982 default:
21983 gcc_unreachable ();
21984 }
21985 }
21986 else
21987 {
21988 switch (mode)
21989 {
21990 case QImode:
21991 insn = gen_addqi3_carry;
21992 break;
21993 case HImode:
21994 insn = gen_addhi3_carry;
21995 break;
21996 case SImode:
21997 insn = gen_addsi3_carry;
21998 break;
21999 case DImode:
22000 insn = gen_adddi3_carry;
22001 break;
22002 default:
22003 gcc_unreachable ();
22004 }
22005 }
22006 emit_insn (insn (operands[0], operands[2], val, flags, compare_op));
22007
22008 return true;
22009 }
22010
22011
22012 /* Split OPERAND into half-mode parts stored in PARTS. Similar to
22013 split_double_mode, but works for floating point parameters and
22014 non-offsettable memories. For pushes, it returns just stack offsets;
22015 the values will be saved in the right order. Maximally four parts are generated. */
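/* For example (illustration): on a 32-bit target a DFmode operand is split
   into two SImode parts and an XFmode operand into three, while on a 64-bit
   target XFmode/TFmode are split into a DImode part plus an SImode or
   DImode upper part. */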
22016
22017 static int
22018 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
22019 {
22020 int size;
22021
22022 if (!TARGET_64BIT)
22023 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
22024 else
22025 size = (GET_MODE_SIZE (mode) + 4) / 8;
22026
22027 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
22028 gcc_assert (size >= 2 && size <= 4);
22029
22030 /* Optimize constant pool reference to immediates. This is used by fp
22031 moves, that force all constants to memory to allow combining. */
22032 if (MEM_P (operand) && MEM_READONLY_P (operand))
22033 {
22034 rtx tmp = maybe_get_pool_constant (operand);
22035 if (tmp)
22036 operand = tmp;
22037 }
22038
22039 if (MEM_P (operand) && !offsettable_memref_p (operand))
22040 {
22041 /* The only non-offsettable memories we handle are pushes. */
22042 int ok = push_operand (operand, VOIDmode);
22043
22044 gcc_assert (ok);
22045
22046 operand = copy_rtx (operand);
22047 PUT_MODE (operand, word_mode);
22048 parts[0] = parts[1] = parts[2] = parts[3] = operand;
22049 return size;
22050 }
22051
22052 if (GET_CODE (operand) == CONST_VECTOR)
22053 {
22054 enum machine_mode imode = int_mode_for_mode (mode);
22055 /* Caution: if we looked through a constant pool memory above,
22056 the operand may actually have a different mode now. That's
22057 ok, since we want to pun this all the way back to an integer. */
22058 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
22059 gcc_assert (operand != NULL);
22060 mode = imode;
22061 }
22062
22063 if (!TARGET_64BIT)
22064 {
22065 if (mode == DImode)
22066 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
22067 else
22068 {
22069 int i;
22070
22071 if (REG_P (operand))
22072 {
22073 gcc_assert (reload_completed);
22074 for (i = 0; i < size; i++)
22075 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
22076 }
22077 else if (offsettable_memref_p (operand))
22078 {
22079 operand = adjust_address (operand, SImode, 0);
22080 parts[0] = operand;
22081 for (i = 1; i < size; i++)
22082 parts[i] = adjust_address (operand, SImode, 4 * i);
22083 }
22084 else if (GET_CODE (operand) == CONST_DOUBLE)
22085 {
22086 REAL_VALUE_TYPE r;
22087 long l[4];
22088
22089 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
22090 switch (mode)
22091 {
22092 case TFmode:
22093 real_to_target (l, &r, mode);
22094 parts[3] = gen_int_mode (l[3], SImode);
22095 parts[2] = gen_int_mode (l[2], SImode);
22096 break;
22097 case XFmode:
22098 /* We can't use REAL_VALUE_TO_TARGET_LONG_DOUBLE since
22099 long double may not be 80-bit. */
22100 real_to_target (l, &r, mode);
22101 parts[2] = gen_int_mode (l[2], SImode);
22102 break;
22103 case DFmode:
22104 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
22105 break;
22106 default:
22107 gcc_unreachable ();
22108 }
22109 parts[1] = gen_int_mode (l[1], SImode);
22110 parts[0] = gen_int_mode (l[0], SImode);
22111 }
22112 else
22113 gcc_unreachable ();
22114 }
22115 }
22116 else
22117 {
22118 if (mode == TImode)
22119 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
22120 if (mode == XFmode || mode == TFmode)
22121 {
22122 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
22123 if (REG_P (operand))
22124 {
22125 gcc_assert (reload_completed);
22126 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
22127 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
22128 }
22129 else if (offsettable_memref_p (operand))
22130 {
22131 operand = adjust_address (operand, DImode, 0);
22132 parts[0] = operand;
22133 parts[1] = adjust_address (operand, upper_mode, 8);
22134 }
22135 else if (GET_CODE (operand) == CONST_DOUBLE)
22136 {
22137 REAL_VALUE_TYPE r;
22138 long l[4];
22139
22140 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
22141 real_to_target (l, &r, mode);
22142
22143 /* Do not use shift by 32 to avoid warning on 32bit systems. */
22144 if (HOST_BITS_PER_WIDE_INT >= 64)
22145 parts[0]
22146 = gen_int_mode
22147 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
22148 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
22149 DImode);
22150 else
22151 parts[0] = immed_double_const (l[0], l[1], DImode);
22152
22153 if (upper_mode == SImode)
22154 parts[1] = gen_int_mode (l[2], SImode);
22155 else if (HOST_BITS_PER_WIDE_INT >= 64)
22156 parts[1]
22157 = gen_int_mode
22158 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
22159 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
22160 DImode);
22161 else
22162 parts[1] = immed_double_const (l[2], l[3], DImode);
22163 }
22164 else
22165 gcc_unreachable ();
22166 }
22167 }
22168
22169 return size;
22170 }
22171
22172 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
22173 The value is split into word-sized parts; operands 2-5 are filled with
22174 the destination parts and operands 6-9 with the source parts, in the
22175 correct order, and the moves are emitted from there. */
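/* For example (illustration): a DImode move on a 32-bit target becomes two
   SImode moves; the order of the part moves is reversed below when the
   first destination part would clobber a register still needed to address
   or hold the source. */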
22176
22177 void
22178 ix86_split_long_move (rtx operands[])
22179 {
22180 rtx part[2][4];
22181 int nparts, i, j;
22182 int push = 0;
22183 int collisions = 0;
22184 enum machine_mode mode = GET_MODE (operands[0]);
22185 bool collisionparts[4];
22186
22187 /* The DFmode expanders may ask us to move a double.
22188 For a 64-bit target this is a single move. By hiding that fact
22189 here we simplify the i386.md splitters. */
22190 if (TARGET_64BIT && GET_MODE_SIZE (GET_MODE (operands[0])) == 8)
22191 {
22192 /* Optimize constant pool reference to immediates. This is used by
22193 fp moves, that force all constants to memory to allow combining. */
22194
22195 if (MEM_P (operands[1])
22196 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
22197 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
22198 operands[1] = get_pool_constant (XEXP (operands[1], 0));
22199 if (push_operand (operands[0], VOIDmode))
22200 {
22201 operands[0] = copy_rtx (operands[0]);
22202 PUT_MODE (operands[0], word_mode);
22203 }
22204 else
22205 operands[0] = gen_lowpart (DImode, operands[0]);
22206 operands[1] = gen_lowpart (DImode, operands[1]);
22207 emit_move_insn (operands[0], operands[1]);
22208 return;
22209 }
22210
22211 /* The only non-offsettable memory we handle is push. */
22212 if (push_operand (operands[0], VOIDmode))
22213 push = 1;
22214 else
22215 gcc_assert (!MEM_P (operands[0])
22216 || offsettable_memref_p (operands[0]));
22217
22218 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
22219 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
22220
22221 /* When emitting a push, take care of source operands on the stack. */
22222 if (push && MEM_P (operands[1])
22223 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
22224 {
22225 rtx src_base = XEXP (part[1][nparts - 1], 0);
22226
22227 /* Compensate for the stack decrement by 4. */
22228 if (!TARGET_64BIT && nparts == 3
22229 && mode == XFmode && TARGET_128BIT_LONG_DOUBLE)
22230 src_base = plus_constant (Pmode, src_base, 4);
22231
22232 /* src_base refers to the stack pointer and is
22233 automatically decreased by emitted push. */
22234 for (i = 0; i < nparts; i++)
22235 part[1][i] = change_address (part[1][i],
22236 GET_MODE (part[1][i]), src_base);
22237 }
22238
22239 /* We need to do the copy in the right order in case an address register
22240 of the source overlaps the destination. */
22241 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
22242 {
22243 rtx tmp;
22244
22245 for (i = 0; i < nparts; i++)
22246 {
22247 collisionparts[i]
22248 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
22249 if (collisionparts[i])
22250 collisions++;
22251 }
22252
22253 /* Collision in the middle part can be handled by reordering. */
22254 if (collisions == 1 && nparts == 3 && collisionparts [1])
22255 {
22256 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
22257 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
22258 }
22259 else if (collisions == 1
22260 && nparts == 4
22261 && (collisionparts [1] || collisionparts [2]))
22262 {
22263 if (collisionparts [1])
22264 {
22265 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
22266 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
22267 }
22268 else
22269 {
22270 tmp = part[0][2]; part[0][2] = part[0][3]; part[0][3] = tmp;
22271 tmp = part[1][2]; part[1][2] = part[1][3]; part[1][3] = tmp;
22272 }
22273 }
22274
22275 /* If there are more collisions, we can't handle it by reordering.
22276 Do an lea to the last part and use only one colliding move. */
22277 else if (collisions > 1)
22278 {
22279 rtx base;
22280
22281 collisions = 1;
22282
22283 base = part[0][nparts - 1];
22284
22285 /* Handle the case when the last part isn't valid for lea.
22286 Happens in 64-bit mode storing the 12-byte XFmode. */
22287 if (GET_MODE (base) != Pmode)
22288 base = gen_rtx_REG (Pmode, REGNO (base));
22289
22290 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
22291 part[1][0] = replace_equiv_address (part[1][0], base);
22292 for (i = 1; i < nparts; i++)
22293 {
22294 tmp = plus_constant (Pmode, base, UNITS_PER_WORD * i);
22295 part[1][i] = replace_equiv_address (part[1][i], tmp);
22296 }
22297 }
22298 }
22299
22300 if (push)
22301 {
22302 if (!TARGET_64BIT)
22303 {
22304 if (nparts == 3)
22305 {
22306 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
22307 emit_insn (ix86_gen_add3 (stack_pointer_rtx,
22308 stack_pointer_rtx, GEN_INT (-4)));
22309 emit_move_insn (part[0][2], part[1][2]);
22310 }
22311 else if (nparts == 4)
22312 {
22313 emit_move_insn (part[0][3], part[1][3]);
22314 emit_move_insn (part[0][2], part[1][2]);
22315 }
22316 }
22317 else
22318 {
22319 /* In 64-bit mode we don't have a 32-bit push available. In case this is
22320 a register, it is OK - we will just use the larger counterpart. We also
22321 retype memory - these come from an attempt to avoid the REX prefix on
22322 moving of the second half of a TFmode value. */
22323 if (GET_MODE (part[1][1]) == SImode)
22324 {
22325 switch (GET_CODE (part[1][1]))
22326 {
22327 case MEM:
22328 part[1][1] = adjust_address (part[1][1], DImode, 0);
22329 break;
22330
22331 case REG:
22332 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
22333 break;
22334
22335 default:
22336 gcc_unreachable ();
22337 }
22338
22339 if (GET_MODE (part[1][0]) == SImode)
22340 part[1][0] = part[1][1];
22341 }
22342 }
22343 emit_move_insn (part[0][1], part[1][1]);
22344 emit_move_insn (part[0][0], part[1][0]);
22345 return;
22346 }
22347
22348 /* Choose correct order to not overwrite the source before it is copied. */
22349 if ((REG_P (part[0][0])
22350 && REG_P (part[1][1])
22351 && (REGNO (part[0][0]) == REGNO (part[1][1])
22352 || (nparts == 3
22353 && REGNO (part[0][0]) == REGNO (part[1][2]))
22354 || (nparts == 4
22355 && REGNO (part[0][0]) == REGNO (part[1][3]))))
22356 || (collisions > 0
22357 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
22358 {
22359 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
22360 {
22361 operands[2 + i] = part[0][j];
22362 operands[6 + i] = part[1][j];
22363 }
22364 }
22365 else
22366 {
22367 for (i = 0; i < nparts; i++)
22368 {
22369 operands[2 + i] = part[0][i];
22370 operands[6 + i] = part[1][i];
22371 }
22372 }
22373
22374 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
22375 if (optimize_insn_for_size_p ())
22376 {
22377 for (j = 0; j < nparts - 1; j++)
22378 if (CONST_INT_P (operands[6 + j])
22379 && operands[6 + j] != const0_rtx
22380 && REG_P (operands[2 + j]))
22381 for (i = j; i < nparts - 1; i++)
22382 if (CONST_INT_P (operands[7 + i])
22383 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
22384 operands[7 + i] = operands[2 + j];
22385 }
22386
22387 for (i = 0; i < nparts; i++)
22388 emit_move_insn (operands[2 + i], operands[6 + i]);
22389
22390 return;
22391 }
22392
22393 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
22394 left shift by a constant, either using a single shift or
22395 a sequence of add instructions. */
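/* For example (illustration): a shift left by 1 of an SImode half is
   emitted as a single "add reg, reg"; whether adds or a real shift are
   used for larger counts depends on the cost tables and on whether we
   optimize for size. */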
22396
22397 static void
22398 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
22399 {
22400 rtx (*insn)(rtx, rtx, rtx);
22401
22402 if (count == 1
22403 || (count * ix86_cost->add <= ix86_cost->shift_const
22404 && !optimize_insn_for_size_p ()))
22405 {
22406 insn = mode == DImode ? gen_addsi3 : gen_adddi3;
22407 while (count-- > 0)
22408 emit_insn (insn (operand, operand, operand));
22409 }
22410 else
22411 {
22412 insn = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
22413 emit_insn (insn (operand, operand, GEN_INT (count)));
22414 }
22415 }
22416
22417 void
22418 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
22419 {
22420 rtx (*gen_ashl3)(rtx, rtx, rtx);
22421 rtx (*gen_shld)(rtx, rtx, rtx);
22422 int half_width = GET_MODE_BITSIZE (mode) >> 1;
22423
22424 rtx low[2], high[2];
22425 int count;
22426
22427 if (CONST_INT_P (operands[2]))
22428 {
22429 split_double_mode (mode, operands, 2, low, high);
22430 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
22431
22432 if (count >= half_width)
22433 {
22434 emit_move_insn (high[0], low[1]);
22435 emit_move_insn (low[0], const0_rtx);
22436
22437 if (count > half_width)
22438 ix86_expand_ashl_const (high[0], count - half_width, mode);
22439 }
22440 else
22441 {
22442 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
22443
22444 if (!rtx_equal_p (operands[0], operands[1]))
22445 emit_move_insn (operands[0], operands[1]);
22446
22447 emit_insn (gen_shld (high[0], low[0], GEN_INT (count)));
22448 ix86_expand_ashl_const (low[0], count, mode);
22449 }
22450 return;
22451 }
22452
22453 split_double_mode (mode, operands, 1, low, high);
22454
22455 gen_ashl3 = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
22456
22457 if (operands[1] == const1_rtx)
22458 {
22459 /* Assuming we've chosen QImode-capable registers, then 1 << N
22460 can be done with two 32/64-bit shifts, no branches, no cmoves. */
22461 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
22462 {
22463 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
22464
22465 ix86_expand_clear (low[0]);
22466 ix86_expand_clear (high[0]);
22467 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (half_width)));
22468
22469 d = gen_lowpart (QImode, low[0]);
22470 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
22471 s = gen_rtx_EQ (QImode, flags, const0_rtx);
22472 emit_insn (gen_rtx_SET (VOIDmode, d, s));
22473
22474 d = gen_lowpart (QImode, high[0]);
22475 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
22476 s = gen_rtx_NE (QImode, flags, const0_rtx);
22477 emit_insn (gen_rtx_SET (VOIDmode, d, s));
22478 }
22479
22480 /* Otherwise, we can get the same results by manually performing
22481 a bit extract operation on bit 5/6, and then performing the two
22482 shifts. The two methods of getting 0/1 into low/high are exactly
22483 the same size. Avoiding the shift in the bit extract case helps
22484 pentium4 a bit; no one else seems to care much either way. */
22485 else
22486 {
22487 enum machine_mode half_mode;
22488 rtx (*gen_lshr3)(rtx, rtx, rtx);
22489 rtx (*gen_and3)(rtx, rtx, rtx);
22490 rtx (*gen_xor3)(rtx, rtx, rtx);
22491 HOST_WIDE_INT bits;
22492 rtx x;
22493
22494 if (mode == DImode)
22495 {
22496 half_mode = SImode;
22497 gen_lshr3 = gen_lshrsi3;
22498 gen_and3 = gen_andsi3;
22499 gen_xor3 = gen_xorsi3;
22500 bits = 5;
22501 }
22502 else
22503 {
22504 half_mode = DImode;
22505 gen_lshr3 = gen_lshrdi3;
22506 gen_and3 = gen_anddi3;
22507 gen_xor3 = gen_xordi3;
22508 bits = 6;
22509 }
22510
22511 if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
22512 x = gen_rtx_ZERO_EXTEND (half_mode, operands[2]);
22513 else
22514 x = gen_lowpart (half_mode, operands[2]);
22515 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
22516
22517 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (bits)));
22518 emit_insn (gen_and3 (high[0], high[0], const1_rtx));
22519 emit_move_insn (low[0], high[0]);
22520 emit_insn (gen_xor3 (low[0], low[0], const1_rtx));
22521 }
22522
22523 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
22524 emit_insn (gen_ashl3 (high[0], high[0], operands[2]));
22525 return;
22526 }
22527
22528 if (operands[1] == constm1_rtx)
22529 {
22530 /* For -1 << N, we can avoid the shld instruction, because we
22531 know that we're shifting 0...31/63 ones into a -1. */
22532 emit_move_insn (low[0], constm1_rtx);
22533 if (optimize_insn_for_size_p ())
22534 emit_move_insn (high[0], low[0]);
22535 else
22536 emit_move_insn (high[0], constm1_rtx);
22537 }
22538 else
22539 {
22540 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
22541
22542 if (!rtx_equal_p (operands[0], operands[1]))
22543 emit_move_insn (operands[0], operands[1]);
22544
22545 split_double_mode (mode, operands, 1, low, high);
22546 emit_insn (gen_shld (high[0], low[0], operands[2]));
22547 }
22548
22549 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
22550
22551 if (TARGET_CMOVE && scratch)
22552 {
22553 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
22554 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
22555
22556 ix86_expand_clear (scratch);
22557 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
22558 }
22559 else
22560 {
22561 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
22562 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
22563
22564 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
22565 }
22566 }
22567
22568 void
22569 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
22570 {
22571 rtx (*gen_ashr3)(rtx, rtx, rtx)
22572 = mode == DImode ? gen_ashrsi3 : gen_ashrdi3;
22573 rtx (*gen_shrd)(rtx, rtx, rtx);
22574 int half_width = GET_MODE_BITSIZE (mode) >> 1;
22575
22576 rtx low[2], high[2];
22577 int count;
22578
22579 if (CONST_INT_P (operands[2]))
22580 {
22581 split_double_mode (mode, operands, 2, low, high);
22582 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
22583
22584 if (count == GET_MODE_BITSIZE (mode) - 1)
22585 {
22586 emit_move_insn (high[0], high[1]);
22587 emit_insn (gen_ashr3 (high[0], high[0],
22588 GEN_INT (half_width - 1)));
22589 emit_move_insn (low[0], high[0]);
22590
22591 }
22592 else if (count >= half_width)
22593 {
22594 emit_move_insn (low[0], high[1]);
22595 emit_move_insn (high[0], low[0]);
22596 emit_insn (gen_ashr3 (high[0], high[0],
22597 GEN_INT (half_width - 1)));
22598
22599 if (count > half_width)
22600 emit_insn (gen_ashr3 (low[0], low[0],
22601 GEN_INT (count - half_width)));
22602 }
22603 else
22604 {
22605 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
22606
22607 if (!rtx_equal_p (operands[0], operands[1]))
22608 emit_move_insn (operands[0], operands[1]);
22609
22610 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
22611 emit_insn (gen_ashr3 (high[0], high[0], GEN_INT (count)));
22612 }
22613 }
22614 else
22615 {
22616 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
22617
22618 if (!rtx_equal_p (operands[0], operands[1]))
22619 emit_move_insn (operands[0], operands[1]);
22620
22621 split_double_mode (mode, operands, 1, low, high);
22622
22623 emit_insn (gen_shrd (low[0], high[0], operands[2]));
22624 emit_insn (gen_ashr3 (high[0], high[0], operands[2]));
22625
22626 if (TARGET_CMOVE && scratch)
22627 {
22628 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
22629 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
22630
22631 emit_move_insn (scratch, high[0]);
22632 emit_insn (gen_ashr3 (scratch, scratch,
22633 GEN_INT (half_width - 1)));
22634 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
22635 scratch));
22636 }
22637 else
22638 {
22639 rtx (*gen_x86_shift_adj_3)(rtx, rtx, rtx)
22640 = mode == DImode ? gen_x86_shiftsi_adj_3 : gen_x86_shiftdi_adj_3;
22641
22642 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
22643 }
22644 }
22645 }
22646
22647 void
22648 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
22649 {
22650 rtx (*gen_lshr3)(rtx, rtx, rtx)
22651 = mode == DImode ? gen_lshrsi3 : gen_lshrdi3;
22652 rtx (*gen_shrd)(rtx, rtx, rtx);
22653 int half_width = GET_MODE_BITSIZE (mode) >> 1;
22654
22655 rtx low[2], high[2];
22656 int count;
22657
22658 if (CONST_INT_P (operands[2]))
22659 {
22660 split_double_mode (mode, operands, 2, low, high);
22661 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
22662
22663 if (count >= half_width)
22664 {
22665 emit_move_insn (low[0], high[1]);
22666 ix86_expand_clear (high[0]);
22667
22668 if (count > half_width)
22669 emit_insn (gen_lshr3 (low[0], low[0],
22670 GEN_INT (count - half_width)));
22671 }
22672 else
22673 {
22674 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
22675
22676 if (!rtx_equal_p (operands[0], operands[1]))
22677 emit_move_insn (operands[0], operands[1]);
22678
22679 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
22680 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (count)));
22681 }
22682 }
22683 else
22684 {
22685 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
22686
22687 if (!rtx_equal_p (operands[0], operands[1]))
22688 emit_move_insn (operands[0], operands[1]);
22689
22690 split_double_mode (mode, operands, 1, low, high);
22691
22692 emit_insn (gen_shrd (low[0], high[0], operands[2]));
22693 emit_insn (gen_lshr3 (high[0], high[0], operands[2]));
22694
22695 if (TARGET_CMOVE && scratch)
22696 {
22697 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
22698 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
22699
22700 ix86_expand_clear (scratch);
22701 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
22702 scratch));
22703 }
22704 else
22705 {
22706 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
22707 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
22708
22709 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
22710 }
22711 }
22712 }
22713
22714 /* Predict the just-emitted jump instruction to be taken with probability PROB. */
22715 static void
22716 predict_jump (int prob)
22717 {
22718 rtx insn = get_last_insn ();
22719 gcc_assert (JUMP_P (insn));
22720 add_int_reg_note (insn, REG_BR_PROB, prob);
22721 }
22722
22723 /* Helper function for the string operations below. Test VARIABLE whether
22724 it is aligned to VALUE bytes. If true, jump to the returned label. */
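/* For illustration: with VALUE == 4 the emitted sequence is roughly
   "and tmp, variable, 4" followed by a conditional jump to the returned
   label, taken when the result is zero. */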
22725 static rtx
22726 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
22727 {
22728 rtx label = gen_label_rtx ();
22729 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
22730 if (GET_MODE (variable) == DImode)
22731 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
22732 else
22733 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
22734 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
22735 1, label);
22736 if (epilogue)
22737 predict_jump (REG_BR_PROB_BASE * 50 / 100);
22738 else
22739 predict_jump (REG_BR_PROB_BASE * 90 / 100);
22740 return label;
22741 }
22742
22743 /* Decrease COUNTREG by VALUE. */
22744 static void
22745 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
22746 {
22747 rtx (*gen_add)(rtx, rtx, rtx)
22748 = GET_MODE (countreg) == DImode ? gen_adddi3 : gen_addsi3;
22749
22750 emit_insn (gen_add (countreg, countreg, GEN_INT (-value)));
22751 }
22752
22753 /* Zero extend EXP, which may be in SImode, to a Pmode register. */
22754 rtx
22755 ix86_zero_extend_to_Pmode (rtx exp)
22756 {
22757 return force_reg (Pmode, convert_to_mode (Pmode, exp, 1));
22758 }
22759
22760 /* Divide COUNTREG by SCALE. */
22761 static rtx
22762 scale_counter (rtx countreg, int scale)
22763 {
22764 rtx sc;
22765
22766 if (scale == 1)
22767 return countreg;
22768 if (CONST_INT_P (countreg))
22769 return GEN_INT (INTVAL (countreg) / scale);
22770 gcc_assert (REG_P (countreg));
22771
22772 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
22773 GEN_INT (exact_log2 (scale)),
22774 NULL, 1, OPTAB_DIRECT);
22775 return sc;
22776 }
22777
22778 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
22779 DImode for constant loop counts. */
22780
22781 static enum machine_mode
22782 counter_mode (rtx count_exp)
22783 {
22784 if (GET_MODE (count_exp) != VOIDmode)
22785 return GET_MODE (count_exp);
22786 if (!CONST_INT_P (count_exp))
22787 return Pmode;
22788 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
22789 return DImode;
22790 return SImode;
22791 }
22792
22793 /* Copy the address to a Pmode register. This is used for x32 to
22794 truncate DImode TLS address to a SImode register. */
22795
22796 static rtx
22797 ix86_copy_addr_to_reg (rtx addr)
22798 {
22799 if (GET_MODE (addr) == Pmode || GET_MODE (addr) == VOIDmode)
22800 return copy_addr_to_reg (addr);
22801 else
22802 {
22803 gcc_assert (GET_MODE (addr) == DImode && Pmode == SImode);
22804 return gen_rtx_SUBREG (SImode, copy_to_mode_reg (DImode, addr), 0);
22805 }
22806 }
22807
22808 /* When ISSETMEM is FALSE, output a simple loop to move memory pointed to by
22809 SRCPTR to DESTPTR via chunks of MODE unrolled UNROLL times; the overall size
22810 is COUNT, specified in bytes. When ISSETMEM is TRUE, output the equivalent
22811 loop to set memory with VALUE (assumed to be in MODE).
22812
22813 The size is rounded down to a whole number of chunks moved at once.
22814 SRCMEM and DESTMEM provide MEMrtx to feed proper aliasing info. */
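/* For illustration: with MODE == SImode and UNROLL == 4 each loop
   iteration below copies (or stores) 16 bytes; COUNT is rounded down to a
   multiple of 16 and the remainder is left for the caller's epilogue. */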
22815
22816
22817 static void
22818 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
22819 rtx destptr, rtx srcptr, rtx value,
22820 rtx count, enum machine_mode mode, int unroll,
22821 int expected_size, bool issetmem)
22822 {
22823 rtx out_label, top_label, iter, tmp;
22824 enum machine_mode iter_mode = counter_mode (count);
22825 int piece_size_n = GET_MODE_SIZE (mode) * unroll;
22826 rtx piece_size = GEN_INT (piece_size_n);
22827 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
22828 rtx size;
22829 int i;
22830
22831 top_label = gen_label_rtx ();
22832 out_label = gen_label_rtx ();
22833 iter = gen_reg_rtx (iter_mode);
22834
22835 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
22836 NULL, 1, OPTAB_DIRECT);
22837 /* Those two should combine. */
22838 if (piece_size == const1_rtx)
22839 {
22840 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
22841 true, out_label);
22842 predict_jump (REG_BR_PROB_BASE * 10 / 100);
22843 }
22844 emit_move_insn (iter, const0_rtx);
22845
22846 emit_label (top_label);
22847
22848 tmp = convert_modes (Pmode, iter_mode, iter, true);
22849
22850 /* This assert could be relaxed - in that case we'll need to compute the
22851 smallest power of two containing PIECE_SIZE_N and pass it to
22852 offset_address. */
22853 gcc_assert ((piece_size_n & (piece_size_n - 1)) == 0);
22854 destmem = offset_address (destmem, tmp, piece_size_n);
22855 destmem = adjust_address (destmem, mode, 0);
22856
22857 if (!issetmem)
22858 {
22859 srcmem = offset_address (srcmem, copy_rtx (tmp), piece_size_n);
22860 srcmem = adjust_address (srcmem, mode, 0);
22861
22862 /* When unrolling for chips that reorder memory reads and writes,
22863 we can save registers by using a single temporary.
22864 Also, using 4 temporaries is overkill in 32-bit mode. */
22865 if (!TARGET_64BIT && 0)
22866 {
22867 for (i = 0; i < unroll; i++)
22868 {
22869 if (i)
22870 {
22871 destmem =
22872 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
22873 srcmem =
22874 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
22875 }
22876 emit_move_insn (destmem, srcmem);
22877 }
22878 }
22879 else
22880 {
22881 rtx tmpreg[4];
22882 gcc_assert (unroll <= 4);
22883 for (i = 0; i < unroll; i++)
22884 {
22885 tmpreg[i] = gen_reg_rtx (mode);
22886 if (i)
22887 {
22888 srcmem =
22889 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
22890 }
22891 emit_move_insn (tmpreg[i], srcmem);
22892 }
22893 for (i = 0; i < unroll; i++)
22894 {
22895 if (i)
22896 {
22897 destmem =
22898 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
22899 }
22900 emit_move_insn (destmem, tmpreg[i]);
22901 }
22902 }
22903 }
22904 else
22905 for (i = 0; i < unroll; i++)
22906 {
22907 if (i)
22908 destmem =
22909 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
22910 emit_move_insn (destmem, value);
22911 }
22912
22913 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
22914 true, OPTAB_LIB_WIDEN);
22915 if (tmp != iter)
22916 emit_move_insn (iter, tmp);
22917
22918 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
22919 true, top_label);
22920 if (expected_size != -1)
22921 {
22922 expected_size /= GET_MODE_SIZE (mode) * unroll;
22923 if (expected_size == 0)
22924 predict_jump (0);
22925 else if (expected_size > REG_BR_PROB_BASE)
22926 predict_jump (REG_BR_PROB_BASE - 1);
22927 else
22928 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
22929 }
22930 else
22931 predict_jump (REG_BR_PROB_BASE * 80 / 100);
22932 iter = ix86_zero_extend_to_Pmode (iter);
22933 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
22934 true, OPTAB_LIB_WIDEN);
22935 if (tmp != destptr)
22936 emit_move_insn (destptr, tmp);
22937 if (!issetmem)
22938 {
22939 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
22940 true, OPTAB_LIB_WIDEN);
22941 if (tmp != srcptr)
22942 emit_move_insn (srcptr, tmp);
22943 }
22944 emit_label (out_label);
22945 }
22946
22947 /* Output "rep; mov" or "rep; stos" instruction depending on ISSETMEM argument.
22948 When ISSETMEM is true, arguments SRCMEM and SRCPTR are ignored.
22949 When ISSETMEM is false, arguments VALUE and ORIG_VALUE are ignored.
22950 For the setmem case, VALUE is ORIG_VALUE promoted to a wider size.
22951 ORIG_VALUE is the original value passed to memset to fill the memory with.
22952 Other arguments have the same meaning as for the previous function. */
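/* For illustration: a memset of 32 zero bytes requested in QImode is
   promoted to SImode below, so the expansion is roughly "mov ecx, 8;
   rep stosd" rather than "mov ecx, 32; rep stosb". */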
22953
22954 static void
22955 expand_set_or_movmem_via_rep (rtx destmem, rtx srcmem,
22956 rtx destptr, rtx srcptr, rtx value, rtx orig_value,
22957 rtx count,
22958 enum machine_mode mode, bool issetmem)
22959 {
22960 rtx destexp;
22961 rtx srcexp;
22962 rtx countreg;
22963 HOST_WIDE_INT rounded_count;
22964
22965 /* If possible, it is shorter to use rep movs.
22966 TODO: Maybe it is better to move this logic to decide_alg. */
22967 if (mode == QImode && CONST_INT_P (count) && !(INTVAL (count) & 3)
22968 && (!issetmem || orig_value == const0_rtx))
22969 mode = SImode;
22970
22971 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
22972 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
22973
22974 countreg = ix86_zero_extend_to_Pmode (scale_counter (count,
22975 GET_MODE_SIZE (mode)));
22976 if (mode != QImode)
22977 {
22978 destexp = gen_rtx_ASHIFT (Pmode, countreg,
22979 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
22980 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
22981 }
22982 else
22983 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
22984 if ((!issetmem || orig_value == const0_rtx) && CONST_INT_P (count))
22985 {
22986 rounded_count = (INTVAL (count)
22987 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
22988 destmem = shallow_copy_rtx (destmem);
22989 set_mem_size (destmem, rounded_count);
22990 }
22991 else if (MEM_SIZE_KNOWN_P (destmem))
22992 clear_mem_size (destmem);
22993
22994 if (issetmem)
22995 {
22996 value = force_reg (mode, gen_lowpart (mode, value));
22997 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
22998 }
22999 else
23000 {
23001 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
23002 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
23003 if (mode != QImode)
23004 {
23005 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
23006 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
23007 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
23008 }
23009 else
23010 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
23011 if (CONST_INT_P (count))
23012 {
23013 rounded_count = (INTVAL (count)
23014 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
23015 srcmem = shallow_copy_rtx (srcmem);
23016 set_mem_size (srcmem, rounded_count);
23017 }
23018 else
23019 {
23020 if (MEM_SIZE_KNOWN_P (srcmem))
23021 clear_mem_size (srcmem);
23022 }
23023 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
23024 destexp, srcexp));
23025 }
23026 }
23027
23028 /* This function emits moves to copy SIZE_TO_MOVE bytes from SRCMEM to
23029 DESTMEM.
23030 SRCMEM is passed by pointer to be updated on return.
23031 The return value is the updated DESTMEM. */
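/* For example (illustration): SIZE_TO_MOVE == 8 on a 64-bit target is
   copied as one DImode load into a temporary register followed by one
   DImode store, with DESTPTR and SRCPTR both advanced by 8. */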
23032 static rtx
23033 emit_memmov (rtx destmem, rtx *srcmem, rtx destptr, rtx srcptr,
23034 HOST_WIDE_INT size_to_move)
23035 {
23036 rtx dst = destmem, src = *srcmem, adjust, tempreg;
23037 enum insn_code code;
23038 enum machine_mode move_mode;
23039 int piece_size, i;
23040
23041 /* Find the widest mode in which we could perform moves.
23042 Start with the biggest power of 2 not greater than SIZE_TO_MOVE and halve
23043 it until a move of that size is supported. */
23044 piece_size = 1 << floor_log2 (size_to_move);
23045 move_mode = mode_for_size (piece_size * BITS_PER_UNIT, MODE_INT, 0);
23046 code = optab_handler (mov_optab, move_mode);
23047 while (code == CODE_FOR_nothing && piece_size > 1)
23048 {
23049 piece_size >>= 1;
23050 move_mode = mode_for_size (piece_size * BITS_PER_UNIT, MODE_INT, 0);
23051 code = optab_handler (mov_optab, move_mode);
23052 }
23053
23054 /* Find the corresponding vector mode with the same size as MOVE_MODE.
23055 MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
23056 if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
23057 {
23058 int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
23059 move_mode = mode_for_vector (word_mode, nunits);
23060 code = optab_handler (mov_optab, move_mode);
23061 if (code == CODE_FOR_nothing)
23062 {
23063 move_mode = word_mode;
23064 piece_size = GET_MODE_SIZE (move_mode);
23065 code = optab_handler (mov_optab, move_mode);
23066 }
23067 }
23068 gcc_assert (code != CODE_FOR_nothing);
23069
23070 dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0);
23071 src = adjust_automodify_address_nv (src, move_mode, srcptr, 0);
23072
23073 /* Emit moves. We'll need SIZE_TO_MOVE/PIECE_SIZE moves. */
23074 gcc_assert (size_to_move % piece_size == 0);
23075 adjust = GEN_INT (piece_size);
23076 for (i = 0; i < size_to_move; i += piece_size)
23077 {
23078 /* We move from memory to memory, so we'll need to do it via
23079 a temporary register. */
23080 tempreg = gen_reg_rtx (move_mode);
23081 emit_insn (GEN_FCN (code) (tempreg, src));
23082 emit_insn (GEN_FCN (code) (dst, tempreg));
23083
23084 emit_move_insn (destptr,
23085 gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust));
23086 emit_move_insn (srcptr,
23087 gen_rtx_PLUS (Pmode, copy_rtx (srcptr), adjust));
23088
23089 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23090 piece_size);
23091 src = adjust_automodify_address_nv (src, move_mode, srcptr,
23092 piece_size);
23093 }
23094
23095 /* Update DST and SRC rtx. */
23096 *srcmem = src;
23097 return dst;
23098 }
23099
23100 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
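/* For illustration: with a constant COUNT and MAX_SIZE == 16, a remainder
   of 11 bytes is copied as one 8-byte, one 2-byte and one 1-byte move via
   emit_memmov. */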
23101 static void
23102 expand_movmem_epilogue (rtx destmem, rtx srcmem,
23103 rtx destptr, rtx srcptr, rtx count, int max_size)
23104 {
23105 rtx src, dest;
23106 if (CONST_INT_P (count))
23107 {
23108 HOST_WIDE_INT countval = INTVAL (count);
23109 HOST_WIDE_INT epilogue_size = countval % max_size;
23110 int i;
23111
23112 /* For now MAX_SIZE should be a power of 2. This assert could be
23113 relaxed, but it'll require a bit more complicated epilogue
23114 expanding. */
23115 gcc_assert ((max_size & (max_size - 1)) == 0);
23116 for (i = max_size; i >= 1; i >>= 1)
23117 {
23118 if (epilogue_size & i)
23119 destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
23120 }
23121 return;
23122 }
23123 if (max_size > 8)
23124 {
23125 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
23126 count, 1, OPTAB_DIRECT);
23127 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
23128 count, QImode, 1, 4, false);
23129 return;
23130 }
23131
23132 /* When single stringop instructions are available, we can cheaply advance the
23133 dest and src pointers. Otherwise we save code size by maintaining an offset
23134 (zero is readily available from the preceding rep operation) and using x86
23135 addressing modes. */
23136 if (TARGET_SINGLE_STRINGOP)
23137 {
23138 if (max_size > 4)
23139 {
23140 rtx label = ix86_expand_aligntest (count, 4, true);
23141 src = change_address (srcmem, SImode, srcptr);
23142 dest = change_address (destmem, SImode, destptr);
23143 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23144 emit_label (label);
23145 LABEL_NUSES (label) = 1;
23146 }
23147 if (max_size > 2)
23148 {
23149 rtx label = ix86_expand_aligntest (count, 2, true);
23150 src = change_address (srcmem, HImode, srcptr);
23151 dest = change_address (destmem, HImode, destptr);
23152 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23153 emit_label (label);
23154 LABEL_NUSES (label) = 1;
23155 }
23156 if (max_size > 1)
23157 {
23158 rtx label = ix86_expand_aligntest (count, 1, true);
23159 src = change_address (srcmem, QImode, srcptr);
23160 dest = change_address (destmem, QImode, destptr);
23161 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23162 emit_label (label);
23163 LABEL_NUSES (label) = 1;
23164 }
23165 }
23166 else
23167 {
23168 rtx offset = force_reg (Pmode, const0_rtx);
23169 rtx tmp;
23170
23171 if (max_size > 4)
23172 {
23173 rtx label = ix86_expand_aligntest (count, 4, true);
23174 src = change_address (srcmem, SImode, srcptr);
23175 dest = change_address (destmem, SImode, destptr);
23176 emit_move_insn (dest, src);
23177 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
23178 true, OPTAB_LIB_WIDEN);
23179 if (tmp != offset)
23180 emit_move_insn (offset, tmp);
23181 emit_label (label);
23182 LABEL_NUSES (label) = 1;
23183 }
23184 if (max_size > 2)
23185 {
23186 rtx label = ix86_expand_aligntest (count, 2, true);
23187 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
23188 src = change_address (srcmem, HImode, tmp);
23189 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
23190 dest = change_address (destmem, HImode, tmp);
23191 emit_move_insn (dest, src);
23192 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
23193 true, OPTAB_LIB_WIDEN);
23194 if (tmp != offset)
23195 emit_move_insn (offset, tmp);
23196 emit_label (label);
23197 LABEL_NUSES (label) = 1;
23198 }
23199 if (max_size > 1)
23200 {
23201 rtx label = ix86_expand_aligntest (count, 1, true);
23202 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
23203 src = change_address (srcmem, QImode, tmp);
23204 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
23205 dest = change_address (destmem, QImode, tmp);
23206 emit_move_insn (dest, src);
23207 emit_label (label);
23208 LABEL_NUSES (label) = 1;
23209 }
23210 }
23211 }
23212
23213 /* This function emits moves to fill SIZE_TO_MOVE bytes starting from DESTMEM
23214 with value PROMOTED_VAL.
23215 The return value is the updated DESTMEM. */
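/* For example (illustration): SIZE_TO_MOVE == 4 with PROMOTED_VAL in DImode
   stores the low SImode part of the promoted value once and advances
   DESTPTR by 4. */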
23217 static rtx
23218 emit_memset (rtx destmem, rtx destptr, rtx promoted_val,
23219 HOST_WIDE_INT size_to_move)
23220 {
23221 rtx dst = destmem, adjust;
23222 enum insn_code code;
23223 enum machine_mode move_mode;
23224 int piece_size, i;
23225
23226 /* Find the widest mode in which we could perform moves.
23227 Start from the mode of PROMOTED_VAL and use a narrower integer mode
23228 if SIZE_TO_MOVE is smaller than its size. */
23229 move_mode = GET_MODE (promoted_val);
23230 if (move_mode == VOIDmode)
23231 move_mode = QImode;
23232 if (size_to_move < GET_MODE_SIZE (move_mode))
23233 {
23234 move_mode = mode_for_size (size_to_move * BITS_PER_UNIT, MODE_INT, 0);
23235 promoted_val = gen_lowpart (move_mode, promoted_val);
23236 }
23237 piece_size = GET_MODE_SIZE (move_mode);
23238 code = optab_handler (mov_optab, move_mode);
23239 gcc_assert (code != CODE_FOR_nothing && promoted_val != NULL_RTX);
23240
23241 dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0);
23242
23243 /* Emit moves. We'll need SIZE_TO_MOVE/PIECE_SIZE moves. */
23244 gcc_assert (size_to_move % piece_size == 0);
23245 adjust = GEN_INT (piece_size);
23246 for (i = 0; i < size_to_move; i += piece_size)
23247 {
23248 if (piece_size <= GET_MODE_SIZE (word_mode))
23249 {
23250 emit_insn (gen_strset (destptr, dst, promoted_val));
23251 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23252 piece_size);
23253 continue;
23254 }
23255
23256 emit_insn (GEN_FCN (code) (dst, promoted_val));
23257
23258 emit_move_insn (destptr,
23259 gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust));
23260
23261 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23262 piece_size);
23263 }
23264
23265 /* Update DST rtx. */
23266 return dst;
23267 }
23268 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
23269 static void
23270 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
23271 rtx count, int max_size)
23272 {
23273 count =
23274 expand_simple_binop (counter_mode (count), AND, count,
23275 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
23276 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
23277 gen_lowpart (QImode, value), count, QImode,
23278 1, max_size / 2, true);
23279 }
23280
23281 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
23282 static void
23283 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx vec_value,
23284 rtx count, int max_size)
23285 {
23286 rtx dest;
23287
23288 if (CONST_INT_P (count))
23289 {
23290 HOST_WIDE_INT countval = INTVAL (count);
23291 HOST_WIDE_INT epilogue_size = countval % max_size;
23292 int i;
23293
23294 /* For now MAX_SIZE should be a power of 2. This assert could be
23295 relaxed, but it would require a bit more complicated epilogue
23296 expansion. */
23297 gcc_assert ((max_size & (max_size - 1)) == 0);
23298 for (i = max_size; i >= 1; i >>= 1)
23299 {
23300 if (epilogue_size & i)
23301 {
23302 if (vec_value && i > GET_MODE_SIZE (GET_MODE (value)))
23303 destmem = emit_memset (destmem, destptr, vec_value, i);
23304 else
23305 destmem = emit_memset (destmem, destptr, value, i);
23306 }
23307 }
23308 return;
23309 }
23310 if (max_size > 32)
23311 {
23312 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
23313 return;
23314 }
23315 if (max_size > 16)
23316 {
23317 rtx label = ix86_expand_aligntest (count, 16, true);
23318 if (TARGET_64BIT)
23319 {
23320 dest = change_address (destmem, DImode, destptr);
23321 emit_insn (gen_strset (destptr, dest, value));
23322 dest = adjust_automodify_address_nv (dest, DImode, destptr, 8);
23323 emit_insn (gen_strset (destptr, dest, value));
23324 }
23325 else
23326 {
23327 dest = change_address (destmem, SImode, destptr);
23328 emit_insn (gen_strset (destptr, dest, value));
23329 dest = adjust_automodify_address_nv (dest, SImode, destptr, 4);
23330 emit_insn (gen_strset (destptr, dest, value));
23331 dest = adjust_automodify_address_nv (dest, SImode, destptr, 8);
23332 emit_insn (gen_strset (destptr, dest, value));
23333 dest = adjust_automodify_address_nv (dest, SImode, destptr, 12);
23334 emit_insn (gen_strset (destptr, dest, value));
23335 }
23336 emit_label (label);
23337 LABEL_NUSES (label) = 1;
23338 }
23339 if (max_size > 8)
23340 {
23341 rtx label = ix86_expand_aligntest (count, 8, true);
23342 if (TARGET_64BIT)
23343 {
23344 dest = change_address (destmem, DImode, destptr);
23345 emit_insn (gen_strset (destptr, dest, value));
23346 }
23347 else
23348 {
23349 dest = change_address (destmem, SImode, destptr);
23350 emit_insn (gen_strset (destptr, dest, value));
23351 dest = adjust_automodify_address_nv (dest, SImode, destptr, 4);
23352 emit_insn (gen_strset (destptr, dest, value));
23353 }
23354 emit_label (label);
23355 LABEL_NUSES (label) = 1;
23356 }
23357 if (max_size > 4)
23358 {
23359 rtx label = ix86_expand_aligntest (count, 4, true);
23360 dest = change_address (destmem, SImode, destptr);
23361 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
23362 emit_label (label);
23363 LABEL_NUSES (label) = 1;
23364 }
23365 if (max_size > 2)
23366 {
23367 rtx label = ix86_expand_aligntest (count, 2, true);
23368 dest = change_address (destmem, HImode, destptr);
23369 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
23370 emit_label (label);
23371 LABEL_NUSES (label) = 1;
23372 }
23373 if (max_size > 1)
23374 {
23375 rtx label = ix86_expand_aligntest (count, 1, true);
23376 dest = change_address (destmem, QImode, destptr);
23377 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
23378 emit_label (label);
23379 LABEL_NUSES (label) = 1;
23380 }
23381 }
23382
23383 /* Depending on ISSETMEM, copy enough from SRCMEM to DESTMEM or set enough to
23384 DESTMEM to align it to DESIRED_ALIGNMENT. Original alignment is ALIGN.
23385 Depending on ISSETMEM, either arguments SRCMEM/SRCPTR or VALUE/VEC_VALUE are
23386 ignored.
23387 Return value is the updated DESTMEM. */
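/* A rough sketch of the emitted prologue (illustrative only, not the exact
   RTL), assuming ALIGN == 1 and DESIRED_ALIGNMENT == 8; each conditional
   corresponds to one iteration of the loop below:

     if (destptr & 1) { copy or set 1 byte;  count -= 1; }
     if (destptr & 2) { copy or set 2 bytes; count -= 2; }
     if (destptr & 4) { copy or set 4 bytes; count -= 4; }

   After these steps the destination is aligned to DESIRED_ALIGNMENT.  */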
23388 static rtx
23389 expand_set_or_movmem_prologue (rtx destmem, rtx srcmem,
23390 rtx destptr, rtx srcptr, rtx value,
23391 rtx vec_value, rtx count, int align,
23392 int desired_alignment, bool issetmem)
23393 {
23394 int i;
23395 for (i = 1; i < desired_alignment; i <<= 1)
23396 {
23397 if (align <= i)
23398 {
23399 rtx label = ix86_expand_aligntest (destptr, i, false);
23400 if (issetmem)
23401 {
23402 if (vec_value && i > GET_MODE_SIZE (GET_MODE (value)))
23403 destmem = emit_memset (destmem, destptr, vec_value, i);
23404 else
23405 destmem = emit_memset (destmem, destptr, value, i);
23406 }
23407 else
23408 destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
23409 ix86_adjust_counter (count, i);
23410 emit_label (label);
23411 LABEL_NUSES (label) = 1;
23412 set_mem_align (destmem, i * 2 * BITS_PER_UNIT);
23413 }
23414 }
23415 return destmem;
23416 }
23417
23418 /* Test if COUNT&SIZE is nonzero and if so, expand a movmem
23419 or setmem sequence that is valid for SIZE..2*SIZE-1 bytes
23420 and jump to DONE_LABEL. */
23421 static void
23422 expand_small_movmem_or_setmem (rtx destmem, rtx srcmem,
23423 rtx destptr, rtx srcptr,
23424 rtx value, rtx vec_value,
23425 rtx count, int size,
23426 rtx done_label, bool issetmem)
23427 {
23428 rtx label = ix86_expand_aligntest (count, size, false);
23429 enum machine_mode mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 1);
23430 rtx modesize;
23431 int n;
23432
23433 /* If we do not have a vector value to copy, we must reduce the size. */
23434 if (issetmem)
23435 {
23436 if (!vec_value)
23437 {
23438 if (GET_MODE (value) == VOIDmode && size > 8)
23439 mode = Pmode;
23440 else if (GET_MODE_SIZE (mode) > GET_MODE_SIZE (GET_MODE (value)))
23441 mode = GET_MODE (value);
23442 }
23443 else
23444 mode = GET_MODE (vec_value), value = vec_value;
23445 }
23446 else
23447 {
23448 /* Choose appropriate vector mode. */
23449 if (size >= 32)
23450 mode = TARGET_AVX ? V32QImode : TARGET_SSE ? V16QImode : DImode;
23451 else if (size >= 16)
23452 mode = TARGET_SSE ? V16QImode : DImode;
23453 srcmem = change_address (srcmem, mode, srcptr);
23454 }
23455 destmem = change_address (destmem, mode, destptr);
23456 modesize = GEN_INT (GET_MODE_SIZE (mode));
23457 gcc_assert (GET_MODE_SIZE (mode) <= size);
23458 for (n = 0; n * GET_MODE_SIZE (mode) < size; n++)
23459 {
23460 if (issetmem)
23461 emit_move_insn (destmem, gen_lowpart (mode, value));
23462 else
23463 {
23464 emit_move_insn (destmem, srcmem);
23465 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
23466 }
23467 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
23468 }
23469
23470 destmem = offset_address (destmem, count, 1);
23471 destmem = offset_address (destmem, GEN_INT (-2 * size),
23472 GET_MODE_SIZE (mode));
23473 if (!issetmem)
23474 {
23475 srcmem = offset_address (srcmem, count, 1);
23476 srcmem = offset_address (srcmem, GEN_INT (-2 * size),
23477 GET_MODE_SIZE (mode));
23478 }
23479 for (n = 0; n * GET_MODE_SIZE (mode) < size; n++)
23480 {
23481 if (issetmem)
23482 emit_move_insn (destmem, gen_lowpart (mode, value));
23483 else
23484 {
23485 emit_move_insn (destmem, srcmem);
23486 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
23487 }
23488 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
23489 }
23490 emit_jump_insn (gen_jump (done_label));
23491 emit_barrier ();
23492
23493 emit_label (label);
23494 LABEL_NUSES (label) = 1;
23495 }
23496
23497 /* Handle a small memcpy (up to SIZE, which is supposed to be a small power of 2)
23498 and get ready for the main memcpy loop by copying the initial DESIRED_ALIGN-ALIGN
23499 bytes and the last SIZE bytes, adjusting DESTPTR/SRCPTR/COUNT in a way that we can
23500 proceed with a loop copying SIZE bytes at once. Do moves in MODE.
23501 DONE_LABEL is a label after the whole copying sequence. The label is created
23502 on demand if *DONE_LABEL is NULL.
23503 MIN_SIZE is the minimal size of the copied block. This value gets adjusted for new
23504 bounds after the initial copies.
23505
23506 DESTMEM/SRCMEM are memory expressions pointing to the copied block,
23507 DESTPTR/SRCPTR are pointers to the block. DYNAMIC_CHECK indicates whether
23508 we will dispatch to a library call for large blocks.
23509
23510 In pseudocode we do:
23511
23512 if (COUNT < SIZE)
23513 {
23514 Assume that SIZE is 4. Bigger sizes are handled analogously
23515 if (COUNT & 4)
23516 {
23517 copy 4 bytes from SRCPTR to DESTPTR
23518 copy 4 bytes from SRCPTR + COUNT - 4 to DESTPTR + COUNT - 4
23519 goto done_label
23520 }
23521 if (!COUNT)
23522 goto done_label;
23523 copy 1 byte from SRCPTR to DESTPTR
23524 if (COUNT & 2)
23525 {
23526 copy 2 bytes from SRCPTR to DESTPTR
23527 copy 2 bytes from SRCPTR + COUNT - 2 to DESTPTR + COUNT - 2
23528 }
23529 }
23530 else
23531 {
23532 copy at least DESIRED_ALIGN-ALIGN bytes from SRCPTR to DESTPTR
23533 copy SIZE bytes from SRCPTR + COUNT - SIZE to DESTPTR + COUNT -SIZE
23534
23535 OLD_DESTPTR = DESTPTR;
23536 Align DESTPTR up to DESIRED_ALIGN
23537 SRCPTR += DESTPTR - OLD_DESTPTR
23538 COUNT -= DESTPTR - OLD_DESTPTR
23539 if (DYNAMIC_CHECK)
23540 Round COUNT down to multiple of SIZE
23541 << optional caller supplied zero size guard is here >>
23542 << optional caller supplied dynamic check is here >>
23543 << caller supplied main copy loop is here >>
23544 }
23545 done_label:
23546 */
23547 static void
23548 expand_set_or_movmem_prologue_epilogue_by_misaligned_moves (rtx destmem, rtx srcmem,
23549 rtx *destptr, rtx *srcptr,
23550 enum machine_mode mode,
23551 rtx value, rtx vec_value,
23552 rtx *count,
23553 rtx *done_label,
23554 int size,
23555 int desired_align,
23556 int align,
23557 unsigned HOST_WIDE_INT *min_size,
23558 bool dynamic_check,
23559 bool issetmem)
23560 {
23561 rtx loop_label = NULL, label;
23562 int n;
23563 rtx modesize;
23564 int prolog_size = 0;
23565 rtx mode_value;
23566
23567 /* Choose the proper value to copy. */
23568 if (issetmem && VECTOR_MODE_P (mode))
23569 mode_value = vec_value;
23570 else
23571 mode_value = value;
23572 gcc_assert (GET_MODE_SIZE (mode) <= size);
23573
23574 /* See if block is big or small, handle small blocks. */
23575 if (!CONST_INT_P (*count) && *min_size < (unsigned HOST_WIDE_INT)size)
23576 {
23577 int size2 = size;
23578 loop_label = gen_label_rtx ();
23579
23580 if (!*done_label)
23581 *done_label = gen_label_rtx ();
23582
23583 emit_cmp_and_jump_insns (*count, GEN_INT (size2), GE, 0, GET_MODE (*count),
23584 1, loop_label);
23585 size2 >>= 1;
23586
23587 /* Handle sizes > 3. */
23588 for (;size2 > 2; size2 >>= 1)
23589 expand_small_movmem_or_setmem (destmem, srcmem,
23590 *destptr, *srcptr,
23591 value, vec_value,
23592 *count,
23593 size2, *done_label, issetmem);
23594 /* Nothing to copy? Jump to DONE_LABEL if so. */
23595 emit_cmp_and_jump_insns (*count, const0_rtx, EQ, 0, GET_MODE (*count),
23596 1, *done_label);
23597
23598 /* Do a byte copy. */
23599 destmem = change_address (destmem, QImode, *destptr);
23600 if (issetmem)
23601 emit_move_insn (destmem, gen_lowpart (QImode, value));
23602 else
23603 {
23604 srcmem = change_address (srcmem, QImode, *srcptr);
23605 emit_move_insn (destmem, srcmem);
23606 }
23607
23608 /* Handle sizes 2 and 3. */
23609 label = ix86_expand_aligntest (*count, 2, false);
23610 destmem = change_address (destmem, HImode, *destptr);
23611 destmem = offset_address (destmem, *count, 1);
23612 destmem = offset_address (destmem, GEN_INT (-2), 2);
23613 if (issetmem)
23614 emit_move_insn (destmem, gen_lowpart (HImode, value));
23615 else
23616 {
23617 srcmem = change_address (srcmem, HImode, *srcptr);
23618 srcmem = offset_address (srcmem, *count, 1);
23619 srcmem = offset_address (srcmem, GEN_INT (-2), 2);
23620 emit_move_insn (destmem, srcmem);
23621 }
23622
23623 emit_label (label);
23624 LABEL_NUSES (label) = 1;
23625 emit_jump_insn (gen_jump (*done_label));
23626 emit_barrier ();
23627 }
23628 else
23629 gcc_assert (*min_size >= (unsigned HOST_WIDE_INT)size
23630 || UINTVAL (*count) >= (unsigned HOST_WIDE_INT)size);
23631
23632 /* Start memcpy for COUNT >= SIZE. */
23633 if (loop_label)
23634 {
23635 emit_label (loop_label);
23636 LABEL_NUSES (loop_label) = 1;
23637 }
23638
23639 /* Copy first desired_align bytes. */
23640 if (!issetmem)
23641 srcmem = change_address (srcmem, mode, *srcptr);
23642 destmem = change_address (destmem, mode, *destptr);
23643 modesize = GEN_INT (GET_MODE_SIZE (mode));
23644 for (n = 0; prolog_size < desired_align - align; n++)
23645 {
23646 if (issetmem)
23647 emit_move_insn (destmem, mode_value);
23648 else
23649 {
23650 emit_move_insn (destmem, srcmem);
23651 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
23652 }
23653 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
23654 prolog_size += GET_MODE_SIZE (mode);
23655 }
23656
23657
23658 /* Copy last SIZE bytes. */
23659 destmem = offset_address (destmem, *count, 1);
23660 destmem = offset_address (destmem,
23661 GEN_INT (-size - prolog_size),
23662 1);
23663 if (issetmem)
23664 emit_move_insn (destmem, mode_value);
23665 else
23666 {
23667 srcmem = offset_address (srcmem, *count, 1);
23668 srcmem = offset_address (srcmem,
23669 GEN_INT (-size - prolog_size),
23670 1);
23671 emit_move_insn (destmem, srcmem);
23672 }
23673 for (n = 1; n * GET_MODE_SIZE (mode) < size; n++)
23674 {
23675 destmem = offset_address (destmem, modesize, 1);
23676 if (issetmem)
23677 emit_move_insn (destmem, mode_value);
23678 else
23679 {
23680 srcmem = offset_address (srcmem, modesize, 1);
23681 emit_move_insn (destmem, srcmem);
23682 }
23683 }
23684
23685 /* Align destination. */
23686 if (desired_align > 1 && desired_align > align)
23687 {
23688 rtx saveddest = *destptr;
23689
23690 gcc_assert (desired_align <= size);
23691 /* Align destptr up, place it in a new register. */
23692 *destptr = expand_simple_binop (GET_MODE (*destptr), PLUS, *destptr,
23693 GEN_INT (prolog_size),
23694 NULL_RTX, 1, OPTAB_DIRECT);
23695 *destptr = expand_simple_binop (GET_MODE (*destptr), AND, *destptr,
23696 GEN_INT (-desired_align),
23697 *destptr, 1, OPTAB_DIRECT);
23698 /* See how many bytes we skipped. */
23699 saveddest = expand_simple_binop (GET_MODE (*destptr), MINUS, saveddest,
23700 *destptr,
23701 saveddest, 1, OPTAB_DIRECT);
23702 /* Adjust srcptr and count. */
23703 if (!issetmem)
23704 *srcptr = expand_simple_binop (GET_MODE (*srcptr), MINUS, *srcptr, saveddest,
23705 *srcptr, 1, OPTAB_DIRECT);
23706 *count = expand_simple_binop (GET_MODE (*count), PLUS, *count,
23707 saveddest, *count, 1, OPTAB_DIRECT);
23708 /* We copied at most size + prolog_size. */
23709 if (*min_size > (unsigned HOST_WIDE_INT)(size + prolog_size))
23710 *min_size = (*min_size - size) & ~(unsigned HOST_WIDE_INT)(size - 1);
23711 else
23712 *min_size = 0;
23713
23714 /* Our loops always round down the block size, but for dispatch to a library
23715 call we need the precise value. */
23716 if (dynamic_check)
23717 *count = expand_simple_binop (GET_MODE (*count), AND, *count,
23718 GEN_INT (-size), *count, 1, OPTAB_DIRECT);
23719 }
23720 else
23721 {
23722 gcc_assert (prolog_size == 0);
23723 /* Decrease count, so we won't end up copying last word twice. */
23724 if (!CONST_INT_P (*count))
23725 *count = expand_simple_binop (GET_MODE (*count), PLUS, *count,
23726 constm1_rtx, *count, 1, OPTAB_DIRECT);
23727 else
23728 *count = GEN_INT ((UINTVAL (*count) - 1) & ~(unsigned HOST_WIDE_INT)(size - 1));
23729 if (*min_size)
23730 *min_size = (*min_size - 1) & ~(unsigned HOST_WIDE_INT)(size - 1);
23731 }
23732 }
23733
23734
23735 /* This function is like the previous one, except here we know how many bytes
23736 need to be copied. That allows us to update alignment not only of DST, which
23737 is returned, but also of SRC, which is passed as a pointer for that
23738 reason. */
23739 static rtx
23740 expand_set_or_movmem_constant_prologue (rtx dst, rtx *srcp, rtx destreg,
23741 rtx srcreg, rtx value, rtx vec_value,
23742 int desired_align, int align_bytes,
23743 bool issetmem)
23744 {
23745 rtx src = NULL;
23746 rtx orig_dst = dst;
23747 rtx orig_src = NULL;
23748 int piece_size = 1;
23749 int copied_bytes = 0;
23750
23751 if (!issetmem)
23752 {
23753 gcc_assert (srcp != NULL);
23754 src = *srcp;
23755 orig_src = src;
23756 }
23757
23758 for (piece_size = 1;
23759 piece_size <= desired_align && copied_bytes < align_bytes;
23760 piece_size <<= 1)
23761 {
23762 if (align_bytes & piece_size)
23763 {
23764 if (issetmem)
23765 {
23766 if (vec_value && piece_size > GET_MODE_SIZE (GET_MODE (value)))
23767 dst = emit_memset (dst, destreg, vec_value, piece_size);
23768 else
23769 dst = emit_memset (dst, destreg, value, piece_size);
23770 }
23771 else
23772 dst = emit_memmov (dst, &src, destreg, srcreg, piece_size);
23773 copied_bytes += piece_size;
23774 }
23775 }
23776 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
23777 set_mem_align (dst, desired_align * BITS_PER_UNIT);
23778 if (MEM_SIZE_KNOWN_P (orig_dst))
23779 set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes);
23780
23781 if (!issetmem)
23782 {
23783 int src_align_bytes = get_mem_align_offset (src, desired_align
23784 * BITS_PER_UNIT);
23785 if (src_align_bytes >= 0)
23786 src_align_bytes = desired_align - src_align_bytes;
23787 if (src_align_bytes >= 0)
23788 {
23789 unsigned int src_align;
23790 for (src_align = desired_align; src_align >= 2; src_align >>= 1)
23791 {
23792 if ((src_align_bytes & (src_align - 1))
23793 == (align_bytes & (src_align - 1)))
23794 break;
23795 }
23796 if (src_align > (unsigned int) desired_align)
23797 src_align = desired_align;
23798 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
23799 set_mem_align (src, src_align * BITS_PER_UNIT);
23800 }
23801 if (MEM_SIZE_KNOWN_P (orig_src))
23802 set_mem_size (src, MEM_SIZE (orig_src) - align_bytes);
23803 *srcp = src;
23804 }
23805
23806 return dst;
23807 }
23808
23809 /* Return true if ALG can be used in current context.
23810 Assume we expand memset if MEMSET is true. */
23811 static bool
23812 alg_usable_p (enum stringop_alg alg, bool memset)
23813 {
23814 if (alg == no_stringop)
23815 return false;
23816 if (alg == vector_loop)
23817 return TARGET_SSE || TARGET_AVX;
23818 /* Algorithms using the rep prefix want at least edi and ecx;
23819 additionally, memset wants eax and memcpy wants esi. Don't
23820 consider such algorithms if the user has appropriated those
23821 registers for their own purposes. */
23822 if (alg == rep_prefix_1_byte
23823 || alg == rep_prefix_4_byte
23824 || alg == rep_prefix_8_byte)
23825 return !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
23826 || (memset ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
23827 return true;
23828 }
23829
23830 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
23831 static enum stringop_alg
23832 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size,
23833 unsigned HOST_WIDE_INT min_size, unsigned HOST_WIDE_INT max_size,
23834 bool memset, bool zero_memset, int *dynamic_check, bool *noalign)
23835 {
23836 const struct stringop_algs * algs;
23837 bool optimize_for_speed;
23838 int max = 0;
23839 const struct processor_costs *cost;
23840 int i;
23841 bool any_alg_usable_p = false;
23842
23843 *noalign = false;
23844 *dynamic_check = -1;
23845
23846 /* Even if the string operation call is cold, we still might spend a lot
23847 of time processing large blocks. */
23848 if (optimize_function_for_size_p (cfun)
23849 || (optimize_insn_for_size_p ()
23850 && (max_size < 256
23851 || (expected_size != -1 && expected_size < 256))))
23852 optimize_for_speed = false;
23853 else
23854 optimize_for_speed = true;
23855
23856 cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
23857 if (memset)
23858 algs = &cost->memset[TARGET_64BIT != 0];
23859 else
23860 algs = &cost->memcpy[TARGET_64BIT != 0];
23861
23862 /* Find the maximal size for which a user-defined algorithm is usable. */
23863 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
23864 {
23865 enum stringop_alg candidate = algs->size[i].alg;
23866 bool usable = alg_usable_p (candidate, memset);
23867 any_alg_usable_p |= usable;
23868
23869 if (candidate != libcall && candidate && usable)
23870 max = algs->size[i].max;
23871 }
23872
23873 /* If the expected size is not known but the max size is small enough
23874 that the inline version is a win, set the expected size into
23875 the range. */
23876 if (((max > 1 && (unsigned HOST_WIDE_INT) max >= max_size) || max == -1)
23877 && expected_size == -1)
23878 expected_size = min_size / 2 + max_size / 2;
23879
23880 /* If the user specified the algorithm, honor it if possible. */
23881 if (ix86_stringop_alg != no_stringop
23882 && alg_usable_p (ix86_stringop_alg, memset))
23883 return ix86_stringop_alg;
23884 /* rep; movq or rep; movl is the smallest variant. */
23885 else if (!optimize_for_speed)
23886 {
23887 *noalign = true;
23888 if (!count || (count & 3) || (memset && !zero_memset))
23889 return alg_usable_p (rep_prefix_1_byte, memset)
23890 ? rep_prefix_1_byte : loop_1_byte;
23891 else
23892 return alg_usable_p (rep_prefix_4_byte, memset)
23893 ? rep_prefix_4_byte : loop;
23894 }
23895 /* Very tiny blocks are best handled via the loop; REP is expensive to
23896 set up. */
23897 else if (expected_size != -1 && expected_size < 4)
23898 return loop_1_byte;
23899 else if (expected_size != -1)
23900 {
23901 enum stringop_alg alg = libcall;
23902 bool alg_noalign = false;
23903 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
23904 {
23905 /* We get here if the algorithms that were not libcall-based
23906 were rep-prefix based and we are unable to use rep prefixes
23907 based on global register usage. Break out of the loop and
23908 use the heuristic below. */
23909 if (algs->size[i].max == 0)
23910 break;
23911 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
23912 {
23913 enum stringop_alg candidate = algs->size[i].alg;
23914
23915 if (candidate != libcall && alg_usable_p (candidate, memset))
23916 {
23917 alg = candidate;
23918 alg_noalign = algs->size[i].noalign;
23919 }
23920 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
23921 last non-libcall inline algorithm. */
23922 if (TARGET_INLINE_ALL_STRINGOPS)
23923 {
23924 /* When the current size is best copied by a libcall,
23925 but we are still forced to inline, run the heuristic below
23926 that will pick code for medium-sized blocks. */
23927 if (alg != libcall)
23928 {
23929 *noalign = alg_noalign;
23930 return alg;
23931 }
23932 break;
23933 }
23934 else if (alg_usable_p (candidate, memset))
23935 {
23936 *noalign = algs->size[i].noalign;
23937 return candidate;
23938 }
23939 }
23940 }
23941 }
23942 /* When asked to inline the call anyway, try to pick a meaningful choice.
23943 We look for the maximal size of block that is faster to copy by hand and
23944 take blocks of at most that size, guessing that the average size will
23945 be roughly half of the maximum.
23946
23947 If this turns out to be bad, we might simply specify the preferred
23948 choice in ix86_costs. */
23949 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
23950 && (algs->unknown_size == libcall
23951 || !alg_usable_p (algs->unknown_size, memset)))
23952 {
23953 enum stringop_alg alg;
23954
23955 /* If there aren't any usable algorithms, then recursing on
23956 smaller sizes isn't going to find anything. Just return the
23957 simple byte-at-a-time copy loop. */
23958 if (!any_alg_usable_p)
23959 {
23960 /* Pick something reasonable. */
23961 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
23962 *dynamic_check = 128;
23963 return loop_1_byte;
23964 }
23965 if (max <= 0)
23966 max = 4096;
23967 alg = decide_alg (count, max / 2, min_size, max_size, memset,
23968 zero_memset, dynamic_check, noalign);
23969 gcc_assert (*dynamic_check == -1);
23970 gcc_assert (alg != libcall);
23971 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
23972 *dynamic_check = max;
23973 return alg;
23974 }
23975 return (alg_usable_p (algs->unknown_size, memset)
23976 ? algs->unknown_size : libcall);
23977 }
23978
23979 /* Decide on alignment. We know that the operand is already aligned to ALIGN
23980 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
23981 static int
23982 decide_alignment (int align,
23983 enum stringop_alg alg,
23984 int expected_size,
23985 enum machine_mode move_mode)
23986 {
23987 int desired_align = 0;
23988
23989 gcc_assert (alg != no_stringop);
23990
23991 if (alg == libcall)
23992 return 0;
23993 if (move_mode == VOIDmode)
23994 return 0;
23995
23996 desired_align = GET_MODE_SIZE (move_mode);
23997 /* PentiumPro has special logic triggering for 8 byte aligned blocks,
23998 copying a whole cache line at once. */
23999 if (TARGET_PENTIUMPRO
24000 && (alg == rep_prefix_4_byte || alg == rep_prefix_1_byte))
24001 desired_align = 8;
24002
24003 if (optimize_size)
24004 desired_align = 1;
24005 if (desired_align < align)
24006 desired_align = align;
24007 if (expected_size != -1 && expected_size < 4)
24008 desired_align = align;
24009
24010 return desired_align;
24011 }
24012
24013
24014 /* Helper function for memset. For a QImode value 0xXY produce
24015 0xXYXYXYXY of the width specified by MODE. This is essentially
24016 a multiplication by 0x01010101, but we can do slightly better than
24017 synth_mult by unwinding the sequence by hand on CPUs with
24018 slow multiply. */
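/* As an illustrative sketch only (broadcast_byte is a hypothetical helper,
   not part of this file), the shift-and-OR path used below computes the
   same value as:

     static unsigned HOST_WIDE_INT
     broadcast_byte (unsigned char b, int bytes)
     {
       unsigned HOST_WIDE_INT v = b;
       v |= v << 8;
       v |= v << 16;
       if (bytes == 8)
         v |= (v << 16) << 16;
       return v;
     }

   For b == 0xAB the value grows 0xAB -> 0xABAB -> 0xABABABAB and, when
   bytes == 8, to 0xABABABABABABABAB, matching the CONST_INT case in the
   function below.  */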
24019 static rtx
24020 promote_duplicated_reg (enum machine_mode mode, rtx val)
24021 {
24022 enum machine_mode valmode = GET_MODE (val);
24023 rtx tmp;
24024 int nops = mode == DImode ? 3 : 2;
24025
24026 gcc_assert (mode == SImode || mode == DImode || val == const0_rtx);
24027 if (val == const0_rtx)
24028 return copy_to_mode_reg (mode, CONST0_RTX (mode));
24029 if (CONST_INT_P (val))
24030 {
24031 HOST_WIDE_INT v = INTVAL (val) & 255;
24032
24033 v |= v << 8;
24034 v |= v << 16;
24035 if (mode == DImode)
24036 v |= (v << 16) << 16;
24037 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
24038 }
24039
24040 if (valmode == VOIDmode)
24041 valmode = QImode;
24042 if (valmode != QImode)
24043 val = gen_lowpart (QImode, val);
24044 if (mode == QImode)
24045 return val;
24046 if (!TARGET_PARTIAL_REG_STALL)
24047 nops--;
24048 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
24049 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
24050 <= (ix86_cost->shift_const + ix86_cost->add) * nops
24051 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
24052 {
24053 rtx reg = convert_modes (mode, QImode, val, true);
24054 tmp = promote_duplicated_reg (mode, const1_rtx);
24055 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
24056 OPTAB_DIRECT);
24057 }
24058 else
24059 {
24060 rtx reg = convert_modes (mode, QImode, val, true);
24061
24062 if (!TARGET_PARTIAL_REG_STALL)
24063 if (mode == SImode)
24064 emit_insn (gen_movsi_insv_1 (reg, reg));
24065 else
24066 emit_insn (gen_movdi_insv_1 (reg, reg));
24067 else
24068 {
24069 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
24070 NULL, 1, OPTAB_DIRECT);
24071 reg =
24072 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24073 }
24074 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
24075 NULL, 1, OPTAB_DIRECT);
24076 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24077 if (mode == SImode)
24078 return reg;
24079 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
24080 NULL, 1, OPTAB_DIRECT);
24081 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24082 return reg;
24083 }
24084 }
24085
24086 /* Duplicate value VAL using promote_duplicated_reg into maximal size that will
24087 be needed by main loop copying SIZE_NEEDED chunks and prologue getting
24088 alignment from ALIGN to DESIRED_ALIGN. */
24089 static rtx
24090 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align,
24091 int align)
24092 {
24093 rtx promoted_val;
24094
24095 if (TARGET_64BIT
24096 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
24097 promoted_val = promote_duplicated_reg (DImode, val);
24098 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
24099 promoted_val = promote_duplicated_reg (SImode, val);
24100 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
24101 promoted_val = promote_duplicated_reg (HImode, val);
24102 else
24103 promoted_val = val;
24104
24105 return promoted_val;
24106 }
24107
24108 /* Expand string move (memcpy) or store (memset) operation. Use i386 string
24109 operations when profitable. The code depends upon architecture, block size
24110 and alignment, but always has one of the following overall structures:
24111
24112 Aligned move sequence:
24113
24114 1) Prologue guard: Conditional that jumps up to epilogues for small
24115 blocks that can be handled by the epilogue alone. This is faster
24116 but also needed for correctness, since the prologue assumes the block
24117 is larger than the desired alignment.
24118
24119 Optional dynamic check for size and libcall for large
24120 blocks is emitted here too, with -minline-stringops-dynamically.
24121
24122 2) Prologue: copy first few bytes in order to get destination
24123 aligned to DESIRED_ALIGN. It is emitted only when ALIGN is less
24124 than DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be
24125 copied. We emit either a jump tree on power of two sized
24126 blocks, or a byte loop.
24127
24128 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
24129 with specified algorithm.
24130
24131 4) Epilogue: code copying tail of the block that is too small to be
24132 handled by main body (or up to size guarded by prologue guard).
24133
24134 Misaligned move sequence
24135
24136 1) misaligned move prologue/epilogue containing:
24137 a) Prologue handling small memory blocks and jumping to done_label
24138 (skipped if blocks are known to be large enough)
24139 b) Single move copying the first DESIRED_ALIGN-ALIGN bytes if alignment is
24140 needed by single possibly misaligned move
24141 (skipped if alignment is not needed)
24142 c) Copy of last SIZE_NEEDED bytes by possibly misaligned moves
24143
24144 2) Zero size guard dispatching to done_label, if needed
24145
24146 3) Dispatch to a library call, if needed.
24147 
24148 4) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
24149 with specified algorithm. */
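/* A hedged sketch (assuming a memset with the rep_prefix_8_byte algorithm on
   a 64-bit target; the emitted RTL differs in detail) of how the aligned
   sequence above fits together:

     if (count < epilogue_size_needed)
       goto epilogue;                          1) prologue guard
     while (dst & 7)
       { *dst++ = val; count--; }              2) prologue: align DST
     rep stosq with count / 8 iterations       3) main body
   epilogue:
     store the remaining count & 7 bytes       4) epilogue  */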
24150 bool
24151 ix86_expand_set_or_movmem (rtx dst, rtx src, rtx count_exp, rtx val_exp,
24152 rtx align_exp, rtx expected_align_exp,
24153 rtx expected_size_exp, rtx min_size_exp,
24154 rtx max_size_exp, rtx probable_max_size_exp,
24155 bool issetmem)
24156 {
24157 rtx destreg;
24158 rtx srcreg = NULL;
24159 rtx label = NULL;
24160 rtx tmp;
24161 rtx jump_around_label = NULL;
24162 HOST_WIDE_INT align = 1;
24163 unsigned HOST_WIDE_INT count = 0;
24164 HOST_WIDE_INT expected_size = -1;
24165 int size_needed = 0, epilogue_size_needed;
24166 int desired_align = 0, align_bytes = 0;
24167 enum stringop_alg alg;
24168 rtx promoted_val = NULL;
24169 rtx vec_promoted_val = NULL;
24170 bool force_loopy_epilogue = false;
24171 int dynamic_check;
24172 bool need_zero_guard = false;
24173 bool noalign;
24174 enum machine_mode move_mode = VOIDmode;
24175 int unroll_factor = 1;
24176 /* TODO: Once value ranges are available, fill in proper data. */
24177 unsigned HOST_WIDE_INT min_size = 0;
24178 unsigned HOST_WIDE_INT max_size = -1;
24179 unsigned HOST_WIDE_INT probable_max_size = -1;
24180 bool misaligned_prologue_used = false;
24181
24182 if (CONST_INT_P (align_exp))
24183 align = INTVAL (align_exp);
24184 /* i386 can do misaligned access at a reasonable increase in cost. */
24185 if (CONST_INT_P (expected_align_exp)
24186 && INTVAL (expected_align_exp) > align)
24187 align = INTVAL (expected_align_exp);
24188 /* ALIGN is the minimum of destination and source alignment, but we care here
24189 just about destination alignment. */
24190 else if (!issetmem
24191 && MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
24192 align = MEM_ALIGN (dst) / BITS_PER_UNIT;
24193
24194 if (CONST_INT_P (count_exp))
24195 {
24196 min_size = max_size = probable_max_size = count = expected_size
24197 = INTVAL (count_exp);
24198 /* When COUNT is 0, there is nothing to do. */
24199 if (!count)
24200 return true;
24201 }
24202 else
24203 {
24204 if (min_size_exp)
24205 min_size = INTVAL (min_size_exp);
24206 if (max_size_exp)
24207 max_size = INTVAL (max_size_exp);
24208 if (probable_max_size_exp)
24209 probable_max_size = INTVAL (probable_max_size_exp);
24210 if (CONST_INT_P (expected_size_exp))
24211 expected_size = INTVAL (expected_size_exp);
24212 }
24213
24214 /* Make sure we don't need to care about overflow later on. */
24215 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
24216 return false;
24217
24218 /* Step 0: Decide on preferred algorithm, desired alignment and
24219 size of chunks to be copied by main loop. */
24220 alg = decide_alg (count, expected_size, min_size, probable_max_size,
24221 issetmem,
24222 issetmem && val_exp == const0_rtx,
24223 &dynamic_check, &noalign);
24224 if (alg == libcall)
24225 return false;
24226 gcc_assert (alg != no_stringop);
24227
24228 /* For now the vector version of memset is generated only for memory zeroing, as
24229 creating the promoted vector value is very cheap in this case. */
24230 if (issetmem && alg == vector_loop && val_exp != const0_rtx)
24231 alg = unrolled_loop;
24232
24233 if (!count)
24234 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
24235 destreg = ix86_copy_addr_to_reg (XEXP (dst, 0));
24236 if (!issetmem)
24237 srcreg = ix86_copy_addr_to_reg (XEXP (src, 0));
24238
24239 unroll_factor = 1;
24240 move_mode = word_mode;
24241 switch (alg)
24242 {
24243 case libcall:
24244 case no_stringop:
24245 case last_alg:
24246 gcc_unreachable ();
24247 case loop_1_byte:
24248 need_zero_guard = true;
24249 move_mode = QImode;
24250 break;
24251 case loop:
24252 need_zero_guard = true;
24253 break;
24254 case unrolled_loop:
24255 need_zero_guard = true;
24256 unroll_factor = (TARGET_64BIT ? 4 : 2);
24257 break;
24258 case vector_loop:
24259 need_zero_guard = true;
24260 unroll_factor = 4;
24261 /* Find the widest supported mode. */
24262 move_mode = word_mode;
24263 while (optab_handler (mov_optab, GET_MODE_WIDER_MODE (move_mode))
24264 != CODE_FOR_nothing)
24265 move_mode = GET_MODE_WIDER_MODE (move_mode);
24266
24267 /* Find the corresponding vector mode with the same size as MOVE_MODE.
24268 MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
24269 if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
24270 {
24271 int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
24272 move_mode = mode_for_vector (word_mode, nunits);
24273 if (optab_handler (mov_optab, move_mode) == CODE_FOR_nothing)
24274 move_mode = word_mode;
24275 }
24276 gcc_assert (optab_handler (mov_optab, move_mode) != CODE_FOR_nothing);
24277 break;
24278 case rep_prefix_8_byte:
24279 move_mode = DImode;
24280 break;
24281 case rep_prefix_4_byte:
24282 move_mode = SImode;
24283 break;
24284 case rep_prefix_1_byte:
24285 move_mode = QImode;
24286 break;
24287 }
24288 size_needed = GET_MODE_SIZE (move_mode) * unroll_factor;
24289 epilogue_size_needed = size_needed;
24290
24291 desired_align = decide_alignment (align, alg, expected_size, move_mode);
24292 if (!TARGET_ALIGN_STRINGOPS || noalign)
24293 align = desired_align;
24294
24295 /* Step 1: Prologue guard. */
24296
24297 /* Alignment code needs count to be in register. */
24298 if (CONST_INT_P (count_exp) && desired_align > align)
24299 {
24300 if (INTVAL (count_exp) > desired_align
24301 && INTVAL (count_exp) > size_needed)
24302 {
24303 align_bytes
24304 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
24305 if (align_bytes <= 0)
24306 align_bytes = 0;
24307 else
24308 align_bytes = desired_align - align_bytes;
24309 }
24310 if (align_bytes == 0)
24311 count_exp = force_reg (counter_mode (count_exp), count_exp);
24312 }
24313 gcc_assert (desired_align >= 1 && align >= 1);
24314
24315 /* Misaligned move sequences handle both prologue and epilogue at once.
24316 Default code generation results in smaller code for large alignments
24317 and also avoids redundant work when sizes are known precisely. */
24318 misaligned_prologue_used
24319 = (TARGET_MISALIGNED_MOVE_STRING_PRO_EPILOGUES
24320 && MAX (desired_align, epilogue_size_needed) <= 32
24321 && desired_align <= epilogue_size_needed
24322 && ((desired_align > align && !align_bytes)
24323 || (!count && epilogue_size_needed > 1)));
24324
24325 /* Do the cheap promotion to allow better CSE across the
24326 main loop and epilogue (i.e. one load of the big constant in
24327 front of all the code).
24328 For now the misaligned move sequences do not have a fast path
24329 without broadcasting. */
24330 if (issetmem && ((CONST_INT_P (val_exp) || misaligned_prologue_used)))
24331 {
24332 if (alg == vector_loop)
24333 {
24334 gcc_assert (val_exp == const0_rtx);
24335 vec_promoted_val = promote_duplicated_reg (move_mode, val_exp);
24336 promoted_val = promote_duplicated_reg_to_size (val_exp,
24337 GET_MODE_SIZE (word_mode),
24338 desired_align, align);
24339 }
24340 else
24341 {
24342 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
24343 desired_align, align);
24344 }
24345 }
24346 /* Misaligned move sequences handle both prologues and epilogues at once.
24347 Default code generation results in smaller code for large alignments and
24348 also avoids redundant work when sizes are known precisely. */
24349 if (misaligned_prologue_used)
24350 {
24351 /* The misaligned move prologue handles small blocks by itself. */
24352 expand_set_or_movmem_prologue_epilogue_by_misaligned_moves
24353 (dst, src, &destreg, &srcreg,
24354 move_mode, promoted_val, vec_promoted_val,
24355 &count_exp,
24356 &jump_around_label,
24357 desired_align < align
24358 ? MAX (desired_align, epilogue_size_needed) : epilogue_size_needed,
24359 desired_align, align, &min_size, dynamic_check, issetmem);
24360 if (!issetmem)
24361 src = change_address (src, BLKmode, srcreg);
24362 dst = change_address (dst, BLKmode, destreg);
24363 set_mem_align (dst, desired_align * BITS_PER_UNIT);
24364 epilogue_size_needed = 0;
24365 if (need_zero_guard && !min_size)
24366 {
24367 /* It is possible that we copied enough so the main loop will not
24368 execute. */
24369 gcc_assert (size_needed > 1);
24370 if (jump_around_label == NULL_RTX)
24371 jump_around_label = gen_label_rtx ();
24372 emit_cmp_and_jump_insns (count_exp,
24373 GEN_INT (size_needed),
24374 LTU, 0, counter_mode (count_exp), 1, jump_around_label);
24375 if (expected_size == -1
24376 || expected_size < (desired_align - align) / 2 + size_needed)
24377 predict_jump (REG_BR_PROB_BASE * 20 / 100);
24378 else
24379 predict_jump (REG_BR_PROB_BASE * 60 / 100);
24380 }
24381 }
24382 /* Ensure that alignment prologue won't copy past end of block. */
24383 else if (size_needed > 1 || (desired_align > 1 && desired_align > align))
24384 {
24385 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
24386 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
24387 Make sure it is power of 2. */
24388 epilogue_size_needed = 1 << (floor_log2 (epilogue_size_needed) + 1);
24389
24390 /* To improve performance for small blocks, we jump around the VAL
24391 promotion. This means that if the promoted VAL is not constant,
24392 we might not use it in the epilogue and have to use the byte
24393 loop variant. */
24394 if (issetmem && epilogue_size_needed > 2 && !promoted_val)
24395 force_loopy_epilogue = true;
24396 if ((count && count < (unsigned HOST_WIDE_INT) epilogue_size_needed)
24397 || max_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
24398 {
24399 /* If main algorithm works on QImode, no epilogue is needed.
24400 For small sizes just don't align anything. */
24401 if (size_needed == 1)
24402 desired_align = align;
24403 else
24404 goto epilogue;
24405 }
24406 else if (!count
24407 && min_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
24408 {
24409 label = gen_label_rtx ();
24410 emit_cmp_and_jump_insns (count_exp,
24411 GEN_INT (epilogue_size_needed),
24412 LTU, 0, counter_mode (count_exp), 1, label);
24413 if (expected_size == -1 || expected_size < epilogue_size_needed)
24414 predict_jump (REG_BR_PROB_BASE * 60 / 100);
24415 else
24416 predict_jump (REG_BR_PROB_BASE * 20 / 100);
24417 }
24418 }
24419
24420 /* Emit code to decide at runtime whether a library call or inline code
24421 should be used. */
24422 if (dynamic_check != -1)
24423 {
24424 if (!issetmem && CONST_INT_P (count_exp))
24425 {
24426 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
24427 {
24428 emit_block_move_via_libcall (dst, src, count_exp, false);
24429 count_exp = const0_rtx;
24430 goto epilogue;
24431 }
24432 }
24433 else
24434 {
24435 rtx hot_label = gen_label_rtx ();
24436 if (jump_around_label == NULL_RTX)
24437 jump_around_label = gen_label_rtx ();
24438 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
24439 LEU, 0, counter_mode (count_exp),
24440 1, hot_label);
24441 predict_jump (REG_BR_PROB_BASE * 90 / 100);
24442 if (issetmem)
24443 set_storage_via_libcall (dst, count_exp, val_exp, false);
24444 else
24445 emit_block_move_via_libcall (dst, src, count_exp, false);
24446 emit_jump (jump_around_label);
24447 emit_label (hot_label);
24448 }
24449 }
24450
24451 /* Step 2: Alignment prologue. */
24452 /* Do the expensive promotion once we branched off the small blocks. */
24453 if (issetmem && !promoted_val)
24454 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
24455 desired_align, align);
24456
24457 if (desired_align > align && !misaligned_prologue_used)
24458 {
24459 if (align_bytes == 0)
24460 {
24461 /* Except for the first move in the prologue, we no longer know
24462 the constant offset in aliasing info. It doesn't seem worth
24463 the pain to maintain it for the first move, so throw away
24464 the info early. */
24465 dst = change_address (dst, BLKmode, destreg);
24466 if (!issetmem)
24467 src = change_address (src, BLKmode, srcreg);
24468 dst = expand_set_or_movmem_prologue (dst, src, destreg, srcreg,
24469 promoted_val, vec_promoted_val,
24470 count_exp, align, desired_align,
24471 issetmem);
24472 /* At most desired_align - align bytes are copied. */
24473 if (min_size < (unsigned)(desired_align - align))
24474 min_size = 0;
24475 else
24476 min_size -= desired_align - align;
24477 }
24478 else
24479 {
24480 /* If we know how many bytes need to be stored before dst is
24481 sufficiently aligned, maintain aliasing info accurately. */
24482 dst = expand_set_or_movmem_constant_prologue (dst, &src, destreg,
24483 srcreg,
24484 promoted_val,
24485 vec_promoted_val,
24486 desired_align,
24487 align_bytes,
24488 issetmem);
24489
24490 count_exp = plus_constant (counter_mode (count_exp),
24491 count_exp, -align_bytes);
24492 count -= align_bytes;
24493 min_size -= align_bytes;
24494 max_size -= align_bytes;
24495 }
24496 if (need_zero_guard
24497 && !min_size
24498 && (count < (unsigned HOST_WIDE_INT) size_needed
24499 || (align_bytes == 0
24500 && count < ((unsigned HOST_WIDE_INT) size_needed
24501 + desired_align - align))))
24502 {
24503 /* It is possible that we copied enough so the main loop will not
24504 execute. */
24505 gcc_assert (size_needed > 1);
24506 if (label == NULL_RTX)
24507 label = gen_label_rtx ();
24508 emit_cmp_and_jump_insns (count_exp,
24509 GEN_INT (size_needed),
24510 LTU, 0, counter_mode (count_exp), 1, label);
24511 if (expected_size == -1
24512 || expected_size < (desired_align - align) / 2 + size_needed)
24513 predict_jump (REG_BR_PROB_BASE * 20 / 100);
24514 else
24515 predict_jump (REG_BR_PROB_BASE * 60 / 100);
24516 }
24517 }
24518 if (label && size_needed == 1)
24519 {
24520 emit_label (label);
24521 LABEL_NUSES (label) = 1;
24522 label = NULL;
24523 epilogue_size_needed = 1;
24524 if (issetmem)
24525 promoted_val = val_exp;
24526 }
24527 else if (label == NULL_RTX && !misaligned_prologue_used)
24528 epilogue_size_needed = size_needed;
24529
24530 /* Step 3: Main loop. */
24531
24532 switch (alg)
24533 {
24534 case libcall:
24535 case no_stringop:
24536 case last_alg:
24537 gcc_unreachable ();
24538 case loop_1_byte:
24539 case loop:
24540 case unrolled_loop:
24541 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, promoted_val,
24542 count_exp, move_mode, unroll_factor,
24543 expected_size, issetmem);
24544 break;
24545 case vector_loop:
24546 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg,
24547 vec_promoted_val, count_exp, move_mode,
24548 unroll_factor, expected_size, issetmem);
24549 break;
24550 case rep_prefix_8_byte:
24551 case rep_prefix_4_byte:
24552 case rep_prefix_1_byte:
24553 expand_set_or_movmem_via_rep (dst, src, destreg, srcreg, promoted_val,
24554 val_exp, count_exp, move_mode, issetmem);
24555 break;
24556 }
24557 /* Adjust properly the offset of src and dest memory for aliasing. */
24558 if (CONST_INT_P (count_exp))
24559 {
24560 if (!issetmem)
24561 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
24562 (count / size_needed) * size_needed);
24563 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
24564 (count / size_needed) * size_needed);
24565 }
24566 else
24567 {
24568 if (!issetmem)
24569 src = change_address (src, BLKmode, srcreg);
24570 dst = change_address (dst, BLKmode, destreg);
24571 }
24572
24573 /* Step 4: Epilogue to copy the remaining bytes. */
24574 epilogue:
24575 if (label)
24576 {
24577 /* When the main loop is done, COUNT_EXP might hold original count,
24578 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
24579 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
24580 bytes. Compensate if needed. */
24581
24582 if (size_needed < epilogue_size_needed)
24583 {
24584 tmp =
24585 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
24586 GEN_INT (size_needed - 1), count_exp, 1,
24587 OPTAB_DIRECT);
24588 if (tmp != count_exp)
24589 emit_move_insn (count_exp, tmp);
24590 }
24591 emit_label (label);
24592 LABEL_NUSES (label) = 1;
24593 }
24594
24595 if (count_exp != const0_rtx && epilogue_size_needed > 1)
24596 {
24597 if (force_loopy_epilogue)
24598 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
24599 epilogue_size_needed);
24600 else
24601 {
24602 if (issetmem)
24603 expand_setmem_epilogue (dst, destreg, promoted_val,
24604 vec_promoted_val, count_exp,
24605 epilogue_size_needed);
24606 else
24607 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
24608 epilogue_size_needed);
24609 }
24610 }
24611 if (jump_around_label)
24612 emit_label (jump_around_label);
24613 return true;
24614 }
24615
24616
24617 /* Expand the appropriate insns for doing strlen if not just doing
24618 repnz; scasb
24619
24620 out = result, initialized with the start address
24621 align_rtx = alignment of the address.
24622 scratch = scratch register, initialized with the start address when
24623 not aligned, otherwise undefined
24624
24625 This is just the body. It needs the initializations mentioned above and
24626 some address computing at the end. These things are done in i386.md. */
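/* Roughly, the code emitted below corresponds to this C sketch (illustrative
   only; names are hypothetical and the real code also fixes up the final
   pointer without a branch):

     char *p = start;
     while (((uintptr_t) p & 3) != 0)
       if (*p++ == 0)
         goto done;
     for (;;)
       {
         unsigned int w = *(unsigned int *) p;
         p += 4;
         if ((w - 0x01010101U) & ~w & 0x80808080U)
           break;
       }
     back P up to the zero byte inside W;
   done: ;  */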
24627
24628 static void
24629 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
24630 {
24631 int align;
24632 rtx tmp;
24633 rtx align_2_label = NULL_RTX;
24634 rtx align_3_label = NULL_RTX;
24635 rtx align_4_label = gen_label_rtx ();
24636 rtx end_0_label = gen_label_rtx ();
24637 rtx mem;
24638 rtx tmpreg = gen_reg_rtx (SImode);
24639 rtx scratch = gen_reg_rtx (SImode);
24640 rtx cmp;
24641
24642 align = 0;
24643 if (CONST_INT_P (align_rtx))
24644 align = INTVAL (align_rtx);
24645
24646 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
24647
24648 /* Is there a known alignment and is it less than 4? */
24649 if (align < 4)
24650 {
24651 rtx scratch1 = gen_reg_rtx (Pmode);
24652 emit_move_insn (scratch1, out);
24653 /* Is there a known alignment and is it not 2? */
24654 if (align != 2)
24655 {
24656 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
24657 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
24658
24659 /* Leave just the 3 lower bits. */
24660 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
24661 NULL_RTX, 0, OPTAB_WIDEN);
24662
24663 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
24664 Pmode, 1, align_4_label);
24665 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
24666 Pmode, 1, align_2_label);
24667 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
24668 Pmode, 1, align_3_label);
24669 }
24670 else
24671 {
24672 /* Since the alignment is 2, we have to check 2 or 0 bytes;
24673 check whether it is aligned to 4 bytes. */
24674
24675 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
24676 NULL_RTX, 0, OPTAB_WIDEN);
24677
24678 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
24679 Pmode, 1, align_4_label);
24680 }
24681
24682 mem = change_address (src, QImode, out);
24683
24684 /* Now compare the bytes. */
24685
24686 /* Compare the first n unaligned bytes on a byte-per-byte basis. */
24687 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
24688 QImode, 1, end_0_label);
24689
24690 /* Increment the address. */
24691 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
24692
24693 /* Not needed with an alignment of 2 */
24694 if (align != 2)
24695 {
24696 emit_label (align_2_label);
24697
24698 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
24699 end_0_label);
24700
24701 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
24702
24703 emit_label (align_3_label);
24704 }
24705
24706 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
24707 end_0_label);
24708
24709 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
24710 }
24711
24712 /* Generate a loop to check 4 bytes at a time. It is not a good idea to
24713 align this loop; it only makes programs bigger and does not help to
24714 speed them up. */
24715 emit_label (align_4_label);
24716
24717 mem = change_address (src, SImode, out);
24718 emit_move_insn (scratch, mem);
24719 emit_insn (ix86_gen_add3 (out, out, GEN_INT (4)));
24720
24721 /* This formula yields a nonzero result iff one of the bytes is zero.
24722 This saves three branches inside the loop and many cycles. */
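/* An illustrative check of the formula (word_has_zero_byte is a hypothetical
   helper, not part of this file):

     static int
     word_has_zero_byte (unsigned int x)
     {
       return ((x - 0x01010101U) & ~x & 0x80808080U) != 0;
     }

   For x = 0x12340078 (second byte zero) this evaluates
   0x1132FF77 & 0xEDCBFF87 & 0x80808080 == 0x00008000, i.e. nonzero,
   while for x = 0x12345678 the result is 0.  */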
24723
24724 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
24725 emit_insn (gen_one_cmplsi2 (scratch, scratch));
24726 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
24727 emit_insn (gen_andsi3 (tmpreg, tmpreg,
24728 gen_int_mode (0x80808080, SImode)));
24729 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
24730 align_4_label);
24731
24732 if (TARGET_CMOVE)
24733 {
24734 rtx reg = gen_reg_rtx (SImode);
24735 rtx reg2 = gen_reg_rtx (Pmode);
24736 emit_move_insn (reg, tmpreg);
24737 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
24738
24739 /* If zero is not in the first two bytes, move two bytes forward. */
24740 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
24741 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
24742 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
24743 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
24744 gen_rtx_IF_THEN_ELSE (SImode, tmp,
24745 reg,
24746 tmpreg)));
24747 /* Emit lea manually to avoid clobbering of flags. */
24748 emit_insn (gen_rtx_SET (SImode, reg2,
24749 gen_rtx_PLUS (Pmode, out, const2_rtx)));
24750
24751 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
24752 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
24753 emit_insn (gen_rtx_SET (VOIDmode, out,
24754 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
24755 reg2,
24756 out)));
24757 }
24758 else
24759 {
24760 rtx end_2_label = gen_label_rtx ();
24761 /* Is zero in the first two bytes? */
24762
24763 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
24764 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
24765 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
24766 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
24767 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
24768 pc_rtx);
24769 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
24770 JUMP_LABEL (tmp) = end_2_label;
24771
24772 /* Not in the first two. Move two bytes forward. */
24773 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
24774 emit_insn (ix86_gen_add3 (out, out, const2_rtx));
24775
24776 emit_label (end_2_label);
24777
24778 }
24779
24780 /* Avoid branch in fixing the byte. */
24781 tmpreg = gen_lowpart (QImode, tmpreg);
24782 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
24783 tmp = gen_rtx_REG (CCmode, FLAGS_REG);
24784 cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx);
24785 emit_insn (ix86_gen_sub3_carry (out, out, GEN_INT (3), tmp, cmp));
24786
24787 emit_label (end_0_label);
24788 }
24789
24790 /* Expand strlen. */
24791
24792 bool
24793 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
24794 {
24795 rtx addr, scratch1, scratch2, scratch3, scratch4;
24796
24797 /* The generic case of the strlen expander is long. Avoid
24798 expanding it unless TARGET_INLINE_ALL_STRINGOPS. */
24799
24800 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
24801 && !TARGET_INLINE_ALL_STRINGOPS
24802 && !optimize_insn_for_size_p ()
24803 && (!CONST_INT_P (align) || INTVAL (align) < 4))
24804 return false;
24805
24806 addr = force_reg (Pmode, XEXP (src, 0));
24807 scratch1 = gen_reg_rtx (Pmode);
24808
24809 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
24810 && !optimize_insn_for_size_p ())
24811 {
24812 /* Well it seems that some optimizer does not combine a call like
24813 foo(strlen(bar), strlen(bar));
24814 when the move and the subtraction are done here. It does calculate
24815 the length just once when these instructions are done inside
24816 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
24817 often used and I use one fewer register for the lifetime of
24818 output_strlen_unroll() this is better. */
24819
24820 emit_move_insn (out, addr);
24821
24822 ix86_expand_strlensi_unroll_1 (out, src, align);
24823
24824 /* strlensi_unroll_1 returns the address of the zero at the end of
24825 the string, like memchr(), so compute the length by subtracting
24826 the start address. */
24827 emit_insn (ix86_gen_sub3 (out, out, addr));
24828 }
24829 else
24830 {
24831 rtx unspec;
24832
24833 /* Can't use this if the user has appropriated eax, ecx, or edi. */
24834 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
24835 return false;
24836
24837 scratch2 = gen_reg_rtx (Pmode);
24838 scratch3 = gen_reg_rtx (Pmode);
24839 scratch4 = force_reg (Pmode, constm1_rtx);
24840
24841 emit_move_insn (scratch3, addr);
24842 eoschar = force_reg (QImode, eoschar);
24843
24844 src = replace_equiv_address_nv (src, scratch3);
24845
24846 /* If .md starts supporting :P, this can be done in .md. */
24847 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
24848 scratch4), UNSPEC_SCAS);
24849 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
24850 emit_insn (ix86_gen_one_cmpl2 (scratch2, scratch1));
24851 emit_insn (ix86_gen_add3 (out, scratch2, constm1_rtx));
24852 }
24853 return true;
24854 }
24855
24856 /* For a given symbol (function), construct code to compute the address of its
24857 PLT entry in the large x86-64 PIC model. */
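/* An illustrative shape of the computed address (an assumption only; the
   exact assembly depends on dialect and register allocation):

     movabs $symbol@PLTOFF, %tmp
     add    %pic_reg, %tmp

   after which the call is made through %tmp.  */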
24858 static rtx
24859 construct_plt_address (rtx symbol)
24860 {
24861 rtx tmp, unspec;
24862
24863 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
24864 gcc_assert (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF);
24865 gcc_assert (Pmode == DImode);
24866
24867 tmp = gen_reg_rtx (Pmode);
24868 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
24869
24870 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
24871 emit_insn (ix86_gen_add3 (tmp, tmp, pic_offset_table_rtx));
24872 return tmp;
24873 }
24874
24875 rtx
24876 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
24877 rtx callarg2,
24878 rtx pop, bool sibcall)
24879 {
24880 unsigned int const cregs_size
24881 = ARRAY_SIZE (x86_64_ms_sysv_extra_clobbered_registers);
24882 rtx vec[3 + cregs_size];
24883 rtx use = NULL, call;
24884 unsigned int vec_len = 0;
24885
24886 if (pop == const0_rtx)
24887 pop = NULL;
24888 gcc_assert (!TARGET_64BIT || !pop);
24889
24890 if (TARGET_MACHO && !TARGET_64BIT)
24891 {
24892 #if TARGET_MACHO
24893 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
24894 fnaddr = machopic_indirect_call_target (fnaddr);
24895 #endif
24896 }
24897 else
24898 {
24899 /* Static functions and indirect calls don't need the pic register. */
24900 if (flag_pic
24901 && (!TARGET_64BIT
24902 || (ix86_cmodel == CM_LARGE_PIC
24903 && DEFAULT_ABI != MS_ABI))
24904 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
24905 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
24906 use_reg (&use, pic_offset_table_rtx);
24907 }
24908
24909 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
24910 {
24911 rtx al = gen_rtx_REG (QImode, AX_REG);
24912 emit_move_insn (al, callarg2);
24913 use_reg (&use, al);
24914 }
24915
24916 if (ix86_cmodel == CM_LARGE_PIC
24917 && !TARGET_PECOFF
24918 && MEM_P (fnaddr)
24919 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
24920 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
24921 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
24922 else if (sibcall
24923 ? !sibcall_insn_operand (XEXP (fnaddr, 0), word_mode)
24924 : !call_insn_operand (XEXP (fnaddr, 0), word_mode))
24925 {
24926 fnaddr = convert_to_mode (word_mode, XEXP (fnaddr, 0), 1);
24927 fnaddr = gen_rtx_MEM (QImode, copy_to_mode_reg (word_mode, fnaddr));
24928 }
24929
24930 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
24931 if (retval)
24932 call = gen_rtx_SET (VOIDmode, retval, call);
24933 vec[vec_len++] = call;
24934
24935 if (pop)
24936 {
24937 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
24938 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
24939 vec[vec_len++] = pop;
24940 }
24941
24942 if (TARGET_64BIT_MS_ABI
24943 && (!callarg2 || INTVAL (callarg2) != -2))
24944 {
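      /* Mark the registers that are call-saved under the MS ABI but
         call-clobbered under the SysV ABI as clobbered by this call.  */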
24945 unsigned i;
24946
24947 vec[vec_len++] = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx),
24948 UNSPEC_MS_TO_SYSV_CALL);
24949
24950 for (i = 0; i < cregs_size; i++)
24951 {
24952 int regno = x86_64_ms_sysv_extra_clobbered_registers[i];
24953 enum machine_mode mode = SSE_REGNO_P (regno) ? TImode : DImode;
24954
24955 vec[vec_len++]
24956 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (mode, regno));
24957 }
24958 }
24959
24960 if (vec_len > 1)
24961 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (vec_len, vec));
24962 call = emit_call_insn (call);
24963 if (use)
24964 CALL_INSN_FUNCTION_USAGE (call) = use;
24965
24966 return call;
24967 }
24968
24969 /* Output the assembly for a call instruction. */
24970
24971 const char *
24972 ix86_output_call_insn (rtx insn, rtx call_op)
24973 {
24974 bool direct_p = constant_call_address_operand (call_op, VOIDmode);
24975 bool seh_nop_p = false;
24976 const char *xasm;
24977
24978 if (SIBLING_CALL_P (insn))
24979 {
24980 if (direct_p)
24981 xasm = "jmp\t%P0";
24982 /* SEH epilogue detection requires the indirect branch case
24983 to include REX.W. */
24984 else if (TARGET_SEH)
24985 xasm = "rex.W jmp %A0";
24986 else
24987 xasm = "jmp\t%A0";
24988
24989 output_asm_insn (xasm, &call_op);
24990 return "";
24991 }
24992
24993 /* SEH unwinding can require an extra nop to be emitted in several
24994 circumstances. Determine if we have one of those. */
24995 if (TARGET_SEH)
24996 {
24997 rtx i;
24998
24999 for (i = NEXT_INSN (insn); i ; i = NEXT_INSN (i))
25000 {
25001 /* If we get to another real insn, we don't need the nop. */
25002 if (INSN_P (i))
25003 break;
25004
25005 /* If we get to the epilogue note, prevent a catch region from
25006 being adjacent to the standard epilogue sequence. If non-
25007 call-exceptions, we'll have done this during epilogue emission. */
25008 if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG
25009 && !flag_non_call_exceptions
25010 && !can_throw_internal (insn))
25011 {
25012 seh_nop_p = true;
25013 break;
25014 }
25015 }
25016
25017 /* If we didn't find a real insn following the call, prevent the
25018 unwinder from looking into the next function. */
25019 if (i == NULL)
25020 seh_nop_p = true;
25021 }
25022
25023 if (direct_p)
25024 xasm = "call\t%P0";
25025 else
25026 xasm = "call\t%A0";
25027
25028 output_asm_insn (xasm, &call_op);
25029
25030 if (seh_nop_p)
25031 return "nop";
25032
25033 return "";
25034 }
25035 \f
25036 /* Clear stack slot assignments remembered from previous functions.
25037 This is called from INIT_EXPANDERS once before RTL is emitted for each
25038 function. */
25039
25040 static struct machine_function *
25041 ix86_init_machine_status (void)
25042 {
25043 struct machine_function *f;
25044
25045 f = ggc_cleared_alloc<machine_function> ();
25046 f->use_fast_prologue_epilogue_nregs = -1;
25047 f->call_abi = ix86_abi;
25048
25049 return f;
25050 }
25051
25052 /* Return a MEM corresponding to a stack slot with mode MODE.
25053 Allocate a new slot if necessary.
25054
25055 The RTL for a function can have several slots available: N is
25056 which slot to use. */
25057
25058 rtx
25059 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
25060 {
25061 struct stack_local_entry *s;
25062
25063 gcc_assert (n < MAX_386_STACK_LOCALS);
25064
25065 for (s = ix86_stack_locals; s; s = s->next)
25066 if (s->mode == mode && s->n == n)
25067 return validize_mem (copy_rtx (s->rtl));
25068
25069 s = ggc_alloc<stack_local_entry> ();
25070 s->n = n;
25071 s->mode = mode;
25072 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
25073
25074 s->next = ix86_stack_locals;
25075 ix86_stack_locals = s;
25076 return validize_mem (s->rtl);
25077 }
25078
25079 static void
25080 ix86_instantiate_decls (void)
25081 {
25082 struct stack_local_entry *s;
25083
25084 for (s = ix86_stack_locals; s; s = s->next)
25085 if (s->rtl != NULL_RTX)
25086 instantiate_decl_rtl (s->rtl);
25087 }
25088 \f
25089 /* Check whether x86 address PARTS is a pc-relative address. */
25090
25091 static bool
25092 rip_relative_addr_p (struct ix86_address *parts)
25093 {
25094 rtx base, index, disp;
25095
25096 base = parts->base;
25097 index = parts->index;
25098 disp = parts->disp;
25099
25100 if (disp && !base && !index)
25101 {
25102 if (TARGET_64BIT)
25103 {
25104 rtx symbol = disp;
25105
25106 if (GET_CODE (disp) == CONST)
25107 symbol = XEXP (disp, 0);
25108 if (GET_CODE (symbol) == PLUS
25109 && CONST_INT_P (XEXP (symbol, 1)))
25110 symbol = XEXP (symbol, 0);
25111
25112 if (GET_CODE (symbol) == LABEL_REF
25113 || (GET_CODE (symbol) == SYMBOL_REF
25114 && SYMBOL_REF_TLS_MODEL (symbol) == 0)
25115 || (GET_CODE (symbol) == UNSPEC
25116 && (XINT (symbol, 1) == UNSPEC_GOTPCREL
25117 || XINT (symbol, 1) == UNSPEC_PCREL
25118 || XINT (symbol, 1) == UNSPEC_GOTNTPOFF)))
25119 return true;
25120 }
25121 }
25122 return false;
25123 }
25124
25125 /* Calculate the length of the memory address in the instruction encoding.
25126 Includes addr32 prefix, does not include the one-byte modrm, opcode,
25127 or other prefixes. We never generate addr32 prefix for LEA insn. */
25128
25129 int
25130 memory_address_length (rtx addr, bool lea)
25131 {
25132 struct ix86_address parts;
25133 rtx base, index, disp;
25134 int len;
25135 int ok;
25136
25137 if (GET_CODE (addr) == PRE_DEC
25138 || GET_CODE (addr) == POST_INC
25139 || GET_CODE (addr) == PRE_MODIFY
25140 || GET_CODE (addr) == POST_MODIFY)
25141 return 0;
25142
25143 ok = ix86_decompose_address (addr, &parts);
25144 gcc_assert (ok);
25145
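  /* A non-default segment needs a one-byte segment override prefix.  */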
25146 len = (parts.seg == SEG_DEFAULT) ? 0 : 1;
25147
25148 /* If this is not LEA instruction, add the length of addr32 prefix. */
25149 if (TARGET_64BIT && !lea
25150 && (SImode_address_operand (addr, VOIDmode)
25151 || (parts.base && GET_MODE (parts.base) == SImode)
25152 || (parts.index && GET_MODE (parts.index) == SImode)))
25153 len++;
25154
25155 base = parts.base;
25156 index = parts.index;
25157 disp = parts.disp;
25158
25159 if (base && GET_CODE (base) == SUBREG)
25160 base = SUBREG_REG (base);
25161 if (index && GET_CODE (index) == SUBREG)
25162 index = SUBREG_REG (index);
25163
25164 gcc_assert (base == NULL_RTX || REG_P (base));
25165 gcc_assert (index == NULL_RTX || REG_P (index));
25166
25167 /* Rule of thumb:
25168 - esp as the base always wants an index,
25169 - ebp as the base always wants a displacement,
25170 - r12 as the base always wants an index,
25171 - r13 as the base always wants a displacement. */
25172
25173 /* Register Indirect. */
25174 if (base && !index && !disp)
25175 {
25176 /* esp (for its index) and ebp (for its displacement) need
25177 the two-byte modrm form. Similarly for r12 and r13 in 64-bit
25178 code. */
25179 if (base == arg_pointer_rtx
25180 || base == frame_pointer_rtx
25181 || REGNO (base) == SP_REG
25182 || REGNO (base) == BP_REG
25183 || REGNO (base) == R12_REG
25184 || REGNO (base) == R13_REG)
25185 len++;
25186 }
25187
25188 /* Direct Addressing. In 64-bit mode mod 00 r/m 5
25189 is not disp32, but disp32(%rip), so for disp32
25190 SIB byte is needed, unless print_operand_address
25191 optimizes it into disp32(%rip) or (%rip) is implied
25192 by UNSPEC. */
25193 else if (disp && !base && !index)
25194 {
25195 len += 4;
25196 if (rip_relative_addr_p (&parts))
25197 len++;
25198 }
25199 else
25200 {
25201 /* Find the length of the displacement constant. */
25202 if (disp)
25203 {
25204 if (base && satisfies_constraint_K (disp))
25205 len += 1;
25206 else
25207 len += 4;
25208 }
25209 /* ebp always wants a displacement. Similarly r13. */
25210 else if (base && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
25211 len++;
25212
25213 /* An index requires the two-byte modrm form.... */
25214 if (index
25215 /* ...like esp (or r12), which always wants an index. */
25216 || base == arg_pointer_rtx
25217 || base == frame_pointer_rtx
25218 || (base && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
25219 len++;
25220 }
25221
25222 return len;
25223 }
25224
25225 /* Compute default value for "length_immediate" attribute. When SHORTFORM
25226 is set, expect that the insn has an 8-bit immediate alternative. */
25227 int
25228 ix86_attr_length_immediate_default (rtx insn, bool shortform)
25229 {
25230 int len = 0;
25231 int i;
25232 extract_insn_cached (insn);
25233 for (i = recog_data.n_operands - 1; i >= 0; --i)
25234 if (CONSTANT_P (recog_data.operand[i]))
25235 {
25236 enum attr_mode mode = get_attr_mode (insn);
25237
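      /* At most one constant operand is expected per insn.  */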
25238 gcc_assert (!len);
25239 if (shortform && CONST_INT_P (recog_data.operand[i]))
25240 {
25241 HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
25242 switch (mode)
25243 {
25244 case MODE_QI:
25245 len = 1;
25246 continue;
25247 case MODE_HI:
25248 ival = trunc_int_for_mode (ival, HImode);
25249 break;
25250 case MODE_SI:
25251 ival = trunc_int_for_mode (ival, SImode);
25252 break;
25253 default:
25254 break;
25255 }
25256 if (IN_RANGE (ival, -128, 127))
25257 {
25258 len = 1;
25259 continue;
25260 }
25261 }
25262 switch (mode)
25263 {
25264 case MODE_QI:
25265 len = 1;
25266 break;
25267 case MODE_HI:
25268 len = 2;
25269 break;
25270 case MODE_SI:
25271 len = 4;
25272 break;
25273 /* Immediates for DImode instructions are encoded
25274 as 32bit sign extended values. */
25275 case MODE_DI:
25276 len = 4;
25277 break;
25278 default:
25279 fatal_insn ("unknown insn mode", insn);
25280 }
25281 }
25282 return len;
25283 }
25284
25285 /* Compute default value for "length_address" attribute. */
25286 int
25287 ix86_attr_length_address_default (rtx insn)
25288 {
25289 int i;
25290
25291 if (get_attr_type (insn) == TYPE_LEA)
25292 {
25293 rtx set = PATTERN (insn), addr;
25294
25295 if (GET_CODE (set) == PARALLEL)
25296 set = XVECEXP (set, 0, 0);
25297
25298 gcc_assert (GET_CODE (set) == SET);
25299
25300 addr = SET_SRC (set);
25301
25302 return memory_address_length (addr, true);
25303 }
25304
25305 extract_insn_cached (insn);
25306 for (i = recog_data.n_operands - 1; i >= 0; --i)
25307 if (MEM_P (recog_data.operand[i]))
25308 {
25309 constrain_operands_cached (reload_completed);
25310 if (which_alternative != -1)
25311 {
25312 const char *constraints = recog_data.constraints[i];
25313 int alt = which_alternative;
25314
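          /* Step over any modifier characters and skip to the constraints of
             the selected alternative.  */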
25315 while (*constraints == '=' || *constraints == '+')
25316 constraints++;
25317 while (alt-- > 0)
25318 while (*constraints++ != ',')
25319 ;
25320 /* Skip ignored operands. */
25321 if (*constraints == 'X')
25322 continue;
25323 }
25324 return memory_address_length (XEXP (recog_data.operand[i], 0), false);
25325 }
25326 return 0;
25327 }
25328
25329 /* Compute default value for "length_vex" attribute. It includes
25330 2 or 3 byte VEX prefix and 1 opcode byte. */
25331
25332 int
25333 ix86_attr_length_vex_default (rtx insn, bool has_0f_opcode, bool has_vex_w)
25334 {
25335 int i;
25336
25337 /* Only 0f opcode can use 2 byte VEX prefix and VEX W bit uses 3
25338 byte VEX prefix. */
25339 if (!has_0f_opcode || has_vex_w)
25340 return 3 + 1;
25341
25342 /* We can always use 2 byte VEX prefix in 32bit. */
25343 if (!TARGET_64BIT)
25344 return 2 + 1;
25345
25346 extract_insn_cached (insn);
25347
25348 for (i = recog_data.n_operands - 1; i >= 0; --i)
25349 if (REG_P (recog_data.operand[i]))
25350 {
25351 /* REX.W bit uses 3 byte VEX prefix. */
25352 if (GET_MODE (recog_data.operand[i]) == DImode
25353 && GENERAL_REG_P (recog_data.operand[i]))
25354 return 3 + 1;
25355 }
25356 else
25357 {
25358 /* REX.X or REX.B bits use 3 byte VEX prefix. */
25359 if (MEM_P (recog_data.operand[i])
25360 && x86_extended_reg_mentioned_p (recog_data.operand[i]))
25361 return 3 + 1;
25362 }
25363
25364 return 2 + 1;
25365 }
25366 \f
25367 /* Return the maximum number of instructions a cpu can issue. */
25368
25369 static int
25370 ix86_issue_rate (void)
25371 {
25372 switch (ix86_tune)
25373 {
25374 case PROCESSOR_PENTIUM:
25375 case PROCESSOR_BONNELL:
25376 case PROCESSOR_SILVERMONT:
25377 case PROCESSOR_INTEL:
25378 case PROCESSOR_K6:
25379 case PROCESSOR_BTVER2:
25380 case PROCESSOR_PENTIUM4:
25381 case PROCESSOR_NOCONA:
25382 return 2;
25383
25384 case PROCESSOR_PENTIUMPRO:
25385 case PROCESSOR_ATHLON:
25386 case PROCESSOR_K8:
25387 case PROCESSOR_AMDFAM10:
25388 case PROCESSOR_GENERIC:
25389 case PROCESSOR_BTVER1:
25390 return 3;
25391
25392 case PROCESSOR_BDVER1:
25393 case PROCESSOR_BDVER2:
25394 case PROCESSOR_BDVER3:
25395 case PROCESSOR_BDVER4:
25396 case PROCESSOR_CORE2:
25397 case PROCESSOR_NEHALEM:
25398 case PROCESSOR_SANDYBRIDGE:
25399 case PROCESSOR_HASWELL:
25400 return 4;
25401
25402 default:
25403 return 1;
25404 }
25405 }
25406
25407 /* A subroutine of ix86_adjust_cost -- return TRUE iff INSN reads flags set
25408 by DEP_INSN and nothing else set by DEP_INSN. */
25409
25410 static bool
25411 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
25412 {
25413 rtx set, set2;
25414
25415 /* Simplify the test for uninteresting insns. */
25416 if (insn_type != TYPE_SETCC
25417 && insn_type != TYPE_ICMOV
25418 && insn_type != TYPE_FCMOV
25419 && insn_type != TYPE_IBR)
25420 return false;
25421
25422 if ((set = single_set (dep_insn)) != 0)
25423 {
25424 set = SET_DEST (set);
25425 set2 = NULL_RTX;
25426 }
25427 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
25428 && XVECLEN (PATTERN (dep_insn), 0) == 2
25429 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
25430 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
25431 {
25432 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
25433 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
25434 }
25435 else
25436 return false;
25437
25438 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
25439 return false;
25440
25441 /* This test is true if the dependent insn reads the flags but
25442 not any other potentially set register. */
25443 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
25444 return false;
25445
25446 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
25447 return false;
25448
25449 return true;
25450 }
25451
25452 /* Return true iff USE_INSN has a memory address with operands set by
25453 SET_INSN. */
25454
25455 bool
25456 ix86_agi_dependent (rtx set_insn, rtx use_insn)
25457 {
25458 int i;
25459 extract_insn_cached (use_insn);
25460 for (i = recog_data.n_operands - 1; i >= 0; --i)
25461 if (MEM_P (recog_data.operand[i]))
25462 {
25463 rtx addr = XEXP (recog_data.operand[i], 0);
25464 return modified_in_p (addr, set_insn) != 0;
25465 }
25466 return false;
25467 }
25468
25469 /* Helper function for exact_store_load_dependency.
25470 Return true if addr is found in insn. */
25471 static bool
25472 exact_dependency_1 (rtx addr, rtx insn)
25473 {
25474 enum rtx_code code;
25475 const char *format_ptr;
25476 int i, j;
25477
25478 code = GET_CODE (insn);
25479 switch (code)
25480 {
25481 case MEM:
25482 if (rtx_equal_p (addr, insn))
25483 return true;
25484 break;
25485 case REG:
25486 CASE_CONST_ANY:
25487 case SYMBOL_REF:
25488 case CODE_LABEL:
25489 case PC:
25490 case CC0:
25491 case EXPR_LIST:
25492 return false;
25493 default:
25494 break;
25495 }
25496
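  /* Recursively scan all sub-rtxes of INSN for ADDR.  */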
25497 format_ptr = GET_RTX_FORMAT (code);
25498 for (i = 0; i < GET_RTX_LENGTH (code); i++)
25499 {
25500 switch (*format_ptr++)
25501 {
25502 case 'e':
25503 if (exact_dependency_1 (addr, XEXP (insn, i)))
25504 return true;
25505 break;
25506 case 'E':
25507 for (j = 0; j < XVECLEN (insn, i); j++)
25508 if (exact_dependency_1 (addr, XVECEXP (insn, i, j)))
25509 return true;
25510 break;
25511 }
25512 }
25513 return false;
25514 }
25515
25516 /* Return true if there exists exact dependency for store & load, i.e.
25517 the same memory address is used in them. */
25518 static bool
25519 exact_store_load_dependency (rtx store, rtx load)
25520 {
25521 rtx set1, set2;
25522
25523 set1 = single_set (store);
25524 if (!set1)
25525 return false;
25526 if (!MEM_P (SET_DEST (set1)))
25527 return false;
25528 set2 = single_set (load);
25529 if (!set2)
25530 return false;
25531 if (exact_dependency_1 (SET_DEST (set1), SET_SRC (set2)))
25532 return true;
25533 return false;
25534 }
25535
25536 static int
25537 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
25538 {
25539 enum attr_type insn_type, dep_insn_type;
25540 enum attr_memory memory;
25541 rtx set, set2;
25542 int dep_insn_code_number;
25543
25544 /* Anti and output dependencies have zero cost on all CPUs. */
25545 if (REG_NOTE_KIND (link) != 0)
25546 return 0;
25547
25548 dep_insn_code_number = recog_memoized (dep_insn);
25549
25550 /* If we can't recognize the insns, we can't really do anything. */
25551 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
25552 return cost;
25553
25554 insn_type = get_attr_type (insn);
25555 dep_insn_type = get_attr_type (dep_insn);
25556
25557 switch (ix86_tune)
25558 {
25559 case PROCESSOR_PENTIUM:
25560 /* Address Generation Interlock adds a cycle of latency. */
25561 if (insn_type == TYPE_LEA)
25562 {
25563 rtx addr = PATTERN (insn);
25564
25565 if (GET_CODE (addr) == PARALLEL)
25566 addr = XVECEXP (addr, 0, 0);
25567
25568 gcc_assert (GET_CODE (addr) == SET);
25569
25570 addr = SET_SRC (addr);
25571 if (modified_in_p (addr, dep_insn))
25572 cost += 1;
25573 }
25574 else if (ix86_agi_dependent (dep_insn, insn))
25575 cost += 1;
25576
25577 /* ??? Compares pair with jump/setcc. */
25578 if (ix86_flags_dependent (insn, dep_insn, insn_type))
25579 cost = 0;
25580
25581 /* Floating point stores require value to be ready one cycle earlier. */
25582 if (insn_type == TYPE_FMOV
25583 && get_attr_memory (insn) == MEMORY_STORE
25584 && !ix86_agi_dependent (dep_insn, insn))
25585 cost += 1;
25586 break;
25587
25588 case PROCESSOR_PENTIUMPRO:
25589 /* INT->FP conversion is expensive. */
25590 if (get_attr_fp_int_src (dep_insn))
25591 cost += 5;
25592
25593 /* There is one extra cycle of latency between an FP op and a store. */
25594 if (insn_type == TYPE_FMOV
25595 && (set = single_set (dep_insn)) != NULL_RTX
25596 && (set2 = single_set (insn)) != NULL_RTX
25597 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
25598 && MEM_P (SET_DEST (set2)))
25599 cost += 1;
25600
25601 memory = get_attr_memory (insn);
25602
25603 /* Show ability of reorder buffer to hide latency of load by executing
25604 in parallel with previous instruction in case
25605 previous instruction is not needed to compute the address. */
25606 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
25607 && !ix86_agi_dependent (dep_insn, insn))
25608 {
25609 /* Claim moves to take one cycle, as core can issue one load
25610 at a time and the next load can start a cycle later. */
25611 if (dep_insn_type == TYPE_IMOV
25612 || dep_insn_type == TYPE_FMOV)
25613 cost = 1;
25614 else if (cost > 1)
25615 cost--;
25616 }
25617 break;
25618
25619 case PROCESSOR_K6:
25620 /* The esp dependency is resolved before
25621 the instruction is really finished. */
25622 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
25623 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
25624 return 1;
25625
25626 /* INT->FP conversion is expensive. */
25627 if (get_attr_fp_int_src (dep_insn))
25628 cost += 5;
25629
25630 memory = get_attr_memory (insn);
25631
25632 /* Show ability of reorder buffer to hide latency of load by executing
25633 in parallel with previous instruction in case
25634 previous instruction is not needed to compute the address. */
25635 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
25636 && !ix86_agi_dependent (dep_insn, insn))
25637 {
25638 /* Claim moves to take one cycle, as core can issue one load
25639 at a time and the next load can start a cycle later. */
25640 if (dep_insn_type == TYPE_IMOV
25641 || dep_insn_type == TYPE_FMOV)
25642 cost = 1;
25643 else if (cost > 2)
25644 cost -= 2;
25645 else
25646 cost = 1;
25647 }
25648 break;
25649
25650 case PROCESSOR_AMDFAM10:
25651 case PROCESSOR_BDVER1:
25652 case PROCESSOR_BDVER2:
25653 case PROCESSOR_BDVER3:
25654 case PROCESSOR_BDVER4:
25655 case PROCESSOR_BTVER1:
25656 case PROCESSOR_BTVER2:
25657 case PROCESSOR_GENERIC:
25658 /* The stack engine allows push and pop instructions to execute in parallel. */
25659 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
25660 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
25661 return 0;
25662 /* FALLTHRU */
25663
25664 case PROCESSOR_ATHLON:
25665 case PROCESSOR_K8:
25666 memory = get_attr_memory (insn);
25667
25668 /* Show ability of reorder buffer to hide latency of load by executing
25669 in parallel with previous instruction in case
25670 previous instruction is not needed to compute the address. */
25671 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
25672 && !ix86_agi_dependent (dep_insn, insn))
25673 {
25674 enum attr_unit unit = get_attr_unit (insn);
25675 int loadcost = 3;
25676
25677 /* Because of the difference between the length of integer and
25678 floating unit pipeline preparation stages, the memory operands
25679 for floating point are cheaper.
25680
25681 ??? For Athlon the difference is most probably 2. */
25682 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
25683 loadcost = 3;
25684 else
25685 loadcost = TARGET_ATHLON ? 2 : 0;
25686
25687 if (cost >= loadcost)
25688 cost -= loadcost;
25689 else
25690 cost = 0;
25691 }
25692 break;
25693
25694 case PROCESSOR_CORE2:
25695 case PROCESSOR_NEHALEM:
25696 case PROCESSOR_SANDYBRIDGE:
25697 case PROCESSOR_HASWELL:
25698 /* The stack engine allows push and pop instructions to execute in parallel. */
25699 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
25700 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
25701 return 0;
25702
25703 memory = get_attr_memory (insn);
25704
25705 /* Show ability of reorder buffer to hide latency of load by executing
25706 in parallel with previous instruction in case
25707 previous instruction is not needed to compute the address. */
25708 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
25709 && !ix86_agi_dependent (dep_insn, insn))
25710 {
25711 if (cost >= 4)
25712 cost -= 4;
25713 else
25714 cost = 0;
25715 }
25716 break;
25717
25718 case PROCESSOR_SILVERMONT:
25719 case PROCESSOR_INTEL:
25720 if (!reload_completed)
25721 return cost;
25722
25723 /* Increase cost of integer loads. */
25724 memory = get_attr_memory (dep_insn);
25725 if (memory == MEMORY_LOAD || memory == MEMORY_BOTH)
25726 {
25727 enum attr_unit unit = get_attr_unit (dep_insn);
25728 if (unit == UNIT_INTEGER && cost == 1)
25729 {
25730 if (memory == MEMORY_LOAD)
25731 cost = 3;
25732 else
25733 {
25734 /* Increase cost of ld/st for short int types only
25735 because of store forwarding issue. */
25736 rtx set = single_set (dep_insn);
25737 if (set && (GET_MODE (SET_DEST (set)) == QImode
25738 || GET_MODE (SET_DEST (set)) == HImode))
25739 {
25740 /* Increase cost of store/load insn if exact
25741 dependence exists and it is load insn. */
25742 enum attr_memory insn_memory = get_attr_memory (insn);
25743 if (insn_memory == MEMORY_LOAD
25744 && exact_store_load_dependency (dep_insn, insn))
25745 cost = 3;
25746 }
25747 }
25748 }
25749 }
25750
25751 default:
25752 break;
25753 }
25754
25755 return cost;
25756 }
25757
25758 /* How many alternative schedules to try. This should be as wide as the
25759 scheduling freedom in the DFA, but no wider. Making this value too
25760 large results in extra work for the scheduler. */
25761
25762 static int
25763 ia32_multipass_dfa_lookahead (void)
25764 {
25765 switch (ix86_tune)
25766 {
25767 case PROCESSOR_PENTIUM:
25768 return 2;
25769
25770 case PROCESSOR_PENTIUMPRO:
25771 case PROCESSOR_K6:
25772 return 1;
25773
25774 case PROCESSOR_BDVER1:
25775 case PROCESSOR_BDVER2:
25776 case PROCESSOR_BDVER3:
25777 case PROCESSOR_BDVER4:
25778 /* We use lookahead value 4 for BD both before and after reload
25779 schedules. Plan is to have value 8 included for O3. */
25780 return 4;
25781
25782 case PROCESSOR_CORE2:
25783 case PROCESSOR_NEHALEM:
25784 case PROCESSOR_SANDYBRIDGE:
25785 case PROCESSOR_HASWELL:
25786 case PROCESSOR_BONNELL:
25787 case PROCESSOR_SILVERMONT:
25788 case PROCESSOR_INTEL:
25789 /* Generally, we want haifa-sched:max_issue() to look ahead as far
25790 as many instructions can be executed on a cycle, i.e.,
25791 issue_rate. I wonder why tuning for many CPUs does not do this. */
25792 if (reload_completed)
25793 return ix86_issue_rate ();
25794 /* Don't use lookahead for pre-reload schedule to save compile time. */
25795 return 0;
25796
25797 default:
25798 return 0;
25799 }
25800 }
25801
25802 /* Return true if target platform supports macro-fusion. */
25803
25804 static bool
25805 ix86_macro_fusion_p ()
25806 {
25807 return TARGET_FUSE_CMP_AND_BRANCH;
25808 }
25809
25810 /* Check whether the current microarchitecture supports macro fusion
25811 for insn pair "CONDGEN + CONDJMP". Refer to
25812 "Intel Architectures Optimization Reference Manual". */
25813
25814 static bool
25815 ix86_macro_fusion_pair_p (rtx condgen, rtx condjmp)
25816 {
25817 rtx src, dest;
25818 rtx single_set = single_set (condgen);
25819 enum rtx_code ccode;
25820 rtx compare_set = NULL_RTX, test_if, cond;
25821 rtx alu_set = NULL_RTX, addr = NULL_RTX;
25822
25823 if (get_attr_type (condgen) != TYPE_TEST
25824 && get_attr_type (condgen) != TYPE_ICMP
25825 && get_attr_type (condgen) != TYPE_INCDEC
25826 && get_attr_type (condgen) != TYPE_ALU)
25827 return false;
25828
25829 if (single_set == NULL_RTX
25830 && !TARGET_FUSE_ALU_AND_BRANCH)
25831 return false;
25832
25833 if (single_set != NULL_RTX)
25834 compare_set = single_set;
25835 else
25836 {
25837 int i;
25838 rtx pat = PATTERN (condgen);
25839 for (i = 0; i < XVECLEN (pat, 0); i++)
25840 if (GET_CODE (XVECEXP (pat, 0, i)) == SET)
25841 {
25842 rtx set_src = SET_SRC (XVECEXP (pat, 0, i));
25843 if (GET_CODE (set_src) == COMPARE)
25844 compare_set = XVECEXP (pat, 0, i);
25845 else
25846 alu_set = XVECEXP (pat, 0, i);
25847 }
25848 }
25849 if (compare_set == NULL_RTX)
25850 return false;
25851 src = SET_SRC (compare_set);
25852 if (GET_CODE (src) != COMPARE)
25853 return false;
25854
25855 /* Macro-fusion for cmp/test MEM-IMM + conditional jmp is not
25856 supported. */
25857 if ((MEM_P (XEXP (src, 0))
25858 && CONST_INT_P (XEXP (src, 1)))
25859 || (MEM_P (XEXP (src, 1))
25860 && CONST_INT_P (XEXP (src, 0))))
25861 return false;
25862
25863 /* No fusion for RIP-relative address. */
25864 if (MEM_P (XEXP (src, 0)))
25865 addr = XEXP (XEXP (src, 0), 0);
25866 else if (MEM_P (XEXP (src, 1)))
25867 addr = XEXP (XEXP (src, 1), 0);
25868
25869 if (addr)
25870 {
25871 ix86_address parts;
25872 int ok = ix86_decompose_address (addr, &parts);
25873 gcc_assert (ok);
25874 if (rip_relative_addr_p (&parts))
25875 return false;
25876 }
25877
25878 test_if = SET_SRC (pc_set (condjmp));
25879 cond = XEXP (test_if, 0);
25880 ccode = GET_CODE (cond);
25881 /* Check whether the conditional jump uses the Sign or Overflow flags. */
25882 if (!TARGET_FUSE_CMP_AND_BRANCH_SOFLAGS
25883 && (ccode == GE
25884 || ccode == GT
25885 || ccode == LE
25886 || ccode == LT))
25887 return false;
25888
25889 /* Return true for TYPE_TEST and TYPE_ICMP. */
25890 if (get_attr_type (condgen) == TYPE_TEST
25891 || get_attr_type (condgen) == TYPE_ICMP)
25892 return true;
25893
25894 /* The following handles macro-fusion for alu + jmp. */
25895 if (!TARGET_FUSE_ALU_AND_BRANCH || !alu_set)
25896 return false;
25897
25898 /* No fusion for alu op with memory destination operand. */
25899 dest = SET_DEST (alu_set);
25900 if (MEM_P (dest))
25901 return false;
25902
25903 /* Macro-fusion for inc/dec + unsigned conditional jump is not
25904 supported. */
25905 if (get_attr_type (condgen) == TYPE_INCDEC
25906 && (ccode == GEU
25907 || ccode == GTU
25908 || ccode == LEU
25909 || ccode == LTU))
25910 return false;
25911
25912 return true;
25913 }
25914
25915 /* Try to reorder ready list to take advantage of Atom pipelined IMUL
25916 execution. It is applied if
25917 (1) an IMUL instruction is at the top of the list;
25918 (2) the ready list contains exactly one producer of an independent IMUL
25919 instruction.
25920 Return the index of the IMUL producer if it was found, and -1 otherwise. */
25921 static int
25922 do_reorder_for_imul (rtx *ready, int n_ready)
25923 {
25924 rtx insn, set, insn1, insn2;
25925 sd_iterator_def sd_it;
25926 dep_t dep;
25927 int index = -1;
25928 int i;
25929
25930 if (!TARGET_BONNELL)
25931 return index;
25932
25933 /* Check that IMUL instruction is on the top of ready list. */
25934 insn = ready[n_ready - 1];
25935 set = single_set (insn);
25936 if (!set)
25937 return index;
25938 if (!(GET_CODE (SET_SRC (set)) == MULT
25939 && GET_MODE (SET_SRC (set)) == SImode))
25940 return index;
25941
25942 /* Search for producer of independent IMUL instruction. */
25943 for (i = n_ready - 2; i >= 0; i--)
25944 {
25945 insn = ready[i];
25946 if (!NONDEBUG_INSN_P (insn))
25947 continue;
25948 /* Skip IMUL instruction. */
25949 insn2 = PATTERN (insn);
25950 if (GET_CODE (insn2) == PARALLEL)
25951 insn2 = XVECEXP (insn2, 0, 0);
25952 if (GET_CODE (insn2) == SET
25953 && GET_CODE (SET_SRC (insn2)) == MULT
25954 && GET_MODE (SET_SRC (insn2)) == SImode)
25955 continue;
25956
25957 FOR_EACH_DEP (insn, SD_LIST_FORW, sd_it, dep)
25958 {
25959 rtx con;
25960 con = DEP_CON (dep);
25961 if (!NONDEBUG_INSN_P (con))
25962 continue;
25963 insn1 = PATTERN (con);
25964 if (GET_CODE (insn1) == PARALLEL)
25965 insn1 = XVECEXP (insn1, 0, 0);
25966
25967 if (GET_CODE (insn1) == SET
25968 && GET_CODE (SET_SRC (insn1)) == MULT
25969 && GET_MODE (SET_SRC (insn1)) == SImode)
25970 {
25971 sd_iterator_def sd_it1;
25972 dep_t dep1;
25973 /* Check if there is no other dependee for IMUL. */
25974 index = i;
25975 FOR_EACH_DEP (con, SD_LIST_BACK, sd_it1, dep1)
25976 {
25977 rtx pro;
25978 pro = DEP_PRO (dep1);
25979 if (!NONDEBUG_INSN_P (pro))
25980 continue;
25981 if (pro != insn)
25982 index = -1;
25983 }
25984 if (index >= 0)
25985 break;
25986 }
25987 }
25988 if (index >= 0)
25989 break;
25990 }
25991 return index;
25992 }
25993
25994 /* Try to find the best candidate at the top of the ready list if two insns
25995 have the same priority - the best candidate is the one whose dependees were
25996 scheduled earlier. Applied for Silvermont only.
25997 Return true if the top 2 insns must be interchanged. */
25998 static bool
25999 swap_top_of_ready_list (rtx *ready, int n_ready)
26000 {
26001 rtx top = ready[n_ready - 1];
26002 rtx next = ready[n_ready - 2];
26003 rtx set;
26004 sd_iterator_def sd_it;
26005 dep_t dep;
26006 int clock1 = -1;
26007 int clock2 = -1;
26008 #define INSN_TICK(INSN) (HID (INSN)->tick)
26009
26010 if (!TARGET_SILVERMONT && !TARGET_INTEL)
26011 return false;
26012
26013 if (!NONDEBUG_INSN_P (top))
26014 return false;
26015 if (!NONJUMP_INSN_P (top))
26016 return false;
26017 if (!NONDEBUG_INSN_P (next))
26018 return false;
26019 if (!NONJUMP_INSN_P (next))
26020 return false;
26021 set = single_set (top);
26022 if (!set)
26023 return false;
26024 set = single_set (next);
26025 if (!set)
26026 return false;
26027
26028 if (INSN_PRIORITY_KNOWN (top) && INSN_PRIORITY_KNOWN (next))
26029 {
26030 if (INSN_PRIORITY (top) != INSN_PRIORITY (next))
26031 return false;
26032 /* Determine the winner more precisely. */
26033 FOR_EACH_DEP (top, SD_LIST_RES_BACK, sd_it, dep)
26034 {
26035 rtx pro;
26036 pro = DEP_PRO (dep);
26037 if (!NONDEBUG_INSN_P (pro))
26038 continue;
26039 if (INSN_TICK (pro) > clock1)
26040 clock1 = INSN_TICK (pro);
26041 }
26042 FOR_EACH_DEP (next, SD_LIST_RES_BACK, sd_it, dep)
26043 {
26044 rtx pro;
26045 pro = DEP_PRO (dep);
26046 if (!NONDEBUG_INSN_P (pro))
26047 continue;
26048 if (INSN_TICK (pro) > clock2)
26049 clock2 = INSN_TICK (pro);
26050 }
26051
26052 if (clock1 == clock2)
26053 {
26054 /* Determine winner - load must win. */
26055 enum attr_memory memory1, memory2;
26056 memory1 = get_attr_memory (top);
26057 memory2 = get_attr_memory (next);
26058 if (memory2 == MEMORY_LOAD && memory1 != MEMORY_LOAD)
26059 return true;
26060 }
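      /* Otherwise the insn whose producers finished earlier wins the top slot.  */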
26061 return (bool) (clock2 < clock1);
26062 }
26063 return false;
26064 #undef INSN_TICK
26065 }
26066
26067 /* Perform possible reordering of the ready list for Atom/Silvermont only.
26068 Return issue rate. */
26069 static int
26070 ix86_sched_reorder (FILE *dump, int sched_verbose, rtx *ready, int *pn_ready,
26071 int clock_var)
26072 {
26073 int issue_rate = -1;
26074 int n_ready = *pn_ready;
26075 int i;
26076 rtx insn;
26077 int index = -1;
26078
26079 /* Set up issue rate. */
26080 issue_rate = ix86_issue_rate ();
26081
26082 /* Do reordering for BONNELL/SILVERMONT only. */
26083 if (!TARGET_BONNELL && !TARGET_SILVERMONT && !TARGET_INTEL)
26084 return issue_rate;
26085
26086 /* Nothing to do if ready list contains only 1 instruction. */
26087 if (n_ready <= 1)
26088 return issue_rate;
26089
26090 /* Do reordering for the post-reload scheduler only. */
26091 if (!reload_completed)
26092 return issue_rate;
26093
26094 if ((index = do_reorder_for_imul (ready, n_ready)) >= 0)
26095 {
26096 if (sched_verbose > 1)
26097 fprintf (dump, ";;\tatom sched_reorder: put %d insn on top\n",
26098 INSN_UID (ready[index]));
26099
26100 /* Put IMUL producer (ready[index]) at the top of ready list. */
26101 insn = ready[index];
26102 for (i = index; i < n_ready - 1; i++)
26103 ready[i] = ready[i + 1];
26104 ready[n_ready - 1] = insn;
26105 return issue_rate;
26106 }
26107 if (clock_var != 0 && swap_top_of_ready_list (ready, n_ready))
26108 {
26109 if (sched_verbose > 1)
26110 fprintf (dump, ";;\tslm sched_reorder: swap %d and %d insns\n",
26111 INSN_UID (ready[n_ready - 1]), INSN_UID (ready[n_ready - 2]));
26112 /* Swap 2 top elements of ready list. */
26113 insn = ready[n_ready - 1];
26114 ready[n_ready - 1] = ready[n_ready - 2];
26115 ready[n_ready - 2] = insn;
26116 }
26117 return issue_rate;
26118 }
26119
26120 static bool
26121 ix86_class_likely_spilled_p (reg_class_t);
26122
26123 /* Return true if the lhs of INSN is a HW function argument register, and set
26124 *is_spilled to true if it is a likely spilled HW register. */
26125 static bool
26126 insn_is_function_arg (rtx insn, bool* is_spilled)
26127 {
26128 rtx dst;
26129
26130 if (!NONDEBUG_INSN_P (insn))
26131 return false;
26132 /* Call instructions are not movable; ignore them. */
26133 if (CALL_P (insn))
26134 return false;
26135 insn = PATTERN (insn);
26136 if (GET_CODE (insn) == PARALLEL)
26137 insn = XVECEXP (insn, 0, 0);
26138 if (GET_CODE (insn) != SET)
26139 return false;
26140 dst = SET_DEST (insn);
26141 if (REG_P (dst) && HARD_REGISTER_P (dst)
26142 && ix86_function_arg_regno_p (REGNO (dst)))
26143 {
26144 /* Is it likely spilled HW register? */
26145 if (!TEST_HARD_REG_BIT (fixed_reg_set, REGNO (dst))
26146 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst))))
26147 *is_spilled = true;
26148 return true;
26149 }
26150 return false;
26151 }
26152
26153 /* Add output dependencies for a chain of adjacent function arguments, but only
26154 if there is a move to a likely spilled HW register. Return the first argument
26155 if at least one dependence was added, or NULL otherwise. */
26156 static rtx
26157 add_parameter_dependencies (rtx call, rtx head)
26158 {
26159 rtx insn;
26160 rtx last = call;
26161 rtx first_arg = NULL;
26162 bool is_spilled = false;
26163
26164 head = PREV_INSN (head);
26165
26166 /* Find the argument-passing instruction nearest to the call. */
26167 while (true)
26168 {
26169 last = PREV_INSN (last);
26170 if (last == head)
26171 return NULL;
26172 if (!NONDEBUG_INSN_P (last))
26173 continue;
26174 if (insn_is_function_arg (last, &is_spilled))
26175 break;
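      /* The real insn nearest to the call is not an argument move; nothing to do.  */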
26176 return NULL;
26177 }
26178
26179 first_arg = last;
26180 while (true)
26181 {
26182 insn = PREV_INSN (last);
26183 if (!INSN_P (insn))
26184 break;
26185 if (insn == head)
26186 break;
26187 if (!NONDEBUG_INSN_P (insn))
26188 {
26189 last = insn;
26190 continue;
26191 }
26192 if (insn_is_function_arg (insn, &is_spilled))
26193 {
26194 /* Add an output dependence between two function arguments if the chain
26195 of output arguments contains likely spilled HW registers. */
26196 if (is_spilled)
26197 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
26198 first_arg = last = insn;
26199 }
26200 else
26201 break;
26202 }
26203 if (!is_spilled)
26204 return NULL;
26205 return first_arg;
26206 }
26207
26208 /* Add output or anti dependency from insn to first_arg to restrict its code
26209 motion. */
26210 static void
26211 avoid_func_arg_motion (rtx first_arg, rtx insn)
26212 {
26213 rtx set;
26214 rtx tmp;
26215
26216 set = single_set (insn);
26217 if (!set)
26218 return;
26219 tmp = SET_DEST (set);
26220 if (REG_P (tmp))
26221 {
26222 /* Add output dependency to the first function argument. */
26223 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
26224 return;
26225 }
26226 /* Add anti dependency. */
26227 add_dependence (first_arg, insn, REG_DEP_ANTI);
26228 }
26229
26230 /* Avoid cross-block motion of a function argument by adding a dependency
26231 from the first non-jump instruction in bb. */
26232 static void
26233 add_dependee_for_func_arg (rtx arg, basic_block bb)
26234 {
26235 rtx insn = BB_END (bb);
26236
26237 while (insn)
26238 {
26239 if (NONDEBUG_INSN_P (insn) && NONJUMP_INSN_P (insn))
26240 {
26241 rtx set = single_set (insn);
26242 if (set)
26243 {
26244 avoid_func_arg_motion (arg, insn);
26245 return;
26246 }
26247 }
26248 if (insn == BB_HEAD (bb))
26249 return;
26250 insn = PREV_INSN (insn);
26251 }
26252 }
26253
26254 /* Hook for pre-reload schedule - avoid motion of function arguments
26255 passed in likely spilled HW registers. */
26256 static void
26257 ix86_dependencies_evaluation_hook (rtx head, rtx tail)
26258 {
26259 rtx insn;
26260 rtx first_arg = NULL;
26261 if (reload_completed)
26262 return;
26263 while (head != tail && DEBUG_INSN_P (head))
26264 head = NEXT_INSN (head);
26265 for (insn = tail; insn != head; insn = PREV_INSN (insn))
26266 if (INSN_P (insn) && CALL_P (insn))
26267 {
26268 first_arg = add_parameter_dependencies (insn, head);
26269 if (first_arg)
26270 {
26271 /* Add a dependee for the first argument to predecessors, but only
26272 if the region contains more than one block. */
26273 basic_block bb = BLOCK_FOR_INSN (insn);
26274 int rgn = CONTAINING_RGN (bb->index);
26275 int nr_blks = RGN_NR_BLOCKS (rgn);
26276 /* Skip trivial regions and region head blocks that can have
26277 predecessors outside of region. */
26278 if (nr_blks > 1 && BLOCK_TO_BB (bb->index) != 0)
26279 {
26280 edge e;
26281 edge_iterator ei;
26282
26283 /* Regions are SCCs with the exception of selective
26284 scheduling with pipelining of outer blocks enabled.
26285 So also check that immediate predecessors of a non-head
26286 block are in the same region. */
26287 FOR_EACH_EDGE (e, ei, bb->preds)
26288 {
26289 /* Avoid creating loop-carried dependencies by using
26290 the topological ordering in the region. */
26291 if (rgn == CONTAINING_RGN (e->src->index)
26292 && BLOCK_TO_BB (bb->index) > BLOCK_TO_BB (e->src->index))
26293 add_dependee_for_func_arg (first_arg, e->src);
26294 }
26295 }
26296 insn = first_arg;
26297 if (insn == head)
26298 break;
26299 }
26300 }
26301 else if (first_arg)
26302 avoid_func_arg_motion (first_arg, insn);
26303 }
26304
26305 /* Hook for pre-reload schedule - set priority of moves from likely spilled
26306 HW registers to the maximum, to schedule them as soon as possible. These are
26307 moves from function argument registers at the top of the function entry
26308 and moves from function return value registers after call. */
26309 static int
26310 ix86_adjust_priority (rtx insn, int priority)
26311 {
26312 rtx set;
26313
26314 if (reload_completed)
26315 return priority;
26316
26317 if (!NONDEBUG_INSN_P (insn))
26318 return priority;
26319
26320 set = single_set (insn);
26321 if (set)
26322 {
26323 rtx tmp = SET_SRC (set);
26324 if (REG_P (tmp)
26325 && HARD_REGISTER_P (tmp)
26326 && !TEST_HARD_REG_BIT (fixed_reg_set, REGNO (tmp))
26327 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (tmp))))
26328 return current_sched_info->sched_max_insns_priority;
26329 }
26330
26331 return priority;
26332 }
26333
26334 /* Model decoder of Core 2/i7.
26335 The hooks below for multipass scheduling (see haifa-sched.c:max_issue)
26336 track the instruction fetch block boundaries and make sure that long
26337 (9+ bytes) instructions are assigned to D0. */
26338
26339 /* Maximum length of an insn that can be handled by
26340 a secondary decoder unit. '8' for Core 2/i7. */
26341 static int core2i7_secondary_decoder_max_insn_size;
26342
26343 /* Ifetch block size, i.e., number of bytes decoder reads per cycle.
26344 '16' for Core 2/i7. */
26345 static int core2i7_ifetch_block_size;
26346
26347 /* Maximum number of instructions decoder can handle per cycle.
26348 '6' for Core 2/i7. */
26349 static int core2i7_ifetch_block_max_insns;
26350
26351 typedef struct ix86_first_cycle_multipass_data_ *
26352 ix86_first_cycle_multipass_data_t;
26353 typedef const struct ix86_first_cycle_multipass_data_ *
26354 const_ix86_first_cycle_multipass_data_t;
26355
26356 /* A variable to store target state across calls to max_issue within
26357 one cycle. */
26358 static struct ix86_first_cycle_multipass_data_ _ix86_first_cycle_multipass_data,
26359 *ix86_first_cycle_multipass_data = &_ix86_first_cycle_multipass_data;
26360
26361 /* Initialize DATA. */
26362 static void
26363 core2i7_first_cycle_multipass_init (void *_data)
26364 {
26365 ix86_first_cycle_multipass_data_t data
26366 = (ix86_first_cycle_multipass_data_t) _data;
26367
26368 data->ifetch_block_len = 0;
26369 data->ifetch_block_n_insns = 0;
26370 data->ready_try_change = NULL;
26371 data->ready_try_change_size = 0;
26372 }
26373
26374 /* Advancing the cycle; reset ifetch block counts. */
26375 static void
26376 core2i7_dfa_post_advance_cycle (void)
26377 {
26378 ix86_first_cycle_multipass_data_t data = ix86_first_cycle_multipass_data;
26379
26380 gcc_assert (data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
26381
26382 data->ifetch_block_len = 0;
26383 data->ifetch_block_n_insns = 0;
26384 }
26385
26386 static int min_insn_size (rtx);
26387
26388 /* Filter out insns from ready_try that the core will not be able to issue
26389 on current cycle due to decoder. */
26390 static void
26391 core2i7_first_cycle_multipass_filter_ready_try
26392 (const_ix86_first_cycle_multipass_data_t data,
26393 signed char *ready_try, int n_ready, bool first_cycle_insn_p)
26394 {
26395 while (n_ready--)
26396 {
26397 rtx insn;
26398 int insn_size;
26399
26400 if (ready_try[n_ready])
26401 continue;
26402
26403 insn = get_ready_element (n_ready);
26404 insn_size = min_insn_size (insn);
26405
26406 if (/* If this is too long an insn for a secondary decoder ... */
26407 (!first_cycle_insn_p
26408 && insn_size > core2i7_secondary_decoder_max_insn_size)
26409 /* ... or it would not fit into the ifetch block ... */
26410 || data->ifetch_block_len + insn_size > core2i7_ifetch_block_size
26411 /* ... or the decoder is full already ... */
26412 || data->ifetch_block_n_insns + 1 > core2i7_ifetch_block_max_insns)
26413 /* ... mask the insn out. */
26414 {
26415 ready_try[n_ready] = 1;
26416
26417 if (data->ready_try_change)
26418 bitmap_set_bit (data->ready_try_change, n_ready);
26419 }
26420 }
26421 }
26422
26423 /* Prepare for a new round of multipass lookahead scheduling. */
26424 static void
26425 core2i7_first_cycle_multipass_begin (void *_data,
26426 signed char *ready_try, int n_ready,
26427 bool first_cycle_insn_p)
26428 {
26429 ix86_first_cycle_multipass_data_t data
26430 = (ix86_first_cycle_multipass_data_t) _data;
26431 const_ix86_first_cycle_multipass_data_t prev_data
26432 = ix86_first_cycle_multipass_data;
26433
26434 /* Restore the state from the end of the previous round. */
26435 data->ifetch_block_len = prev_data->ifetch_block_len;
26436 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns;
26437
26438 /* Filter instructions that cannot be issued on current cycle due to
26439 decoder restrictions. */
26440 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
26441 first_cycle_insn_p);
26442 }
26443
26444 /* INSN is being issued in current solution. Account for its impact on
26445 the decoder model. */
26446 static void
26447 core2i7_first_cycle_multipass_issue (void *_data,
26448 signed char *ready_try, int n_ready,
26449 rtx insn, const void *_prev_data)
26450 {
26451 ix86_first_cycle_multipass_data_t data
26452 = (ix86_first_cycle_multipass_data_t) _data;
26453 const_ix86_first_cycle_multipass_data_t prev_data
26454 = (const_ix86_first_cycle_multipass_data_t) _prev_data;
26455
26456 int insn_size = min_insn_size (insn);
26457
26458 data->ifetch_block_len = prev_data->ifetch_block_len + insn_size;
26459 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns + 1;
26460 gcc_assert (data->ifetch_block_len <= core2i7_ifetch_block_size
26461 && data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
26462
26463 /* Allocate or resize the bitmap for storing INSN's effect on ready_try. */
26464 if (!data->ready_try_change)
26465 {
26466 data->ready_try_change = sbitmap_alloc (n_ready);
26467 data->ready_try_change_size = n_ready;
26468 }
26469 else if (data->ready_try_change_size < n_ready)
26470 {
26471 data->ready_try_change = sbitmap_resize (data->ready_try_change,
26472 n_ready, 0);
26473 data->ready_try_change_size = n_ready;
26474 }
26475 bitmap_clear (data->ready_try_change);
26476
26477 /* Filter out insns from ready_try that the core will not be able to issue
26478 on current cycle due to decoder. */
26479 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
26480 false);
26481 }
26482
26483 /* Revert the effect on ready_try. */
26484 static void
26485 core2i7_first_cycle_multipass_backtrack (const void *_data,
26486 signed char *ready_try,
26487 int n_ready ATTRIBUTE_UNUSED)
26488 {
26489 const_ix86_first_cycle_multipass_data_t data
26490 = (const_ix86_first_cycle_multipass_data_t) _data;
26491 unsigned int i = 0;
26492 sbitmap_iterator sbi;
26493
26494 gcc_assert (bitmap_last_set_bit (data->ready_try_change) < n_ready);
26495 EXECUTE_IF_SET_IN_BITMAP (data->ready_try_change, 0, i, sbi)
26496 {
26497 ready_try[i] = 0;
26498 }
26499 }
26500
26501 /* Save the result of multipass lookahead scheduling for the next round. */
26502 static void
26503 core2i7_first_cycle_multipass_end (const void *_data)
26504 {
26505 const_ix86_first_cycle_multipass_data_t data
26506 = (const_ix86_first_cycle_multipass_data_t) _data;
26507 ix86_first_cycle_multipass_data_t next_data
26508 = ix86_first_cycle_multipass_data;
26509
26510 if (data != NULL)
26511 {
26512 next_data->ifetch_block_len = data->ifetch_block_len;
26513 next_data->ifetch_block_n_insns = data->ifetch_block_n_insns;
26514 }
26515 }
26516
26517 /* Deallocate target data. */
26518 static void
26519 core2i7_first_cycle_multipass_fini (void *_data)
26520 {
26521 ix86_first_cycle_multipass_data_t data
26522 = (ix86_first_cycle_multipass_data_t) _data;
26523
26524 if (data->ready_try_change)
26525 {
26526 sbitmap_free (data->ready_try_change);
26527 data->ready_try_change = NULL;
26528 data->ready_try_change_size = 0;
26529 }
26530 }
26531
26532 /* Prepare for scheduling pass. */
26533 static void
26534 ix86_sched_init_global (FILE *dump ATTRIBUTE_UNUSED,
26535 int verbose ATTRIBUTE_UNUSED,
26536 int max_uid ATTRIBUTE_UNUSED)
26537 {
26538 /* Install scheduling hooks for current CPU. Some of these hooks are used
26539 in time-critical parts of the scheduler, so we only set them up when
26540 they are actually used. */
26541 switch (ix86_tune)
26542 {
26543 case PROCESSOR_CORE2:
26544 case PROCESSOR_NEHALEM:
26545 case PROCESSOR_SANDYBRIDGE:
26546 case PROCESSOR_HASWELL:
26547 /* Do not perform multipass scheduling for pre-reload schedule
26548 to save compile time. */
26549 if (reload_completed)
26550 {
26551 targetm.sched.dfa_post_advance_cycle
26552 = core2i7_dfa_post_advance_cycle;
26553 targetm.sched.first_cycle_multipass_init
26554 = core2i7_first_cycle_multipass_init;
26555 targetm.sched.first_cycle_multipass_begin
26556 = core2i7_first_cycle_multipass_begin;
26557 targetm.sched.first_cycle_multipass_issue
26558 = core2i7_first_cycle_multipass_issue;
26559 targetm.sched.first_cycle_multipass_backtrack
26560 = core2i7_first_cycle_multipass_backtrack;
26561 targetm.sched.first_cycle_multipass_end
26562 = core2i7_first_cycle_multipass_end;
26563 targetm.sched.first_cycle_multipass_fini
26564 = core2i7_first_cycle_multipass_fini;
26565
26566 /* Set decoder parameters. */
26567 core2i7_secondary_decoder_max_insn_size = 8;
26568 core2i7_ifetch_block_size = 16;
26569 core2i7_ifetch_block_max_insns = 6;
26570 break;
26571 }
26572 /* ... Fall through ... */
26573 default:
26574 targetm.sched.dfa_post_advance_cycle = NULL;
26575 targetm.sched.first_cycle_multipass_init = NULL;
26576 targetm.sched.first_cycle_multipass_begin = NULL;
26577 targetm.sched.first_cycle_multipass_issue = NULL;
26578 targetm.sched.first_cycle_multipass_backtrack = NULL;
26579 targetm.sched.first_cycle_multipass_end = NULL;
26580 targetm.sched.first_cycle_multipass_fini = NULL;
26581 break;
26582 }
26583 }
26584
26585 \f
26586 /* Compute the alignment given to a constant that is being placed in memory.
26587 EXP is the constant and ALIGN is the alignment that the object would
26588 ordinarily have.
26589 The value of this function is used instead of that alignment to align
26590 the object. */
26591
26592 int
26593 ix86_constant_alignment (tree exp, int align)
26594 {
26595 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
26596 || TREE_CODE (exp) == INTEGER_CST)
26597 {
26598 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
26599 return 64;
26600 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
26601 return 128;
26602 }
26603 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
26604 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
26605 return BITS_PER_WORD;
26606
26607 return align;
26608 }
26609
26610 /* Compute the alignment for a static variable.
26611 TYPE is the data type, and ALIGN is the alignment that
26612 the object would ordinarily have. The value of this function is used
26613 instead of that alignment to align the object. */
26614
26615 int
26616 ix86_data_alignment (tree type, int align, bool opt)
26617 {
26618 /* GCC 4.8 and earlier used to incorrectly assume this alignment even
26619 for symbols from other compilation units or symbols that don't need
26620 to bind locally. In order to preserve some ABI compatibility with
26621 those compilers, ensure we don't decrease alignment from what we
26622 used to assume. */
26623
26624 int max_align_compat
26625 = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
26626
26627 /* A data structure equal to or greater than the size of a cache line
26628 (64 bytes in the Pentium 4 and other recent Intel processors, including
26629 processors based on Intel Core microarchitecture) should be aligned
26630 so that its base address is a multiple of a cache line size. */
26631
26632 int max_align
26633 = MIN ((unsigned) ix86_tune_cost->prefetch_block * 8, MAX_OFILE_ALIGNMENT);
26634
26635 if (max_align < BITS_PER_WORD)
26636 max_align = BITS_PER_WORD;
26637
26638 if (opt
26639 && AGGREGATE_TYPE_P (type)
26640 && TYPE_SIZE (type)
26641 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
26642 {
26643 if (wi::geu_p (TYPE_SIZE (type), max_align_compat)
26644 && align < max_align_compat)
26645 align = max_align_compat;
26646 if (wi::geu_p (TYPE_SIZE (type), max_align)
26647 && align < max_align)
26648 align = max_align;
26649 }
26650
26651 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
26652 to 16byte boundary. */
26653 if (TARGET_64BIT)
26654 {
26655 if ((opt ? AGGREGATE_TYPE_P (type) : TREE_CODE (type) == ARRAY_TYPE)
26656 && TYPE_SIZE (type)
26657 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
26658 && wi::geu_p (TYPE_SIZE (type), 128)
26659 && align < 128)
26660 return 128;
26661 }
26662
26663 if (!opt)
26664 return align;
26665
26666 if (TREE_CODE (type) == ARRAY_TYPE)
26667 {
26668 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
26669 return 64;
26670 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
26671 return 128;
26672 }
26673 else if (TREE_CODE (type) == COMPLEX_TYPE)
26674 {
26675
26676 if (TYPE_MODE (type) == DCmode && align < 64)
26677 return 64;
26678 if ((TYPE_MODE (type) == XCmode
26679 || TYPE_MODE (type) == TCmode) && align < 128)
26680 return 128;
26681 }
26682 else if ((TREE_CODE (type) == RECORD_TYPE
26683 || TREE_CODE (type) == UNION_TYPE
26684 || TREE_CODE (type) == QUAL_UNION_TYPE)
26685 && TYPE_FIELDS (type))
26686 {
26687 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
26688 return 64;
26689 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
26690 return 128;
26691 }
26692 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
26693 || TREE_CODE (type) == INTEGER_TYPE)
26694 {
26695 if (TYPE_MODE (type) == DFmode && align < 64)
26696 return 64;
26697 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
26698 return 128;
26699 }
26700
26701 return align;
26702 }
26703
26704 /* Compute the alignment for a local variable or a stack slot. EXP is
26705 the data type or decl itself, MODE is the widest mode available and
26706 ALIGN is the alignment that the object would ordinarily have. The
26707 value of this macro is used instead of that alignment to align the
26708 object. */
26709
26710 unsigned int
26711 ix86_local_alignment (tree exp, enum machine_mode mode,
26712 unsigned int align)
26713 {
26714 tree type, decl;
26715
26716 if (exp && DECL_P (exp))
26717 {
26718 type = TREE_TYPE (exp);
26719 decl = exp;
26720 }
26721 else
26722 {
26723 type = exp;
26724 decl = NULL;
26725 }
26726
26727 /* Don't do dynamic stack realignment for long long objects with
26728 -mpreferred-stack-boundary=2. */
26729 if (!TARGET_64BIT
26730 && align == 64
26731 && ix86_preferred_stack_boundary < 64
26732 && (mode == DImode || (type && TYPE_MODE (type) == DImode))
26733 && (!type || !TYPE_USER_ALIGN (type))
26734 && (!decl || !DECL_USER_ALIGN (decl)))
26735 align = 32;
26736
26737 /* If TYPE is NULL, we are allocating a stack slot for caller-save
26738 register in MODE. We will return the largest alignment of XF
26739 and DF. */
26740 if (!type)
26741 {
26742 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
26743 align = GET_MODE_ALIGNMENT (DFmode);
26744 return align;
26745 }
26746
26747 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
26748 to 16byte boundary. Exact wording is:
26749
26750 An array uses the same alignment as its elements, except that a local or
26751 global array variable of length at least 16 bytes or
26752 a C99 variable-length array variable always has alignment of at least 16 bytes.
26753
26754 This was added to allow use of aligned SSE instructions on arrays. This
26755 rule is meant for static storage (where the compiler cannot do the analysis
26756 by itself). We follow it for automatic variables only when convenient.
26757 We fully control everything in the compiled function, and functions from
26758 other units cannot rely on the alignment.
26759
26760 Exclude va_list type. It is the common case of local array where
26761 we can not benefit from the alignment.
26762
26763 TODO: Probably one should optimize for size only when var is not escaping. */
26764 if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
26765 && TARGET_SSE)
26766 {
26767 if (AGGREGATE_TYPE_P (type)
26768 && (va_list_type_node == NULL_TREE
26769 || (TYPE_MAIN_VARIANT (type)
26770 != TYPE_MAIN_VARIANT (va_list_type_node)))
26771 && TYPE_SIZE (type)
26772 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
26773 && wi::geu_p (TYPE_SIZE (type), 16)
26774 && align < 128)
26775 return 128;
26776 }
26777 if (TREE_CODE (type) == ARRAY_TYPE)
26778 {
26779 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
26780 return 64;
26781 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
26782 return 128;
26783 }
26784 else if (TREE_CODE (type) == COMPLEX_TYPE)
26785 {
26786 if (TYPE_MODE (type) == DCmode && align < 64)
26787 return 64;
26788 if ((TYPE_MODE (type) == XCmode
26789 || TYPE_MODE (type) == TCmode) && align < 128)
26790 return 128;
26791 }
26792 else if ((TREE_CODE (type) == RECORD_TYPE
26793 || TREE_CODE (type) == UNION_TYPE
26794 || TREE_CODE (type) == QUAL_UNION_TYPE)
26795 && TYPE_FIELDS (type))
26796 {
26797 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
26798 return 64;
26799 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
26800 return 128;
26801 }
26802 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
26803 || TREE_CODE (type) == INTEGER_TYPE)
26804 {
26805
26806 if (TYPE_MODE (type) == DFmode && align < 64)
26807 return 64;
26808 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
26809 return 128;
26810 }
26811 return align;
26812 }
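/* Worked example (a sketch of the rules above, not text from the ABI): on
   x86-64 with SSE enabled and when optimizing for speed, a local declared
   as "double buf[4]" (32 bytes, an aggregate of at least 16 bytes) would
   ordinarily get 64-bit alignment; the aggregate rule above raises that to
   128 bits so aligned SSE accesses can be used on it.  */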
26813
26814 /* Compute the minimum required alignment for dynamic stack realignment
26815 purposes for a local variable, parameter or a stack slot. EXP is
26816 the data type or decl itself, MODE is its mode and ALIGN is the
26817 alignment that the object would ordinarily have. */
26818
26819 unsigned int
26820 ix86_minimum_alignment (tree exp, enum machine_mode mode,
26821 unsigned int align)
26822 {
26823 tree type, decl;
26824
26825 if (exp && DECL_P (exp))
26826 {
26827 type = TREE_TYPE (exp);
26828 decl = exp;
26829 }
26830 else
26831 {
26832 type = exp;
26833 decl = NULL;
26834 }
26835
26836 if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
26837 return align;
26838
26839 /* Don't do dynamic stack realignment for long long objects with
26840 -mpreferred-stack-boundary=2. */
26841 if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
26842 && (!type || !TYPE_USER_ALIGN (type))
26843 && (!decl || !DECL_USER_ALIGN (decl)))
26844 return 32;
26845
26846 return align;
26847 }
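/* Illustration of the 32-bit special case above: with -m32
   -mpreferred-stack-boundary=2, a plain "long long" local would ordinarily
   demand 64-bit alignment and therefore dynamic stack realignment; the
   hook lowers the requirement to 32 bits, matching what
   ix86_local_alignment does for the same objects, so no realignment is
   forced merely because of long long locals.  */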
26848 \f
26849 /* Find a location for the static chain incoming to a nested function.
26850 This is a register, unless all free registers are used by arguments. */
26851
26852 static rtx
26853 ix86_static_chain (const_tree fndecl, bool incoming_p)
26854 {
26855 unsigned regno;
26856
26857 if (!DECL_STATIC_CHAIN (fndecl))
26858 return NULL;
26859
26860 if (TARGET_64BIT)
26861 {
26862 /* We always use R10 in 64-bit mode. */
26863 regno = R10_REG;
26864 }
26865 else
26866 {
26867 tree fntype;
26868 unsigned int ccvt;
26869
26870 /* By default in 32-bit mode we use ECX to pass the static chain. */
26871 regno = CX_REG;
26872
26873 fntype = TREE_TYPE (fndecl);
26874 ccvt = ix86_get_callcvt (fntype);
26875 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
26876 {
26877 /* Fastcall functions use ecx/edx for arguments, which leaves
26878 us with EAX for the static chain.
26879 Thiscall functions use ecx for arguments, which also
26880 leaves us with EAX for the static chain. */
26881 regno = AX_REG;
26882 }
26883 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
26884 {
26885 /* Thiscall functions use ecx for arguments, which leaves
26886 us with EAX and EDX for the static chain.
26887 For ABI compatibility we use EAX. */
26888 regno = AX_REG;
26889 }
26890 else if (ix86_function_regparm (fntype, fndecl) == 3)
26891 {
26892 /* For regparm 3, we have no free call-clobbered registers in
26893 which to store the static chain. In order to implement this,
26894 we have the trampoline push the static chain to the stack.
26895 However, we can't push a value below the return address when
26896 we call the nested function directly, so we have to use an
26897 alternate entry point. For this we use ESI, and have the
26898 alternate entry point push ESI, so that things appear the
26899 same once we're executing the nested function. */
26900 if (incoming_p)
26901 {
26902 if (fndecl == current_function_decl)
26903 ix86_static_chain_on_stack = true;
26904 return gen_frame_mem (SImode,
26905 plus_constant (Pmode,
26906 arg_pointer_rtx, -8));
26907 }
26908 regno = SI_REG;
26909 }
26910 }
26911
26912 return gen_rtx_REG (Pmode, regno);
26913 }
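/* Summary of the choices made above:
     64-bit:                    R10
     32-bit fastcall/thiscall:  EAX
     32-bit regparm (3):        pushed on the stack by the trampoline;
                                ESI at the alternate entry point
     32-bit default:            ECX  */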
26914
26915 /* Emit RTL insns to initialize the variable parts of a trampoline.
26916 FNDECL is the decl of the target address; M_TRAMP is a MEM for
26917 the trampoline, and CHAIN_VALUE is an RTX for the static chain
26918 to be passed to the target function. */
26919
26920 static void
26921 ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
26922 {
26923 rtx mem, fnaddr;
26924 int opcode;
26925 int offset = 0;
26926
26927 fnaddr = XEXP (DECL_RTL (fndecl), 0);
26928
26929 if (TARGET_64BIT)
26930 {
26931 int size;
26932
26933 /* Load the function address into r11.  Try to load the address using
26934 the shorter movl instead of movabs.  We may want to support
26935 movq for kernel mode, but the kernel does not use trampolines at
26936 the moment.  FNADDR is a 32-bit address and may not be in
26937 DImode when ptr_mode == SImode.  Always use movl in this
26938 case. */
26939 if (ptr_mode == SImode
26940 || x86_64_zext_immediate_operand (fnaddr, VOIDmode))
26941 {
26942 fnaddr = copy_addr_to_reg (fnaddr);
26943
26944 mem = adjust_address (m_tramp, HImode, offset);
26945 emit_move_insn (mem, gen_int_mode (0xbb41, HImode));
26946
26947 mem = adjust_address (m_tramp, SImode, offset + 2);
26948 emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
26949 offset += 6;
26950 }
26951 else
26952 {
26953 mem = adjust_address (m_tramp, HImode, offset);
26954 emit_move_insn (mem, gen_int_mode (0xbb49, HImode));
26955
26956 mem = adjust_address (m_tramp, DImode, offset + 2);
26957 emit_move_insn (mem, fnaddr);
26958 offset += 10;
26959 }
26960
26961 /* Load static chain using movabs to r10. Use the shorter movl
26962 instead of movabs when ptr_mode == SImode. */
26963 if (ptr_mode == SImode)
26964 {
26965 opcode = 0xba41;
26966 size = 6;
26967 }
26968 else
26969 {
26970 opcode = 0xba49;
26971 size = 10;
26972 }
26973
26974 mem = adjust_address (m_tramp, HImode, offset);
26975 emit_move_insn (mem, gen_int_mode (opcode, HImode));
26976
26977 mem = adjust_address (m_tramp, ptr_mode, offset + 2);
26978 emit_move_insn (mem, chain_value);
26979 offset += size;
26980
26981 /* Jump to r11; the last (unused) byte is a nop, only there to
26982 pad the write out to a single 32-bit store. */
26983 mem = adjust_address (m_tramp, SImode, offset);
26984 emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
26985 offset += 4;
26986 }
26987 else
26988 {
26989 rtx disp, chain;
26990
26991 /* Depending on the static chain location, either load a register
26992 with a constant, or push the constant to the stack. All of the
26993 instructions are the same size. */
26994 chain = ix86_static_chain (fndecl, true);
26995 if (REG_P (chain))
26996 {
26997 switch (REGNO (chain))
26998 {
26999 case AX_REG:
27000 opcode = 0xb8; break;
27001 case CX_REG:
27002 opcode = 0xb9; break;
27003 default:
27004 gcc_unreachable ();
27005 }
27006 }
27007 else
27008 opcode = 0x68;
27009
27010 mem = adjust_address (m_tramp, QImode, offset);
27011 emit_move_insn (mem, gen_int_mode (opcode, QImode));
27012
27013 mem = adjust_address (m_tramp, SImode, offset + 1);
27014 emit_move_insn (mem, chain_value);
27015 offset += 5;
27016
27017 mem = adjust_address (m_tramp, QImode, offset);
27018 emit_move_insn (mem, gen_int_mode (0xe9, QImode));
27019
27020 mem = adjust_address (m_tramp, SImode, offset + 1);
27021
27022 /* Compute the offset from the end of the jmp to the target function.
27023 When the trampoline stores the static chain on the stack, we need
27024 to skip the first insn, which pushes the (call-saved) register
27025 static chain; this push is 1 byte. */
27026 offset += 5;
27027 disp = expand_binop (SImode, sub_optab, fnaddr,
27028 plus_constant (Pmode, XEXP (m_tramp, 0),
27029 offset - (MEM_P (chain) ? 1 : 0)),
27030 NULL_RTX, 1, OPTAB_DIRECT);
27031 emit_move_insn (mem, disp);
27032 }
27033
27034 gcc_assert (offset <= TRAMPOLINE_SIZE);
27035
27036 #ifdef HAVE_ENABLE_EXECUTE_STACK
27037 #ifdef CHECK_EXECUTE_STACK_ENABLED
27038 if (CHECK_EXECUTE_STACK_ENABLED)
27039 #endif
27040 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
27041 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
27042 #endif
27043 }
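/* For reference, the 64-bit trampoline laid out above is, when ptr_mode
   == DImode (bytes as stored, little endian):
     49 bb <imm64>   movabs $fnaddr, %r11
     49 ba <imm64>   movabs $chain_value, %r10
     49 ff e3        jmp    *%r11
     90              nop, padding the final 32-bit store
   i.e. 24 bytes total; the shorter movl forms (41 bb / 41 ba with a
   32-bit immediate) are used instead when the values fit in 32 bits.  */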
27044 \f
27045 /* The following file contains several enumerations and data structures
27046 built from the definitions in i386-builtin-types.def. */
27047
27048 #include "i386-builtin-types.inc"
27049
27050 /* Table for the ix86 builtin non-function types. */
27051 static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1];
27052
27053 /* Retrieve an element from the above table, building some of
27054 the types lazily. */
27055
27056 static tree
27057 ix86_get_builtin_type (enum ix86_builtin_type tcode)
27058 {
27059 unsigned int index;
27060 tree type, itype;
27061
27062 gcc_assert ((unsigned)tcode < ARRAY_SIZE(ix86_builtin_type_tab));
27063
27064 type = ix86_builtin_type_tab[(int) tcode];
27065 if (type != NULL)
27066 return type;
27067
27068 gcc_assert (tcode > IX86_BT_LAST_PRIM);
27069 if (tcode <= IX86_BT_LAST_VECT)
27070 {
27071 enum machine_mode mode;
27072
27073 index = tcode - IX86_BT_LAST_PRIM - 1;
27074 itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]);
27075 mode = ix86_builtin_type_vect_mode[index];
27076
27077 type = build_vector_type_for_mode (itype, mode);
27078 }
27079 else
27080 {
27081 int quals;
27082
27083 index = tcode - IX86_BT_LAST_VECT - 1;
27084 if (tcode <= IX86_BT_LAST_PTR)
27085 quals = TYPE_UNQUALIFIED;
27086 else
27087 quals = TYPE_QUAL_CONST;
27088
27089 itype = ix86_get_builtin_type (ix86_builtin_type_ptr_base[index]);
27090 if (quals != TYPE_UNQUALIFIED)
27091 itype = build_qualified_type (itype, quals);
27092
27093 type = build_pointer_type (itype);
27094 }
27095
27096 ix86_builtin_type_tab[(int) tcode] = type;
27097 return type;
27098 }
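/* For example, the first request for the 4 x float vector type code is
   handled by the vector branch above and caches what amounts to
   build_vector_type_for_mode (float_type_node, V4SFmode); subsequent
   requests return the cached tree.  */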
27099
27100 /* Table for the ix86 builtin function types. */
27101 static GTY(()) tree ix86_builtin_func_type_tab[(int) IX86_BT_LAST_ALIAS + 1];
27102
27103 /* Retrieve an element from the above table, building some of
27104 the types lazily. */
27105
27106 static tree
27107 ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode)
27108 {
27109 tree type;
27110
27111 gcc_assert ((unsigned)tcode < ARRAY_SIZE (ix86_builtin_func_type_tab));
27112
27113 type = ix86_builtin_func_type_tab[(int) tcode];
27114 if (type != NULL)
27115 return type;
27116
27117 if (tcode <= IX86_BT_LAST_FUNC)
27118 {
27119 unsigned start = ix86_builtin_func_start[(int) tcode];
27120 unsigned after = ix86_builtin_func_start[(int) tcode + 1];
27121 tree rtype, atype, args = void_list_node;
27122 unsigned i;
27123
27124 rtype = ix86_get_builtin_type (ix86_builtin_func_args[start]);
27125 for (i = after - 1; i > start; --i)
27126 {
27127 atype = ix86_get_builtin_type (ix86_builtin_func_args[i]);
27128 args = tree_cons (NULL, atype, args);
27129 }
27130
27131 type = build_function_type (rtype, args);
27132 }
27133 else
27134 {
27135 unsigned index = tcode - IX86_BT_LAST_FUNC - 1;
27136 enum ix86_builtin_func_type icode;
27137
27138 icode = ix86_builtin_func_alias_base[index];
27139 type = ix86_get_builtin_func_type (icode);
27140 }
27141
27142 ix86_builtin_func_type_tab[(int) tcode] = type;
27143 return type;
27144 }
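/* A sketch of the non-alias path above: for an entry recorded as FLOAT
   (the return type) followed by a single FLOAT argument, the loop builds
   build_function_type (float_type_node,
                        tree_cons (NULL, float_type_node, void_list_node)),
   i.e. the type of "float f (float)".  Alias codes past IX86_BT_LAST_FUNC
   simply reuse the type of the function they alias.  */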
27145
27146
27147 /* Codes for all the SSE/MMX builtins. */
27148 enum ix86_builtins
27149 {
27150 IX86_BUILTIN_ADDPS,
27151 IX86_BUILTIN_ADDSS,
27152 IX86_BUILTIN_DIVPS,
27153 IX86_BUILTIN_DIVSS,
27154 IX86_BUILTIN_MULPS,
27155 IX86_BUILTIN_MULSS,
27156 IX86_BUILTIN_SUBPS,
27157 IX86_BUILTIN_SUBSS,
27158
27159 IX86_BUILTIN_CMPEQPS,
27160 IX86_BUILTIN_CMPLTPS,
27161 IX86_BUILTIN_CMPLEPS,
27162 IX86_BUILTIN_CMPGTPS,
27163 IX86_BUILTIN_CMPGEPS,
27164 IX86_BUILTIN_CMPNEQPS,
27165 IX86_BUILTIN_CMPNLTPS,
27166 IX86_BUILTIN_CMPNLEPS,
27167 IX86_BUILTIN_CMPNGTPS,
27168 IX86_BUILTIN_CMPNGEPS,
27169 IX86_BUILTIN_CMPORDPS,
27170 IX86_BUILTIN_CMPUNORDPS,
27171 IX86_BUILTIN_CMPEQSS,
27172 IX86_BUILTIN_CMPLTSS,
27173 IX86_BUILTIN_CMPLESS,
27174 IX86_BUILTIN_CMPNEQSS,
27175 IX86_BUILTIN_CMPNLTSS,
27176 IX86_BUILTIN_CMPNLESS,
27177 IX86_BUILTIN_CMPORDSS,
27178 IX86_BUILTIN_CMPUNORDSS,
27179
27180 IX86_BUILTIN_COMIEQSS,
27181 IX86_BUILTIN_COMILTSS,
27182 IX86_BUILTIN_COMILESS,
27183 IX86_BUILTIN_COMIGTSS,
27184 IX86_BUILTIN_COMIGESS,
27185 IX86_BUILTIN_COMINEQSS,
27186 IX86_BUILTIN_UCOMIEQSS,
27187 IX86_BUILTIN_UCOMILTSS,
27188 IX86_BUILTIN_UCOMILESS,
27189 IX86_BUILTIN_UCOMIGTSS,
27190 IX86_BUILTIN_UCOMIGESS,
27191 IX86_BUILTIN_UCOMINEQSS,
27192
27193 IX86_BUILTIN_CVTPI2PS,
27194 IX86_BUILTIN_CVTPS2PI,
27195 IX86_BUILTIN_CVTSI2SS,
27196 IX86_BUILTIN_CVTSI642SS,
27197 IX86_BUILTIN_CVTSS2SI,
27198 IX86_BUILTIN_CVTSS2SI64,
27199 IX86_BUILTIN_CVTTPS2PI,
27200 IX86_BUILTIN_CVTTSS2SI,
27201 IX86_BUILTIN_CVTTSS2SI64,
27202
27203 IX86_BUILTIN_MAXPS,
27204 IX86_BUILTIN_MAXSS,
27205 IX86_BUILTIN_MINPS,
27206 IX86_BUILTIN_MINSS,
27207
27208 IX86_BUILTIN_LOADUPS,
27209 IX86_BUILTIN_STOREUPS,
27210 IX86_BUILTIN_MOVSS,
27211
27212 IX86_BUILTIN_MOVHLPS,
27213 IX86_BUILTIN_MOVLHPS,
27214 IX86_BUILTIN_LOADHPS,
27215 IX86_BUILTIN_LOADLPS,
27216 IX86_BUILTIN_STOREHPS,
27217 IX86_BUILTIN_STORELPS,
27218
27219 IX86_BUILTIN_MASKMOVQ,
27220 IX86_BUILTIN_MOVMSKPS,
27221 IX86_BUILTIN_PMOVMSKB,
27222
27223 IX86_BUILTIN_MOVNTPS,
27224 IX86_BUILTIN_MOVNTQ,
27225
27226 IX86_BUILTIN_LOADDQU,
27227 IX86_BUILTIN_STOREDQU,
27228
27229 IX86_BUILTIN_PACKSSWB,
27230 IX86_BUILTIN_PACKSSDW,
27231 IX86_BUILTIN_PACKUSWB,
27232
27233 IX86_BUILTIN_PADDB,
27234 IX86_BUILTIN_PADDW,
27235 IX86_BUILTIN_PADDD,
27236 IX86_BUILTIN_PADDQ,
27237 IX86_BUILTIN_PADDSB,
27238 IX86_BUILTIN_PADDSW,
27239 IX86_BUILTIN_PADDUSB,
27240 IX86_BUILTIN_PADDUSW,
27241 IX86_BUILTIN_PSUBB,
27242 IX86_BUILTIN_PSUBW,
27243 IX86_BUILTIN_PSUBD,
27244 IX86_BUILTIN_PSUBQ,
27245 IX86_BUILTIN_PSUBSB,
27246 IX86_BUILTIN_PSUBSW,
27247 IX86_BUILTIN_PSUBUSB,
27248 IX86_BUILTIN_PSUBUSW,
27249
27250 IX86_BUILTIN_PAND,
27251 IX86_BUILTIN_PANDN,
27252 IX86_BUILTIN_POR,
27253 IX86_BUILTIN_PXOR,
27254
27255 IX86_BUILTIN_PAVGB,
27256 IX86_BUILTIN_PAVGW,
27257
27258 IX86_BUILTIN_PCMPEQB,
27259 IX86_BUILTIN_PCMPEQW,
27260 IX86_BUILTIN_PCMPEQD,
27261 IX86_BUILTIN_PCMPGTB,
27262 IX86_BUILTIN_PCMPGTW,
27263 IX86_BUILTIN_PCMPGTD,
27264
27265 IX86_BUILTIN_PMADDWD,
27266
27267 IX86_BUILTIN_PMAXSW,
27268 IX86_BUILTIN_PMAXUB,
27269 IX86_BUILTIN_PMINSW,
27270 IX86_BUILTIN_PMINUB,
27271
27272 IX86_BUILTIN_PMULHUW,
27273 IX86_BUILTIN_PMULHW,
27274 IX86_BUILTIN_PMULLW,
27275
27276 IX86_BUILTIN_PSADBW,
27277 IX86_BUILTIN_PSHUFW,
27278
27279 IX86_BUILTIN_PSLLW,
27280 IX86_BUILTIN_PSLLD,
27281 IX86_BUILTIN_PSLLQ,
27282 IX86_BUILTIN_PSRAW,
27283 IX86_BUILTIN_PSRAD,
27284 IX86_BUILTIN_PSRLW,
27285 IX86_BUILTIN_PSRLD,
27286 IX86_BUILTIN_PSRLQ,
27287 IX86_BUILTIN_PSLLWI,
27288 IX86_BUILTIN_PSLLDI,
27289 IX86_BUILTIN_PSLLQI,
27290 IX86_BUILTIN_PSRAWI,
27291 IX86_BUILTIN_PSRADI,
27292 IX86_BUILTIN_PSRLWI,
27293 IX86_BUILTIN_PSRLDI,
27294 IX86_BUILTIN_PSRLQI,
27295
27296 IX86_BUILTIN_PUNPCKHBW,
27297 IX86_BUILTIN_PUNPCKHWD,
27298 IX86_BUILTIN_PUNPCKHDQ,
27299 IX86_BUILTIN_PUNPCKLBW,
27300 IX86_BUILTIN_PUNPCKLWD,
27301 IX86_BUILTIN_PUNPCKLDQ,
27302
27303 IX86_BUILTIN_SHUFPS,
27304
27305 IX86_BUILTIN_RCPPS,
27306 IX86_BUILTIN_RCPSS,
27307 IX86_BUILTIN_RSQRTPS,
27308 IX86_BUILTIN_RSQRTPS_NR,
27309 IX86_BUILTIN_RSQRTSS,
27310 IX86_BUILTIN_RSQRTF,
27311 IX86_BUILTIN_SQRTPS,
27312 IX86_BUILTIN_SQRTPS_NR,
27313 IX86_BUILTIN_SQRTSS,
27314
27315 IX86_BUILTIN_UNPCKHPS,
27316 IX86_BUILTIN_UNPCKLPS,
27317
27318 IX86_BUILTIN_ANDPS,
27319 IX86_BUILTIN_ANDNPS,
27320 IX86_BUILTIN_ORPS,
27321 IX86_BUILTIN_XORPS,
27322
27323 IX86_BUILTIN_EMMS,
27324 IX86_BUILTIN_LDMXCSR,
27325 IX86_BUILTIN_STMXCSR,
27326 IX86_BUILTIN_SFENCE,
27327
27328 IX86_BUILTIN_FXSAVE,
27329 IX86_BUILTIN_FXRSTOR,
27330 IX86_BUILTIN_FXSAVE64,
27331 IX86_BUILTIN_FXRSTOR64,
27332
27333 IX86_BUILTIN_XSAVE,
27334 IX86_BUILTIN_XRSTOR,
27335 IX86_BUILTIN_XSAVE64,
27336 IX86_BUILTIN_XRSTOR64,
27337
27338 IX86_BUILTIN_XSAVEOPT,
27339 IX86_BUILTIN_XSAVEOPT64,
27340
27341 IX86_BUILTIN_XSAVEC,
27342 IX86_BUILTIN_XSAVEC64,
27343
27344 IX86_BUILTIN_XSAVES,
27345 IX86_BUILTIN_XRSTORS,
27346 IX86_BUILTIN_XSAVES64,
27347 IX86_BUILTIN_XRSTORS64,
27348
27349 /* 3DNow! Original */
27350 IX86_BUILTIN_FEMMS,
27351 IX86_BUILTIN_PAVGUSB,
27352 IX86_BUILTIN_PF2ID,
27353 IX86_BUILTIN_PFACC,
27354 IX86_BUILTIN_PFADD,
27355 IX86_BUILTIN_PFCMPEQ,
27356 IX86_BUILTIN_PFCMPGE,
27357 IX86_BUILTIN_PFCMPGT,
27358 IX86_BUILTIN_PFMAX,
27359 IX86_BUILTIN_PFMIN,
27360 IX86_BUILTIN_PFMUL,
27361 IX86_BUILTIN_PFRCP,
27362 IX86_BUILTIN_PFRCPIT1,
27363 IX86_BUILTIN_PFRCPIT2,
27364 IX86_BUILTIN_PFRSQIT1,
27365 IX86_BUILTIN_PFRSQRT,
27366 IX86_BUILTIN_PFSUB,
27367 IX86_BUILTIN_PFSUBR,
27368 IX86_BUILTIN_PI2FD,
27369 IX86_BUILTIN_PMULHRW,
27370
27371 /* 3DNow! Athlon Extensions */
27372 IX86_BUILTIN_PF2IW,
27373 IX86_BUILTIN_PFNACC,
27374 IX86_BUILTIN_PFPNACC,
27375 IX86_BUILTIN_PI2FW,
27376 IX86_BUILTIN_PSWAPDSI,
27377 IX86_BUILTIN_PSWAPDSF,
27378
27379 /* SSE2 */
27380 IX86_BUILTIN_ADDPD,
27381 IX86_BUILTIN_ADDSD,
27382 IX86_BUILTIN_DIVPD,
27383 IX86_BUILTIN_DIVSD,
27384 IX86_BUILTIN_MULPD,
27385 IX86_BUILTIN_MULSD,
27386 IX86_BUILTIN_SUBPD,
27387 IX86_BUILTIN_SUBSD,
27388
27389 IX86_BUILTIN_CMPEQPD,
27390 IX86_BUILTIN_CMPLTPD,
27391 IX86_BUILTIN_CMPLEPD,
27392 IX86_BUILTIN_CMPGTPD,
27393 IX86_BUILTIN_CMPGEPD,
27394 IX86_BUILTIN_CMPNEQPD,
27395 IX86_BUILTIN_CMPNLTPD,
27396 IX86_BUILTIN_CMPNLEPD,
27397 IX86_BUILTIN_CMPNGTPD,
27398 IX86_BUILTIN_CMPNGEPD,
27399 IX86_BUILTIN_CMPORDPD,
27400 IX86_BUILTIN_CMPUNORDPD,
27401 IX86_BUILTIN_CMPEQSD,
27402 IX86_BUILTIN_CMPLTSD,
27403 IX86_BUILTIN_CMPLESD,
27404 IX86_BUILTIN_CMPNEQSD,
27405 IX86_BUILTIN_CMPNLTSD,
27406 IX86_BUILTIN_CMPNLESD,
27407 IX86_BUILTIN_CMPORDSD,
27408 IX86_BUILTIN_CMPUNORDSD,
27409
27410 IX86_BUILTIN_COMIEQSD,
27411 IX86_BUILTIN_COMILTSD,
27412 IX86_BUILTIN_COMILESD,
27413 IX86_BUILTIN_COMIGTSD,
27414 IX86_BUILTIN_COMIGESD,
27415 IX86_BUILTIN_COMINEQSD,
27416 IX86_BUILTIN_UCOMIEQSD,
27417 IX86_BUILTIN_UCOMILTSD,
27418 IX86_BUILTIN_UCOMILESD,
27419 IX86_BUILTIN_UCOMIGTSD,
27420 IX86_BUILTIN_UCOMIGESD,
27421 IX86_BUILTIN_UCOMINEQSD,
27422
27423 IX86_BUILTIN_MAXPD,
27424 IX86_BUILTIN_MAXSD,
27425 IX86_BUILTIN_MINPD,
27426 IX86_BUILTIN_MINSD,
27427
27428 IX86_BUILTIN_ANDPD,
27429 IX86_BUILTIN_ANDNPD,
27430 IX86_BUILTIN_ORPD,
27431 IX86_BUILTIN_XORPD,
27432
27433 IX86_BUILTIN_SQRTPD,
27434 IX86_BUILTIN_SQRTSD,
27435
27436 IX86_BUILTIN_UNPCKHPD,
27437 IX86_BUILTIN_UNPCKLPD,
27438
27439 IX86_BUILTIN_SHUFPD,
27440
27441 IX86_BUILTIN_LOADUPD,
27442 IX86_BUILTIN_STOREUPD,
27443 IX86_BUILTIN_MOVSD,
27444
27445 IX86_BUILTIN_LOADHPD,
27446 IX86_BUILTIN_LOADLPD,
27447
27448 IX86_BUILTIN_CVTDQ2PD,
27449 IX86_BUILTIN_CVTDQ2PS,
27450
27451 IX86_BUILTIN_CVTPD2DQ,
27452 IX86_BUILTIN_CVTPD2PI,
27453 IX86_BUILTIN_CVTPD2PS,
27454 IX86_BUILTIN_CVTTPD2DQ,
27455 IX86_BUILTIN_CVTTPD2PI,
27456
27457 IX86_BUILTIN_CVTPI2PD,
27458 IX86_BUILTIN_CVTSI2SD,
27459 IX86_BUILTIN_CVTSI642SD,
27460
27461 IX86_BUILTIN_CVTSD2SI,
27462 IX86_BUILTIN_CVTSD2SI64,
27463 IX86_BUILTIN_CVTSD2SS,
27464 IX86_BUILTIN_CVTSS2SD,
27465 IX86_BUILTIN_CVTTSD2SI,
27466 IX86_BUILTIN_CVTTSD2SI64,
27467
27468 IX86_BUILTIN_CVTPS2DQ,
27469 IX86_BUILTIN_CVTPS2PD,
27470 IX86_BUILTIN_CVTTPS2DQ,
27471
27472 IX86_BUILTIN_MOVNTI,
27473 IX86_BUILTIN_MOVNTI64,
27474 IX86_BUILTIN_MOVNTPD,
27475 IX86_BUILTIN_MOVNTDQ,
27476
27477 IX86_BUILTIN_MOVQ128,
27478
27479 /* SSE2 MMX */
27480 IX86_BUILTIN_MASKMOVDQU,
27481 IX86_BUILTIN_MOVMSKPD,
27482 IX86_BUILTIN_PMOVMSKB128,
27483
27484 IX86_BUILTIN_PACKSSWB128,
27485 IX86_BUILTIN_PACKSSDW128,
27486 IX86_BUILTIN_PACKUSWB128,
27487
27488 IX86_BUILTIN_PADDB128,
27489 IX86_BUILTIN_PADDW128,
27490 IX86_BUILTIN_PADDD128,
27491 IX86_BUILTIN_PADDQ128,
27492 IX86_BUILTIN_PADDSB128,
27493 IX86_BUILTIN_PADDSW128,
27494 IX86_BUILTIN_PADDUSB128,
27495 IX86_BUILTIN_PADDUSW128,
27496 IX86_BUILTIN_PSUBB128,
27497 IX86_BUILTIN_PSUBW128,
27498 IX86_BUILTIN_PSUBD128,
27499 IX86_BUILTIN_PSUBQ128,
27500 IX86_BUILTIN_PSUBSB128,
27501 IX86_BUILTIN_PSUBSW128,
27502 IX86_BUILTIN_PSUBUSB128,
27503 IX86_BUILTIN_PSUBUSW128,
27504
27505 IX86_BUILTIN_PAND128,
27506 IX86_BUILTIN_PANDN128,
27507 IX86_BUILTIN_POR128,
27508 IX86_BUILTIN_PXOR128,
27509
27510 IX86_BUILTIN_PAVGB128,
27511 IX86_BUILTIN_PAVGW128,
27512
27513 IX86_BUILTIN_PCMPEQB128,
27514 IX86_BUILTIN_PCMPEQW128,
27515 IX86_BUILTIN_PCMPEQD128,
27516 IX86_BUILTIN_PCMPGTB128,
27517 IX86_BUILTIN_PCMPGTW128,
27518 IX86_BUILTIN_PCMPGTD128,
27519
27520 IX86_BUILTIN_PMADDWD128,
27521
27522 IX86_BUILTIN_PMAXSW128,
27523 IX86_BUILTIN_PMAXUB128,
27524 IX86_BUILTIN_PMINSW128,
27525 IX86_BUILTIN_PMINUB128,
27526
27527 IX86_BUILTIN_PMULUDQ,
27528 IX86_BUILTIN_PMULUDQ128,
27529 IX86_BUILTIN_PMULHUW128,
27530 IX86_BUILTIN_PMULHW128,
27531 IX86_BUILTIN_PMULLW128,
27532
27533 IX86_BUILTIN_PSADBW128,
27534 IX86_BUILTIN_PSHUFHW,
27535 IX86_BUILTIN_PSHUFLW,
27536 IX86_BUILTIN_PSHUFD,
27537
27538 IX86_BUILTIN_PSLLDQI128,
27539 IX86_BUILTIN_PSLLWI128,
27540 IX86_BUILTIN_PSLLDI128,
27541 IX86_BUILTIN_PSLLQI128,
27542 IX86_BUILTIN_PSRAWI128,
27543 IX86_BUILTIN_PSRADI128,
27544 IX86_BUILTIN_PSRLDQI128,
27545 IX86_BUILTIN_PSRLWI128,
27546 IX86_BUILTIN_PSRLDI128,
27547 IX86_BUILTIN_PSRLQI128,
27548
27549 IX86_BUILTIN_PSLLDQ128,
27550 IX86_BUILTIN_PSLLW128,
27551 IX86_BUILTIN_PSLLD128,
27552 IX86_BUILTIN_PSLLQ128,
27553 IX86_BUILTIN_PSRAW128,
27554 IX86_BUILTIN_PSRAD128,
27555 IX86_BUILTIN_PSRLW128,
27556 IX86_BUILTIN_PSRLD128,
27557 IX86_BUILTIN_PSRLQ128,
27558
27559 IX86_BUILTIN_PUNPCKHBW128,
27560 IX86_BUILTIN_PUNPCKHWD128,
27561 IX86_BUILTIN_PUNPCKHDQ128,
27562 IX86_BUILTIN_PUNPCKHQDQ128,
27563 IX86_BUILTIN_PUNPCKLBW128,
27564 IX86_BUILTIN_PUNPCKLWD128,
27565 IX86_BUILTIN_PUNPCKLDQ128,
27566 IX86_BUILTIN_PUNPCKLQDQ128,
27567
27568 IX86_BUILTIN_CLFLUSH,
27569 IX86_BUILTIN_MFENCE,
27570 IX86_BUILTIN_LFENCE,
27571 IX86_BUILTIN_PAUSE,
27572
27573 IX86_BUILTIN_FNSTENV,
27574 IX86_BUILTIN_FLDENV,
27575 IX86_BUILTIN_FNSTSW,
27576 IX86_BUILTIN_FNCLEX,
27577
27578 IX86_BUILTIN_BSRSI,
27579 IX86_BUILTIN_BSRDI,
27580 IX86_BUILTIN_RDPMC,
27581 IX86_BUILTIN_RDTSC,
27582 IX86_BUILTIN_RDTSCP,
27583 IX86_BUILTIN_ROLQI,
27584 IX86_BUILTIN_ROLHI,
27585 IX86_BUILTIN_RORQI,
27586 IX86_BUILTIN_RORHI,
27587
27588 /* SSE3. */
27589 IX86_BUILTIN_ADDSUBPS,
27590 IX86_BUILTIN_HADDPS,
27591 IX86_BUILTIN_HSUBPS,
27592 IX86_BUILTIN_MOVSHDUP,
27593 IX86_BUILTIN_MOVSLDUP,
27594 IX86_BUILTIN_ADDSUBPD,
27595 IX86_BUILTIN_HADDPD,
27596 IX86_BUILTIN_HSUBPD,
27597 IX86_BUILTIN_LDDQU,
27598
27599 IX86_BUILTIN_MONITOR,
27600 IX86_BUILTIN_MWAIT,
27601
27602 /* SSSE3. */
27603 IX86_BUILTIN_PHADDW,
27604 IX86_BUILTIN_PHADDD,
27605 IX86_BUILTIN_PHADDSW,
27606 IX86_BUILTIN_PHSUBW,
27607 IX86_BUILTIN_PHSUBD,
27608 IX86_BUILTIN_PHSUBSW,
27609 IX86_BUILTIN_PMADDUBSW,
27610 IX86_BUILTIN_PMULHRSW,
27611 IX86_BUILTIN_PSHUFB,
27612 IX86_BUILTIN_PSIGNB,
27613 IX86_BUILTIN_PSIGNW,
27614 IX86_BUILTIN_PSIGND,
27615 IX86_BUILTIN_PALIGNR,
27616 IX86_BUILTIN_PABSB,
27617 IX86_BUILTIN_PABSW,
27618 IX86_BUILTIN_PABSD,
27619
27620 IX86_BUILTIN_PHADDW128,
27621 IX86_BUILTIN_PHADDD128,
27622 IX86_BUILTIN_PHADDSW128,
27623 IX86_BUILTIN_PHSUBW128,
27624 IX86_BUILTIN_PHSUBD128,
27625 IX86_BUILTIN_PHSUBSW128,
27626 IX86_BUILTIN_PMADDUBSW128,
27627 IX86_BUILTIN_PMULHRSW128,
27628 IX86_BUILTIN_PSHUFB128,
27629 IX86_BUILTIN_PSIGNB128,
27630 IX86_BUILTIN_PSIGNW128,
27631 IX86_BUILTIN_PSIGND128,
27632 IX86_BUILTIN_PALIGNR128,
27633 IX86_BUILTIN_PABSB128,
27634 IX86_BUILTIN_PABSW128,
27635 IX86_BUILTIN_PABSD128,
27636
27637 /* AMDFAM10 - SSE4A New Instructions. */
27638 IX86_BUILTIN_MOVNTSD,
27639 IX86_BUILTIN_MOVNTSS,
27640 IX86_BUILTIN_EXTRQI,
27641 IX86_BUILTIN_EXTRQ,
27642 IX86_BUILTIN_INSERTQI,
27643 IX86_BUILTIN_INSERTQ,
27644
27645 /* SSE4.1. */
27646 IX86_BUILTIN_BLENDPD,
27647 IX86_BUILTIN_BLENDPS,
27648 IX86_BUILTIN_BLENDVPD,
27649 IX86_BUILTIN_BLENDVPS,
27650 IX86_BUILTIN_PBLENDVB128,
27651 IX86_BUILTIN_PBLENDW128,
27652
27653 IX86_BUILTIN_DPPD,
27654 IX86_BUILTIN_DPPS,
27655
27656 IX86_BUILTIN_INSERTPS128,
27657
27658 IX86_BUILTIN_MOVNTDQA,
27659 IX86_BUILTIN_MPSADBW128,
27660 IX86_BUILTIN_PACKUSDW128,
27661 IX86_BUILTIN_PCMPEQQ,
27662 IX86_BUILTIN_PHMINPOSUW128,
27663
27664 IX86_BUILTIN_PMAXSB128,
27665 IX86_BUILTIN_PMAXSD128,
27666 IX86_BUILTIN_PMAXUD128,
27667 IX86_BUILTIN_PMAXUW128,
27668
27669 IX86_BUILTIN_PMINSB128,
27670 IX86_BUILTIN_PMINSD128,
27671 IX86_BUILTIN_PMINUD128,
27672 IX86_BUILTIN_PMINUW128,
27673
27674 IX86_BUILTIN_PMOVSXBW128,
27675 IX86_BUILTIN_PMOVSXBD128,
27676 IX86_BUILTIN_PMOVSXBQ128,
27677 IX86_BUILTIN_PMOVSXWD128,
27678 IX86_BUILTIN_PMOVSXWQ128,
27679 IX86_BUILTIN_PMOVSXDQ128,
27680
27681 IX86_BUILTIN_PMOVZXBW128,
27682 IX86_BUILTIN_PMOVZXBD128,
27683 IX86_BUILTIN_PMOVZXBQ128,
27684 IX86_BUILTIN_PMOVZXWD128,
27685 IX86_BUILTIN_PMOVZXWQ128,
27686 IX86_BUILTIN_PMOVZXDQ128,
27687
27688 IX86_BUILTIN_PMULDQ128,
27689 IX86_BUILTIN_PMULLD128,
27690
27691 IX86_BUILTIN_ROUNDSD,
27692 IX86_BUILTIN_ROUNDSS,
27693
27694 IX86_BUILTIN_ROUNDPD,
27695 IX86_BUILTIN_ROUNDPS,
27696
27697 IX86_BUILTIN_FLOORPD,
27698 IX86_BUILTIN_CEILPD,
27699 IX86_BUILTIN_TRUNCPD,
27700 IX86_BUILTIN_RINTPD,
27701 IX86_BUILTIN_ROUNDPD_AZ,
27702
27703 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX,
27704 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX,
27705 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX,
27706
27707 IX86_BUILTIN_FLOORPS,
27708 IX86_BUILTIN_CEILPS,
27709 IX86_BUILTIN_TRUNCPS,
27710 IX86_BUILTIN_RINTPS,
27711 IX86_BUILTIN_ROUNDPS_AZ,
27712
27713 IX86_BUILTIN_FLOORPS_SFIX,
27714 IX86_BUILTIN_CEILPS_SFIX,
27715 IX86_BUILTIN_ROUNDPS_AZ_SFIX,
27716
27717 IX86_BUILTIN_PTESTZ,
27718 IX86_BUILTIN_PTESTC,
27719 IX86_BUILTIN_PTESTNZC,
27720
27721 IX86_BUILTIN_VEC_INIT_V2SI,
27722 IX86_BUILTIN_VEC_INIT_V4HI,
27723 IX86_BUILTIN_VEC_INIT_V8QI,
27724 IX86_BUILTIN_VEC_EXT_V2DF,
27725 IX86_BUILTIN_VEC_EXT_V2DI,
27726 IX86_BUILTIN_VEC_EXT_V4SF,
27727 IX86_BUILTIN_VEC_EXT_V4SI,
27728 IX86_BUILTIN_VEC_EXT_V8HI,
27729 IX86_BUILTIN_VEC_EXT_V2SI,
27730 IX86_BUILTIN_VEC_EXT_V4HI,
27731 IX86_BUILTIN_VEC_EXT_V16QI,
27732 IX86_BUILTIN_VEC_SET_V2DI,
27733 IX86_BUILTIN_VEC_SET_V4SF,
27734 IX86_BUILTIN_VEC_SET_V4SI,
27735 IX86_BUILTIN_VEC_SET_V8HI,
27736 IX86_BUILTIN_VEC_SET_V4HI,
27737 IX86_BUILTIN_VEC_SET_V16QI,
27738
27739 IX86_BUILTIN_VEC_PACK_SFIX,
27740 IX86_BUILTIN_VEC_PACK_SFIX256,
27741
27742 /* SSE4.2. */
27743 IX86_BUILTIN_CRC32QI,
27744 IX86_BUILTIN_CRC32HI,
27745 IX86_BUILTIN_CRC32SI,
27746 IX86_BUILTIN_CRC32DI,
27747
27748 IX86_BUILTIN_PCMPESTRI128,
27749 IX86_BUILTIN_PCMPESTRM128,
27750 IX86_BUILTIN_PCMPESTRA128,
27751 IX86_BUILTIN_PCMPESTRC128,
27752 IX86_BUILTIN_PCMPESTRO128,
27753 IX86_BUILTIN_PCMPESTRS128,
27754 IX86_BUILTIN_PCMPESTRZ128,
27755 IX86_BUILTIN_PCMPISTRI128,
27756 IX86_BUILTIN_PCMPISTRM128,
27757 IX86_BUILTIN_PCMPISTRA128,
27758 IX86_BUILTIN_PCMPISTRC128,
27759 IX86_BUILTIN_PCMPISTRO128,
27760 IX86_BUILTIN_PCMPISTRS128,
27761 IX86_BUILTIN_PCMPISTRZ128,
27762
27763 IX86_BUILTIN_PCMPGTQ,
27764
27765 /* AES instructions */
27766 IX86_BUILTIN_AESENC128,
27767 IX86_BUILTIN_AESENCLAST128,
27768 IX86_BUILTIN_AESDEC128,
27769 IX86_BUILTIN_AESDECLAST128,
27770 IX86_BUILTIN_AESIMC128,
27771 IX86_BUILTIN_AESKEYGENASSIST128,
27772
27773 /* PCLMUL instruction */
27774 IX86_BUILTIN_PCLMULQDQ128,
27775
27776 /* AVX */
27777 IX86_BUILTIN_ADDPD256,
27778 IX86_BUILTIN_ADDPS256,
27779 IX86_BUILTIN_ADDSUBPD256,
27780 IX86_BUILTIN_ADDSUBPS256,
27781 IX86_BUILTIN_ANDPD256,
27782 IX86_BUILTIN_ANDPS256,
27783 IX86_BUILTIN_ANDNPD256,
27784 IX86_BUILTIN_ANDNPS256,
27785 IX86_BUILTIN_BLENDPD256,
27786 IX86_BUILTIN_BLENDPS256,
27787 IX86_BUILTIN_BLENDVPD256,
27788 IX86_BUILTIN_BLENDVPS256,
27789 IX86_BUILTIN_DIVPD256,
27790 IX86_BUILTIN_DIVPS256,
27791 IX86_BUILTIN_DPPS256,
27792 IX86_BUILTIN_HADDPD256,
27793 IX86_BUILTIN_HADDPS256,
27794 IX86_BUILTIN_HSUBPD256,
27795 IX86_BUILTIN_HSUBPS256,
27796 IX86_BUILTIN_MAXPD256,
27797 IX86_BUILTIN_MAXPS256,
27798 IX86_BUILTIN_MINPD256,
27799 IX86_BUILTIN_MINPS256,
27800 IX86_BUILTIN_MULPD256,
27801 IX86_BUILTIN_MULPS256,
27802 IX86_BUILTIN_ORPD256,
27803 IX86_BUILTIN_ORPS256,
27804 IX86_BUILTIN_SHUFPD256,
27805 IX86_BUILTIN_SHUFPS256,
27806 IX86_BUILTIN_SUBPD256,
27807 IX86_BUILTIN_SUBPS256,
27808 IX86_BUILTIN_XORPD256,
27809 IX86_BUILTIN_XORPS256,
27810 IX86_BUILTIN_CMPSD,
27811 IX86_BUILTIN_CMPSS,
27812 IX86_BUILTIN_CMPPD,
27813 IX86_BUILTIN_CMPPS,
27814 IX86_BUILTIN_CMPPD256,
27815 IX86_BUILTIN_CMPPS256,
27816 IX86_BUILTIN_CVTDQ2PD256,
27817 IX86_BUILTIN_CVTDQ2PS256,
27818 IX86_BUILTIN_CVTPD2PS256,
27819 IX86_BUILTIN_CVTPS2DQ256,
27820 IX86_BUILTIN_CVTPS2PD256,
27821 IX86_BUILTIN_CVTTPD2DQ256,
27822 IX86_BUILTIN_CVTPD2DQ256,
27823 IX86_BUILTIN_CVTTPS2DQ256,
27824 IX86_BUILTIN_EXTRACTF128PD256,
27825 IX86_BUILTIN_EXTRACTF128PS256,
27826 IX86_BUILTIN_EXTRACTF128SI256,
27827 IX86_BUILTIN_VZEROALL,
27828 IX86_BUILTIN_VZEROUPPER,
27829 IX86_BUILTIN_VPERMILVARPD,
27830 IX86_BUILTIN_VPERMILVARPS,
27831 IX86_BUILTIN_VPERMILVARPD256,
27832 IX86_BUILTIN_VPERMILVARPS256,
27833 IX86_BUILTIN_VPERMILPD,
27834 IX86_BUILTIN_VPERMILPS,
27835 IX86_BUILTIN_VPERMILPD256,
27836 IX86_BUILTIN_VPERMILPS256,
27837 IX86_BUILTIN_VPERMIL2PD,
27838 IX86_BUILTIN_VPERMIL2PS,
27839 IX86_BUILTIN_VPERMIL2PD256,
27840 IX86_BUILTIN_VPERMIL2PS256,
27841 IX86_BUILTIN_VPERM2F128PD256,
27842 IX86_BUILTIN_VPERM2F128PS256,
27843 IX86_BUILTIN_VPERM2F128SI256,
27844 IX86_BUILTIN_VBROADCASTSS,
27845 IX86_BUILTIN_VBROADCASTSD256,
27846 IX86_BUILTIN_VBROADCASTSS256,
27847 IX86_BUILTIN_VBROADCASTPD256,
27848 IX86_BUILTIN_VBROADCASTPS256,
27849 IX86_BUILTIN_VINSERTF128PD256,
27850 IX86_BUILTIN_VINSERTF128PS256,
27851 IX86_BUILTIN_VINSERTF128SI256,
27852 IX86_BUILTIN_LOADUPD256,
27853 IX86_BUILTIN_LOADUPS256,
27854 IX86_BUILTIN_STOREUPD256,
27855 IX86_BUILTIN_STOREUPS256,
27856 IX86_BUILTIN_LDDQU256,
27857 IX86_BUILTIN_MOVNTDQ256,
27858 IX86_BUILTIN_MOVNTPD256,
27859 IX86_BUILTIN_MOVNTPS256,
27860 IX86_BUILTIN_LOADDQU256,
27861 IX86_BUILTIN_STOREDQU256,
27862 IX86_BUILTIN_MASKLOADPD,
27863 IX86_BUILTIN_MASKLOADPS,
27864 IX86_BUILTIN_MASKSTOREPD,
27865 IX86_BUILTIN_MASKSTOREPS,
27866 IX86_BUILTIN_MASKLOADPD256,
27867 IX86_BUILTIN_MASKLOADPS256,
27868 IX86_BUILTIN_MASKSTOREPD256,
27869 IX86_BUILTIN_MASKSTOREPS256,
27870 IX86_BUILTIN_MOVSHDUP256,
27871 IX86_BUILTIN_MOVSLDUP256,
27872 IX86_BUILTIN_MOVDDUP256,
27873
27874 IX86_BUILTIN_SQRTPD256,
27875 IX86_BUILTIN_SQRTPS256,
27876 IX86_BUILTIN_SQRTPS_NR256,
27877 IX86_BUILTIN_RSQRTPS256,
27878 IX86_BUILTIN_RSQRTPS_NR256,
27879
27880 IX86_BUILTIN_RCPPS256,
27881
27882 IX86_BUILTIN_ROUNDPD256,
27883 IX86_BUILTIN_ROUNDPS256,
27884
27885 IX86_BUILTIN_FLOORPD256,
27886 IX86_BUILTIN_CEILPD256,
27887 IX86_BUILTIN_TRUNCPD256,
27888 IX86_BUILTIN_RINTPD256,
27889 IX86_BUILTIN_ROUNDPD_AZ256,
27890
27891 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256,
27892 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256,
27893 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256,
27894
27895 IX86_BUILTIN_FLOORPS256,
27896 IX86_BUILTIN_CEILPS256,
27897 IX86_BUILTIN_TRUNCPS256,
27898 IX86_BUILTIN_RINTPS256,
27899 IX86_BUILTIN_ROUNDPS_AZ256,
27900
27901 IX86_BUILTIN_FLOORPS_SFIX256,
27902 IX86_BUILTIN_CEILPS_SFIX256,
27903 IX86_BUILTIN_ROUNDPS_AZ_SFIX256,
27904
27905 IX86_BUILTIN_UNPCKHPD256,
27906 IX86_BUILTIN_UNPCKLPD256,
27907 IX86_BUILTIN_UNPCKHPS256,
27908 IX86_BUILTIN_UNPCKLPS256,
27909
27910 IX86_BUILTIN_SI256_SI,
27911 IX86_BUILTIN_PS256_PS,
27912 IX86_BUILTIN_PD256_PD,
27913 IX86_BUILTIN_SI_SI256,
27914 IX86_BUILTIN_PS_PS256,
27915 IX86_BUILTIN_PD_PD256,
27916
27917 IX86_BUILTIN_VTESTZPD,
27918 IX86_BUILTIN_VTESTCPD,
27919 IX86_BUILTIN_VTESTNZCPD,
27920 IX86_BUILTIN_VTESTZPS,
27921 IX86_BUILTIN_VTESTCPS,
27922 IX86_BUILTIN_VTESTNZCPS,
27923 IX86_BUILTIN_VTESTZPD256,
27924 IX86_BUILTIN_VTESTCPD256,
27925 IX86_BUILTIN_VTESTNZCPD256,
27926 IX86_BUILTIN_VTESTZPS256,
27927 IX86_BUILTIN_VTESTCPS256,
27928 IX86_BUILTIN_VTESTNZCPS256,
27929 IX86_BUILTIN_PTESTZ256,
27930 IX86_BUILTIN_PTESTC256,
27931 IX86_BUILTIN_PTESTNZC256,
27932
27933 IX86_BUILTIN_MOVMSKPD256,
27934 IX86_BUILTIN_MOVMSKPS256,
27935
27936 /* AVX2 */
27937 IX86_BUILTIN_MPSADBW256,
27938 IX86_BUILTIN_PABSB256,
27939 IX86_BUILTIN_PABSW256,
27940 IX86_BUILTIN_PABSD256,
27941 IX86_BUILTIN_PACKSSDW256,
27942 IX86_BUILTIN_PACKSSWB256,
27943 IX86_BUILTIN_PACKUSDW256,
27944 IX86_BUILTIN_PACKUSWB256,
27945 IX86_BUILTIN_PADDB256,
27946 IX86_BUILTIN_PADDW256,
27947 IX86_BUILTIN_PADDD256,
27948 IX86_BUILTIN_PADDQ256,
27949 IX86_BUILTIN_PADDSB256,
27950 IX86_BUILTIN_PADDSW256,
27951 IX86_BUILTIN_PADDUSB256,
27952 IX86_BUILTIN_PADDUSW256,
27953 IX86_BUILTIN_PALIGNR256,
27954 IX86_BUILTIN_AND256I,
27955 IX86_BUILTIN_ANDNOT256I,
27956 IX86_BUILTIN_PAVGB256,
27957 IX86_BUILTIN_PAVGW256,
27958 IX86_BUILTIN_PBLENDVB256,
27959 IX86_BUILTIN_PBLENDVW256,
27960 IX86_BUILTIN_PCMPEQB256,
27961 IX86_BUILTIN_PCMPEQW256,
27962 IX86_BUILTIN_PCMPEQD256,
27963 IX86_BUILTIN_PCMPEQQ256,
27964 IX86_BUILTIN_PCMPGTB256,
27965 IX86_BUILTIN_PCMPGTW256,
27966 IX86_BUILTIN_PCMPGTD256,
27967 IX86_BUILTIN_PCMPGTQ256,
27968 IX86_BUILTIN_PHADDW256,
27969 IX86_BUILTIN_PHADDD256,
27970 IX86_BUILTIN_PHADDSW256,
27971 IX86_BUILTIN_PHSUBW256,
27972 IX86_BUILTIN_PHSUBD256,
27973 IX86_BUILTIN_PHSUBSW256,
27974 IX86_BUILTIN_PMADDUBSW256,
27975 IX86_BUILTIN_PMADDWD256,
27976 IX86_BUILTIN_PMAXSB256,
27977 IX86_BUILTIN_PMAXSW256,
27978 IX86_BUILTIN_PMAXSD256,
27979 IX86_BUILTIN_PMAXUB256,
27980 IX86_BUILTIN_PMAXUW256,
27981 IX86_BUILTIN_PMAXUD256,
27982 IX86_BUILTIN_PMINSB256,
27983 IX86_BUILTIN_PMINSW256,
27984 IX86_BUILTIN_PMINSD256,
27985 IX86_BUILTIN_PMINUB256,
27986 IX86_BUILTIN_PMINUW256,
27987 IX86_BUILTIN_PMINUD256,
27988 IX86_BUILTIN_PMOVMSKB256,
27989 IX86_BUILTIN_PMOVSXBW256,
27990 IX86_BUILTIN_PMOVSXBD256,
27991 IX86_BUILTIN_PMOVSXBQ256,
27992 IX86_BUILTIN_PMOVSXWD256,
27993 IX86_BUILTIN_PMOVSXWQ256,
27994 IX86_BUILTIN_PMOVSXDQ256,
27995 IX86_BUILTIN_PMOVZXBW256,
27996 IX86_BUILTIN_PMOVZXBD256,
27997 IX86_BUILTIN_PMOVZXBQ256,
27998 IX86_BUILTIN_PMOVZXWD256,
27999 IX86_BUILTIN_PMOVZXWQ256,
28000 IX86_BUILTIN_PMOVZXDQ256,
28001 IX86_BUILTIN_PMULDQ256,
28002 IX86_BUILTIN_PMULHRSW256,
28003 IX86_BUILTIN_PMULHUW256,
28004 IX86_BUILTIN_PMULHW256,
28005 IX86_BUILTIN_PMULLW256,
28006 IX86_BUILTIN_PMULLD256,
28007 IX86_BUILTIN_PMULUDQ256,
28008 IX86_BUILTIN_POR256,
28009 IX86_BUILTIN_PSADBW256,
28010 IX86_BUILTIN_PSHUFB256,
28011 IX86_BUILTIN_PSHUFD256,
28012 IX86_BUILTIN_PSHUFHW256,
28013 IX86_BUILTIN_PSHUFLW256,
28014 IX86_BUILTIN_PSIGNB256,
28015 IX86_BUILTIN_PSIGNW256,
28016 IX86_BUILTIN_PSIGND256,
28017 IX86_BUILTIN_PSLLDQI256,
28018 IX86_BUILTIN_PSLLWI256,
28019 IX86_BUILTIN_PSLLW256,
28020 IX86_BUILTIN_PSLLDI256,
28021 IX86_BUILTIN_PSLLD256,
28022 IX86_BUILTIN_PSLLQI256,
28023 IX86_BUILTIN_PSLLQ256,
28024 IX86_BUILTIN_PSRAWI256,
28025 IX86_BUILTIN_PSRAW256,
28026 IX86_BUILTIN_PSRADI256,
28027 IX86_BUILTIN_PSRAD256,
28028 IX86_BUILTIN_PSRLDQI256,
28029 IX86_BUILTIN_PSRLWI256,
28030 IX86_BUILTIN_PSRLW256,
28031 IX86_BUILTIN_PSRLDI256,
28032 IX86_BUILTIN_PSRLD256,
28033 IX86_BUILTIN_PSRLQI256,
28034 IX86_BUILTIN_PSRLQ256,
28035 IX86_BUILTIN_PSUBB256,
28036 IX86_BUILTIN_PSUBW256,
28037 IX86_BUILTIN_PSUBD256,
28038 IX86_BUILTIN_PSUBQ256,
28039 IX86_BUILTIN_PSUBSB256,
28040 IX86_BUILTIN_PSUBSW256,
28041 IX86_BUILTIN_PSUBUSB256,
28042 IX86_BUILTIN_PSUBUSW256,
28043 IX86_BUILTIN_PUNPCKHBW256,
28044 IX86_BUILTIN_PUNPCKHWD256,
28045 IX86_BUILTIN_PUNPCKHDQ256,
28046 IX86_BUILTIN_PUNPCKHQDQ256,
28047 IX86_BUILTIN_PUNPCKLBW256,
28048 IX86_BUILTIN_PUNPCKLWD256,
28049 IX86_BUILTIN_PUNPCKLDQ256,
28050 IX86_BUILTIN_PUNPCKLQDQ256,
28051 IX86_BUILTIN_PXOR256,
28052 IX86_BUILTIN_MOVNTDQA256,
28053 IX86_BUILTIN_VBROADCASTSS_PS,
28054 IX86_BUILTIN_VBROADCASTSS_PS256,
28055 IX86_BUILTIN_VBROADCASTSD_PD256,
28056 IX86_BUILTIN_VBROADCASTSI256,
28057 IX86_BUILTIN_PBLENDD256,
28058 IX86_BUILTIN_PBLENDD128,
28059 IX86_BUILTIN_PBROADCASTB256,
28060 IX86_BUILTIN_PBROADCASTW256,
28061 IX86_BUILTIN_PBROADCASTD256,
28062 IX86_BUILTIN_PBROADCASTQ256,
28063 IX86_BUILTIN_PBROADCASTB128,
28064 IX86_BUILTIN_PBROADCASTW128,
28065 IX86_BUILTIN_PBROADCASTD128,
28066 IX86_BUILTIN_PBROADCASTQ128,
28067 IX86_BUILTIN_VPERMVARSI256,
28068 IX86_BUILTIN_VPERMDF256,
28069 IX86_BUILTIN_VPERMVARSF256,
28070 IX86_BUILTIN_VPERMDI256,
28071 IX86_BUILTIN_VPERMTI256,
28072 IX86_BUILTIN_VEXTRACT128I256,
28073 IX86_BUILTIN_VINSERT128I256,
28074 IX86_BUILTIN_MASKLOADD,
28075 IX86_BUILTIN_MASKLOADQ,
28076 IX86_BUILTIN_MASKLOADD256,
28077 IX86_BUILTIN_MASKLOADQ256,
28078 IX86_BUILTIN_MASKSTORED,
28079 IX86_BUILTIN_MASKSTOREQ,
28080 IX86_BUILTIN_MASKSTORED256,
28081 IX86_BUILTIN_MASKSTOREQ256,
28082 IX86_BUILTIN_PSLLVV4DI,
28083 IX86_BUILTIN_PSLLVV2DI,
28084 IX86_BUILTIN_PSLLVV8SI,
28085 IX86_BUILTIN_PSLLVV4SI,
28086 IX86_BUILTIN_PSRAVV8SI,
28087 IX86_BUILTIN_PSRAVV4SI,
28088 IX86_BUILTIN_PSRLVV4DI,
28089 IX86_BUILTIN_PSRLVV2DI,
28090 IX86_BUILTIN_PSRLVV8SI,
28091 IX86_BUILTIN_PSRLVV4SI,
28092
28093 IX86_BUILTIN_GATHERSIV2DF,
28094 IX86_BUILTIN_GATHERSIV4DF,
28095 IX86_BUILTIN_GATHERDIV2DF,
28096 IX86_BUILTIN_GATHERDIV4DF,
28097 IX86_BUILTIN_GATHERSIV4SF,
28098 IX86_BUILTIN_GATHERSIV8SF,
28099 IX86_BUILTIN_GATHERDIV4SF,
28100 IX86_BUILTIN_GATHERDIV8SF,
28101 IX86_BUILTIN_GATHERSIV2DI,
28102 IX86_BUILTIN_GATHERSIV4DI,
28103 IX86_BUILTIN_GATHERDIV2DI,
28104 IX86_BUILTIN_GATHERDIV4DI,
28105 IX86_BUILTIN_GATHERSIV4SI,
28106 IX86_BUILTIN_GATHERSIV8SI,
28107 IX86_BUILTIN_GATHERDIV4SI,
28108 IX86_BUILTIN_GATHERDIV8SI,
28109
28110 /* AVX512F */
28111 IX86_BUILTIN_ADDPD512,
28112 IX86_BUILTIN_ADDPS512,
28113 IX86_BUILTIN_ADDSD_ROUND,
28114 IX86_BUILTIN_ADDSS_ROUND,
28115 IX86_BUILTIN_ALIGND512,
28116 IX86_BUILTIN_ALIGNQ512,
28117 IX86_BUILTIN_BLENDMD512,
28118 IX86_BUILTIN_BLENDMPD512,
28119 IX86_BUILTIN_BLENDMPS512,
28120 IX86_BUILTIN_BLENDMQ512,
28121 IX86_BUILTIN_BROADCASTF32X4_512,
28122 IX86_BUILTIN_BROADCASTF64X4_512,
28123 IX86_BUILTIN_BROADCASTI32X4_512,
28124 IX86_BUILTIN_BROADCASTI64X4_512,
28125 IX86_BUILTIN_BROADCASTSD512,
28126 IX86_BUILTIN_BROADCASTSS512,
28127 IX86_BUILTIN_CMPD512,
28128 IX86_BUILTIN_CMPPD512,
28129 IX86_BUILTIN_CMPPS512,
28130 IX86_BUILTIN_CMPQ512,
28131 IX86_BUILTIN_CMPSD_MASK,
28132 IX86_BUILTIN_CMPSS_MASK,
28133 IX86_BUILTIN_COMIDF,
28134 IX86_BUILTIN_COMISF,
28135 IX86_BUILTIN_COMPRESSPD512,
28136 IX86_BUILTIN_COMPRESSPDSTORE512,
28137 IX86_BUILTIN_COMPRESSPS512,
28138 IX86_BUILTIN_COMPRESSPSSTORE512,
28139 IX86_BUILTIN_CVTDQ2PD512,
28140 IX86_BUILTIN_CVTDQ2PS512,
28141 IX86_BUILTIN_CVTPD2DQ512,
28142 IX86_BUILTIN_CVTPD2PS512,
28143 IX86_BUILTIN_CVTPD2UDQ512,
28144 IX86_BUILTIN_CVTPH2PS512,
28145 IX86_BUILTIN_CVTPS2DQ512,
28146 IX86_BUILTIN_CVTPS2PD512,
28147 IX86_BUILTIN_CVTPS2PH512,
28148 IX86_BUILTIN_CVTPS2UDQ512,
28149 IX86_BUILTIN_CVTSD2SS_ROUND,
28150 IX86_BUILTIN_CVTSI2SD64,
28151 IX86_BUILTIN_CVTSI2SS32,
28152 IX86_BUILTIN_CVTSI2SS64,
28153 IX86_BUILTIN_CVTSS2SD_ROUND,
28154 IX86_BUILTIN_CVTTPD2DQ512,
28155 IX86_BUILTIN_CVTTPD2UDQ512,
28156 IX86_BUILTIN_CVTTPS2DQ512,
28157 IX86_BUILTIN_CVTTPS2UDQ512,
28158 IX86_BUILTIN_CVTUDQ2PD512,
28159 IX86_BUILTIN_CVTUDQ2PS512,
28160 IX86_BUILTIN_CVTUSI2SD32,
28161 IX86_BUILTIN_CVTUSI2SD64,
28162 IX86_BUILTIN_CVTUSI2SS32,
28163 IX86_BUILTIN_CVTUSI2SS64,
28164 IX86_BUILTIN_DIVPD512,
28165 IX86_BUILTIN_DIVPS512,
28166 IX86_BUILTIN_DIVSD_ROUND,
28167 IX86_BUILTIN_DIVSS_ROUND,
28168 IX86_BUILTIN_EXPANDPD512,
28169 IX86_BUILTIN_EXPANDPD512Z,
28170 IX86_BUILTIN_EXPANDPDLOAD512,
28171 IX86_BUILTIN_EXPANDPDLOAD512Z,
28172 IX86_BUILTIN_EXPANDPS512,
28173 IX86_BUILTIN_EXPANDPS512Z,
28174 IX86_BUILTIN_EXPANDPSLOAD512,
28175 IX86_BUILTIN_EXPANDPSLOAD512Z,
28176 IX86_BUILTIN_EXTRACTF32X4,
28177 IX86_BUILTIN_EXTRACTF64X4,
28178 IX86_BUILTIN_EXTRACTI32X4,
28179 IX86_BUILTIN_EXTRACTI64X4,
28180 IX86_BUILTIN_FIXUPIMMPD512_MASK,
28181 IX86_BUILTIN_FIXUPIMMPD512_MASKZ,
28182 IX86_BUILTIN_FIXUPIMMPS512_MASK,
28183 IX86_BUILTIN_FIXUPIMMPS512_MASKZ,
28184 IX86_BUILTIN_FIXUPIMMSD128_MASK,
28185 IX86_BUILTIN_FIXUPIMMSD128_MASKZ,
28186 IX86_BUILTIN_FIXUPIMMSS128_MASK,
28187 IX86_BUILTIN_FIXUPIMMSS128_MASKZ,
28188 IX86_BUILTIN_GETEXPPD512,
28189 IX86_BUILTIN_GETEXPPS512,
28190 IX86_BUILTIN_GETEXPSD128,
28191 IX86_BUILTIN_GETEXPSS128,
28192 IX86_BUILTIN_GETMANTPD512,
28193 IX86_BUILTIN_GETMANTPS512,
28194 IX86_BUILTIN_GETMANTSD128,
28195 IX86_BUILTIN_GETMANTSS128,
28196 IX86_BUILTIN_INSERTF32X4,
28197 IX86_BUILTIN_INSERTF64X4,
28198 IX86_BUILTIN_INSERTI32X4,
28199 IX86_BUILTIN_INSERTI64X4,
28200 IX86_BUILTIN_LOADAPD512,
28201 IX86_BUILTIN_LOADAPS512,
28202 IX86_BUILTIN_LOADDQUDI512,
28203 IX86_BUILTIN_LOADDQUSI512,
28204 IX86_BUILTIN_LOADUPD512,
28205 IX86_BUILTIN_LOADUPS512,
28206 IX86_BUILTIN_MAXPD512,
28207 IX86_BUILTIN_MAXPS512,
28208 IX86_BUILTIN_MAXSD_ROUND,
28209 IX86_BUILTIN_MAXSS_ROUND,
28210 IX86_BUILTIN_MINPD512,
28211 IX86_BUILTIN_MINPS512,
28212 IX86_BUILTIN_MINSD_ROUND,
28213 IX86_BUILTIN_MINSS_ROUND,
28214 IX86_BUILTIN_MOVAPD512,
28215 IX86_BUILTIN_MOVAPS512,
28216 IX86_BUILTIN_MOVDDUP512,
28217 IX86_BUILTIN_MOVDQA32LOAD512,
28218 IX86_BUILTIN_MOVDQA32STORE512,
28219 IX86_BUILTIN_MOVDQA32_512,
28220 IX86_BUILTIN_MOVDQA64LOAD512,
28221 IX86_BUILTIN_MOVDQA64STORE512,
28222 IX86_BUILTIN_MOVDQA64_512,
28223 IX86_BUILTIN_MOVNTDQ512,
28224 IX86_BUILTIN_MOVNTDQA512,
28225 IX86_BUILTIN_MOVNTPD512,
28226 IX86_BUILTIN_MOVNTPS512,
28227 IX86_BUILTIN_MOVSHDUP512,
28228 IX86_BUILTIN_MOVSLDUP512,
28229 IX86_BUILTIN_MULPD512,
28230 IX86_BUILTIN_MULPS512,
28231 IX86_BUILTIN_MULSD_ROUND,
28232 IX86_BUILTIN_MULSS_ROUND,
28233 IX86_BUILTIN_PABSD512,
28234 IX86_BUILTIN_PABSQ512,
28235 IX86_BUILTIN_PADDD512,
28236 IX86_BUILTIN_PADDQ512,
28237 IX86_BUILTIN_PANDD512,
28238 IX86_BUILTIN_PANDND512,
28239 IX86_BUILTIN_PANDNQ512,
28240 IX86_BUILTIN_PANDQ512,
28241 IX86_BUILTIN_PBROADCASTD512,
28242 IX86_BUILTIN_PBROADCASTD512_GPR,
28243 IX86_BUILTIN_PBROADCASTMB512,
28244 IX86_BUILTIN_PBROADCASTMW512,
28245 IX86_BUILTIN_PBROADCASTQ512,
28246 IX86_BUILTIN_PBROADCASTQ512_GPR,
28247 IX86_BUILTIN_PBROADCASTQ512_MEM,
28248 IX86_BUILTIN_PCMPEQD512_MASK,
28249 IX86_BUILTIN_PCMPEQQ512_MASK,
28250 IX86_BUILTIN_PCMPGTD512_MASK,
28251 IX86_BUILTIN_PCMPGTQ512_MASK,
28252 IX86_BUILTIN_PCOMPRESSD512,
28253 IX86_BUILTIN_PCOMPRESSDSTORE512,
28254 IX86_BUILTIN_PCOMPRESSQ512,
28255 IX86_BUILTIN_PCOMPRESSQSTORE512,
28256 IX86_BUILTIN_PEXPANDD512,
28257 IX86_BUILTIN_PEXPANDD512Z,
28258 IX86_BUILTIN_PEXPANDDLOAD512,
28259 IX86_BUILTIN_PEXPANDDLOAD512Z,
28260 IX86_BUILTIN_PEXPANDQ512,
28261 IX86_BUILTIN_PEXPANDQ512Z,
28262 IX86_BUILTIN_PEXPANDQLOAD512,
28263 IX86_BUILTIN_PEXPANDQLOAD512Z,
28264 IX86_BUILTIN_PMAXSD512,
28265 IX86_BUILTIN_PMAXSQ512,
28266 IX86_BUILTIN_PMAXUD512,
28267 IX86_BUILTIN_PMAXUQ512,
28268 IX86_BUILTIN_PMINSD512,
28269 IX86_BUILTIN_PMINSQ512,
28270 IX86_BUILTIN_PMINUD512,
28271 IX86_BUILTIN_PMINUQ512,
28272 IX86_BUILTIN_PMOVDB512,
28273 IX86_BUILTIN_PMOVDB512_MEM,
28274 IX86_BUILTIN_PMOVDW512,
28275 IX86_BUILTIN_PMOVDW512_MEM,
28276 IX86_BUILTIN_PMOVQB512,
28277 IX86_BUILTIN_PMOVQB512_MEM,
28278 IX86_BUILTIN_PMOVQD512,
28279 IX86_BUILTIN_PMOVQD512_MEM,
28280 IX86_BUILTIN_PMOVQW512,
28281 IX86_BUILTIN_PMOVQW512_MEM,
28282 IX86_BUILTIN_PMOVSDB512,
28283 IX86_BUILTIN_PMOVSDB512_MEM,
28284 IX86_BUILTIN_PMOVSDW512,
28285 IX86_BUILTIN_PMOVSDW512_MEM,
28286 IX86_BUILTIN_PMOVSQB512,
28287 IX86_BUILTIN_PMOVSQB512_MEM,
28288 IX86_BUILTIN_PMOVSQD512,
28289 IX86_BUILTIN_PMOVSQD512_MEM,
28290 IX86_BUILTIN_PMOVSQW512,
28291 IX86_BUILTIN_PMOVSQW512_MEM,
28292 IX86_BUILTIN_PMOVSXBD512,
28293 IX86_BUILTIN_PMOVSXBQ512,
28294 IX86_BUILTIN_PMOVSXDQ512,
28295 IX86_BUILTIN_PMOVSXWD512,
28296 IX86_BUILTIN_PMOVSXWQ512,
28297 IX86_BUILTIN_PMOVUSDB512,
28298 IX86_BUILTIN_PMOVUSDB512_MEM,
28299 IX86_BUILTIN_PMOVUSDW512,
28300 IX86_BUILTIN_PMOVUSDW512_MEM,
28301 IX86_BUILTIN_PMOVUSQB512,
28302 IX86_BUILTIN_PMOVUSQB512_MEM,
28303 IX86_BUILTIN_PMOVUSQD512,
28304 IX86_BUILTIN_PMOVUSQD512_MEM,
28305 IX86_BUILTIN_PMOVUSQW512,
28306 IX86_BUILTIN_PMOVUSQW512_MEM,
28307 IX86_BUILTIN_PMOVZXBD512,
28308 IX86_BUILTIN_PMOVZXBQ512,
28309 IX86_BUILTIN_PMOVZXDQ512,
28310 IX86_BUILTIN_PMOVZXWD512,
28311 IX86_BUILTIN_PMOVZXWQ512,
28312 IX86_BUILTIN_PMULDQ512,
28313 IX86_BUILTIN_PMULLD512,
28314 IX86_BUILTIN_PMULUDQ512,
28315 IX86_BUILTIN_PORD512,
28316 IX86_BUILTIN_PORQ512,
28317 IX86_BUILTIN_PROLD512,
28318 IX86_BUILTIN_PROLQ512,
28319 IX86_BUILTIN_PROLVD512,
28320 IX86_BUILTIN_PROLVQ512,
28321 IX86_BUILTIN_PRORD512,
28322 IX86_BUILTIN_PRORQ512,
28323 IX86_BUILTIN_PRORVD512,
28324 IX86_BUILTIN_PRORVQ512,
28325 IX86_BUILTIN_PSHUFD512,
28326 IX86_BUILTIN_PSLLD512,
28327 IX86_BUILTIN_PSLLDI512,
28328 IX86_BUILTIN_PSLLQ512,
28329 IX86_BUILTIN_PSLLQI512,
28330 IX86_BUILTIN_PSLLVV16SI,
28331 IX86_BUILTIN_PSLLVV8DI,
28332 IX86_BUILTIN_PSRAD512,
28333 IX86_BUILTIN_PSRADI512,
28334 IX86_BUILTIN_PSRAQ512,
28335 IX86_BUILTIN_PSRAQI512,
28336 IX86_BUILTIN_PSRAVV16SI,
28337 IX86_BUILTIN_PSRAVV8DI,
28338 IX86_BUILTIN_PSRLD512,
28339 IX86_BUILTIN_PSRLDI512,
28340 IX86_BUILTIN_PSRLQ512,
28341 IX86_BUILTIN_PSRLQI512,
28342 IX86_BUILTIN_PSRLVV16SI,
28343 IX86_BUILTIN_PSRLVV8DI,
28344 IX86_BUILTIN_PSUBD512,
28345 IX86_BUILTIN_PSUBQ512,
28346 IX86_BUILTIN_PTESTMD512,
28347 IX86_BUILTIN_PTESTMQ512,
28348 IX86_BUILTIN_PTESTNMD512,
28349 IX86_BUILTIN_PTESTNMQ512,
28350 IX86_BUILTIN_PUNPCKHDQ512,
28351 IX86_BUILTIN_PUNPCKHQDQ512,
28352 IX86_BUILTIN_PUNPCKLDQ512,
28353 IX86_BUILTIN_PUNPCKLQDQ512,
28354 IX86_BUILTIN_PXORD512,
28355 IX86_BUILTIN_PXORQ512,
28356 IX86_BUILTIN_RCP14PD512,
28357 IX86_BUILTIN_RCP14PS512,
28358 IX86_BUILTIN_RCP14SD,
28359 IX86_BUILTIN_RCP14SS,
28360 IX86_BUILTIN_RNDSCALEPD,
28361 IX86_BUILTIN_RNDSCALEPS,
28362 IX86_BUILTIN_RNDSCALESD,
28363 IX86_BUILTIN_RNDSCALESS,
28364 IX86_BUILTIN_RSQRT14PD512,
28365 IX86_BUILTIN_RSQRT14PS512,
28366 IX86_BUILTIN_RSQRT14SD,
28367 IX86_BUILTIN_RSQRT14SS,
28368 IX86_BUILTIN_SCALEFPD512,
28369 IX86_BUILTIN_SCALEFPS512,
28370 IX86_BUILTIN_SCALEFSD,
28371 IX86_BUILTIN_SCALEFSS,
28372 IX86_BUILTIN_SHUFPD512,
28373 IX86_BUILTIN_SHUFPS512,
28374 IX86_BUILTIN_SHUF_F32x4,
28375 IX86_BUILTIN_SHUF_F64x2,
28376 IX86_BUILTIN_SHUF_I32x4,
28377 IX86_BUILTIN_SHUF_I64x2,
28378 IX86_BUILTIN_SQRTPD512,
28379 IX86_BUILTIN_SQRTPD512_MASK,
28380 IX86_BUILTIN_SQRTPS512_MASK,
28381 IX86_BUILTIN_SQRTPS_NR512,
28382 IX86_BUILTIN_SQRTSD_ROUND,
28383 IX86_BUILTIN_SQRTSS_ROUND,
28384 IX86_BUILTIN_STOREAPD512,
28385 IX86_BUILTIN_STOREAPS512,
28386 IX86_BUILTIN_STOREDQUDI512,
28387 IX86_BUILTIN_STOREDQUSI512,
28388 IX86_BUILTIN_STOREUPD512,
28389 IX86_BUILTIN_STOREUPS512,
28390 IX86_BUILTIN_SUBPD512,
28391 IX86_BUILTIN_SUBPS512,
28392 IX86_BUILTIN_SUBSD_ROUND,
28393 IX86_BUILTIN_SUBSS_ROUND,
28394 IX86_BUILTIN_UCMPD512,
28395 IX86_BUILTIN_UCMPQ512,
28396 IX86_BUILTIN_UNPCKHPD512,
28397 IX86_BUILTIN_UNPCKHPS512,
28398 IX86_BUILTIN_UNPCKLPD512,
28399 IX86_BUILTIN_UNPCKLPS512,
28400 IX86_BUILTIN_VCVTSD2SI32,
28401 IX86_BUILTIN_VCVTSD2SI64,
28402 IX86_BUILTIN_VCVTSD2USI32,
28403 IX86_BUILTIN_VCVTSD2USI64,
28404 IX86_BUILTIN_VCVTSS2SI32,
28405 IX86_BUILTIN_VCVTSS2SI64,
28406 IX86_BUILTIN_VCVTSS2USI32,
28407 IX86_BUILTIN_VCVTSS2USI64,
28408 IX86_BUILTIN_VCVTTSD2SI32,
28409 IX86_BUILTIN_VCVTTSD2SI64,
28410 IX86_BUILTIN_VCVTTSD2USI32,
28411 IX86_BUILTIN_VCVTTSD2USI64,
28412 IX86_BUILTIN_VCVTTSS2SI32,
28413 IX86_BUILTIN_VCVTTSS2SI64,
28414 IX86_BUILTIN_VCVTTSS2USI32,
28415 IX86_BUILTIN_VCVTTSS2USI64,
28416 IX86_BUILTIN_VFMADDPD512_MASK,
28417 IX86_BUILTIN_VFMADDPD512_MASK3,
28418 IX86_BUILTIN_VFMADDPD512_MASKZ,
28419 IX86_BUILTIN_VFMADDPS512_MASK,
28420 IX86_BUILTIN_VFMADDPS512_MASK3,
28421 IX86_BUILTIN_VFMADDPS512_MASKZ,
28422 IX86_BUILTIN_VFMADDSD3_ROUND,
28423 IX86_BUILTIN_VFMADDSS3_ROUND,
28424 IX86_BUILTIN_VFMADDSUBPD512_MASK,
28425 IX86_BUILTIN_VFMADDSUBPD512_MASK3,
28426 IX86_BUILTIN_VFMADDSUBPD512_MASKZ,
28427 IX86_BUILTIN_VFMADDSUBPS512_MASK,
28428 IX86_BUILTIN_VFMADDSUBPS512_MASK3,
28429 IX86_BUILTIN_VFMADDSUBPS512_MASKZ,
28430 IX86_BUILTIN_VFMSUBADDPD512_MASK3,
28431 IX86_BUILTIN_VFMSUBADDPS512_MASK3,
28432 IX86_BUILTIN_VFMSUBPD512_MASK3,
28433 IX86_BUILTIN_VFMSUBPS512_MASK3,
28434 IX86_BUILTIN_VFMSUBSD3_MASK3,
28435 IX86_BUILTIN_VFMSUBSS3_MASK3,
28436 IX86_BUILTIN_VFNMADDPD512_MASK,
28437 IX86_BUILTIN_VFNMADDPS512_MASK,
28438 IX86_BUILTIN_VFNMSUBPD512_MASK,
28439 IX86_BUILTIN_VFNMSUBPD512_MASK3,
28440 IX86_BUILTIN_VFNMSUBPS512_MASK,
28441 IX86_BUILTIN_VFNMSUBPS512_MASK3,
28442 IX86_BUILTIN_VPCLZCNTD512,
28443 IX86_BUILTIN_VPCLZCNTQ512,
28444 IX86_BUILTIN_VPCONFLICTD512,
28445 IX86_BUILTIN_VPCONFLICTQ512,
28446 IX86_BUILTIN_VPERMDF512,
28447 IX86_BUILTIN_VPERMDI512,
28448 IX86_BUILTIN_VPERMI2VARD512,
28449 IX86_BUILTIN_VPERMI2VARPD512,
28450 IX86_BUILTIN_VPERMI2VARPS512,
28451 IX86_BUILTIN_VPERMI2VARQ512,
28452 IX86_BUILTIN_VPERMILPD512,
28453 IX86_BUILTIN_VPERMILPS512,
28454 IX86_BUILTIN_VPERMILVARPD512,
28455 IX86_BUILTIN_VPERMILVARPS512,
28456 IX86_BUILTIN_VPERMT2VARD512,
28457 IX86_BUILTIN_VPERMT2VARD512_MASKZ,
28458 IX86_BUILTIN_VPERMT2VARPD512,
28459 IX86_BUILTIN_VPERMT2VARPD512_MASKZ,
28460 IX86_BUILTIN_VPERMT2VARPS512,
28461 IX86_BUILTIN_VPERMT2VARPS512_MASKZ,
28462 IX86_BUILTIN_VPERMT2VARQ512,
28463 IX86_BUILTIN_VPERMT2VARQ512_MASKZ,
28464 IX86_BUILTIN_VPERMVARDF512,
28465 IX86_BUILTIN_VPERMVARDI512,
28466 IX86_BUILTIN_VPERMVARSF512,
28467 IX86_BUILTIN_VPERMVARSI512,
28468 IX86_BUILTIN_VTERNLOGD512_MASK,
28469 IX86_BUILTIN_VTERNLOGD512_MASKZ,
28470 IX86_BUILTIN_VTERNLOGQ512_MASK,
28471 IX86_BUILTIN_VTERNLOGQ512_MASKZ,
28472
28473 /* Mask arithmetic operations */
28474 IX86_BUILTIN_KAND16,
28475 IX86_BUILTIN_KANDN16,
28476 IX86_BUILTIN_KNOT16,
28477 IX86_BUILTIN_KOR16,
28478 IX86_BUILTIN_KORTESTC16,
28479 IX86_BUILTIN_KORTESTZ16,
28480 IX86_BUILTIN_KUNPCKBW,
28481 IX86_BUILTIN_KXNOR16,
28482 IX86_BUILTIN_KXOR16,
28483 IX86_BUILTIN_KMOV16,
28484
28485 /* Alternate 4- and 8-element gather/scatter builtins for the vectorizer,
28486 where all operands are 32 or 64 bytes wide respectively. */
28487 IX86_BUILTIN_GATHERALTSIV4DF,
28488 IX86_BUILTIN_GATHERALTDIV8SF,
28489 IX86_BUILTIN_GATHERALTSIV4DI,
28490 IX86_BUILTIN_GATHERALTDIV8SI,
28491 IX86_BUILTIN_GATHER3ALTDIV16SF,
28492 IX86_BUILTIN_GATHER3ALTDIV16SI,
28493 IX86_BUILTIN_GATHER3ALTSIV8DF,
28494 IX86_BUILTIN_GATHER3ALTSIV8DI,
28495 IX86_BUILTIN_GATHER3DIV16SF,
28496 IX86_BUILTIN_GATHER3DIV16SI,
28497 IX86_BUILTIN_GATHER3DIV8DF,
28498 IX86_BUILTIN_GATHER3DIV8DI,
28499 IX86_BUILTIN_GATHER3SIV16SF,
28500 IX86_BUILTIN_GATHER3SIV16SI,
28501 IX86_BUILTIN_GATHER3SIV8DF,
28502 IX86_BUILTIN_GATHER3SIV8DI,
28503 IX86_BUILTIN_SCATTERDIV16SF,
28504 IX86_BUILTIN_SCATTERDIV16SI,
28505 IX86_BUILTIN_SCATTERDIV8DF,
28506 IX86_BUILTIN_SCATTERDIV8DI,
28507 IX86_BUILTIN_SCATTERSIV16SF,
28508 IX86_BUILTIN_SCATTERSIV16SI,
28509 IX86_BUILTIN_SCATTERSIV8DF,
28510 IX86_BUILTIN_SCATTERSIV8DI,
28511
28512 /* AVX512PF */
28513 IX86_BUILTIN_GATHERPFQPD,
28514 IX86_BUILTIN_GATHERPFDPS,
28515 IX86_BUILTIN_GATHERPFDPD,
28516 IX86_BUILTIN_GATHERPFQPS,
28517 IX86_BUILTIN_SCATTERPFDPD,
28518 IX86_BUILTIN_SCATTERPFDPS,
28519 IX86_BUILTIN_SCATTERPFQPD,
28520 IX86_BUILTIN_SCATTERPFQPS,
28521
28522 /* AVX512ER */
28523 IX86_BUILTIN_EXP2PD_MASK,
28524 IX86_BUILTIN_EXP2PS_MASK,
28525 IX86_BUILTIN_EXP2PS,
28526 IX86_BUILTIN_RCP28PD,
28527 IX86_BUILTIN_RCP28PS,
28528 IX86_BUILTIN_RCP28SD,
28529 IX86_BUILTIN_RCP28SS,
28530 IX86_BUILTIN_RSQRT28PD,
28531 IX86_BUILTIN_RSQRT28PS,
28532 IX86_BUILTIN_RSQRT28SD,
28533 IX86_BUILTIN_RSQRT28SS,
28534
28535 /* SHA builtins. */
28536 IX86_BUILTIN_SHA1MSG1,
28537 IX86_BUILTIN_SHA1MSG2,
28538 IX86_BUILTIN_SHA1NEXTE,
28539 IX86_BUILTIN_SHA1RNDS4,
28540 IX86_BUILTIN_SHA256MSG1,
28541 IX86_BUILTIN_SHA256MSG2,
28542 IX86_BUILTIN_SHA256RNDS2,
28543
28544 /* CLFLUSHOPT instructions. */
28545 IX86_BUILTIN_CLFLUSHOPT,
28546
28547 /* TFmode support builtins. */
28548 IX86_BUILTIN_INFQ,
28549 IX86_BUILTIN_HUGE_VALQ,
28550 IX86_BUILTIN_FABSQ,
28551 IX86_BUILTIN_COPYSIGNQ,
28552
28553 /* Vectorizer support builtins. */
28554 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512,
28555 IX86_BUILTIN_CPYSGNPS,
28556 IX86_BUILTIN_CPYSGNPD,
28557 IX86_BUILTIN_CPYSGNPS256,
28558 IX86_BUILTIN_CPYSGNPS512,
28559 IX86_BUILTIN_CPYSGNPD256,
28560 IX86_BUILTIN_CPYSGNPD512,
28561 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512,
28562 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512,
28563
28564
28565 /* FMA4 instructions. */
28566 IX86_BUILTIN_VFMADDSS,
28567 IX86_BUILTIN_VFMADDSD,
28568 IX86_BUILTIN_VFMADDPS,
28569 IX86_BUILTIN_VFMADDPD,
28570 IX86_BUILTIN_VFMADDPS256,
28571 IX86_BUILTIN_VFMADDPD256,
28572 IX86_BUILTIN_VFMADDSUBPS,
28573 IX86_BUILTIN_VFMADDSUBPD,
28574 IX86_BUILTIN_VFMADDSUBPS256,
28575 IX86_BUILTIN_VFMADDSUBPD256,
28576
28577 /* FMA3 instructions. */
28578 IX86_BUILTIN_VFMADDSS3,
28579 IX86_BUILTIN_VFMADDSD3,
28580
28581 /* XOP instructions. */
28582 IX86_BUILTIN_VPCMOV,
28583 IX86_BUILTIN_VPCMOV_V2DI,
28584 IX86_BUILTIN_VPCMOV_V4SI,
28585 IX86_BUILTIN_VPCMOV_V8HI,
28586 IX86_BUILTIN_VPCMOV_V16QI,
28587 IX86_BUILTIN_VPCMOV_V4SF,
28588 IX86_BUILTIN_VPCMOV_V2DF,
28589 IX86_BUILTIN_VPCMOV256,
28590 IX86_BUILTIN_VPCMOV_V4DI256,
28591 IX86_BUILTIN_VPCMOV_V8SI256,
28592 IX86_BUILTIN_VPCMOV_V16HI256,
28593 IX86_BUILTIN_VPCMOV_V32QI256,
28594 IX86_BUILTIN_VPCMOV_V8SF256,
28595 IX86_BUILTIN_VPCMOV_V4DF256,
28596
28597 IX86_BUILTIN_VPPERM,
28598
28599 IX86_BUILTIN_VPMACSSWW,
28600 IX86_BUILTIN_VPMACSWW,
28601 IX86_BUILTIN_VPMACSSWD,
28602 IX86_BUILTIN_VPMACSWD,
28603 IX86_BUILTIN_VPMACSSDD,
28604 IX86_BUILTIN_VPMACSDD,
28605 IX86_BUILTIN_VPMACSSDQL,
28606 IX86_BUILTIN_VPMACSSDQH,
28607 IX86_BUILTIN_VPMACSDQL,
28608 IX86_BUILTIN_VPMACSDQH,
28609 IX86_BUILTIN_VPMADCSSWD,
28610 IX86_BUILTIN_VPMADCSWD,
28611
28612 IX86_BUILTIN_VPHADDBW,
28613 IX86_BUILTIN_VPHADDBD,
28614 IX86_BUILTIN_VPHADDBQ,
28615 IX86_BUILTIN_VPHADDWD,
28616 IX86_BUILTIN_VPHADDWQ,
28617 IX86_BUILTIN_VPHADDDQ,
28618 IX86_BUILTIN_VPHADDUBW,
28619 IX86_BUILTIN_VPHADDUBD,
28620 IX86_BUILTIN_VPHADDUBQ,
28621 IX86_BUILTIN_VPHADDUWD,
28622 IX86_BUILTIN_VPHADDUWQ,
28623 IX86_BUILTIN_VPHADDUDQ,
28624 IX86_BUILTIN_VPHSUBBW,
28625 IX86_BUILTIN_VPHSUBWD,
28626 IX86_BUILTIN_VPHSUBDQ,
28627
28628 IX86_BUILTIN_VPROTB,
28629 IX86_BUILTIN_VPROTW,
28630 IX86_BUILTIN_VPROTD,
28631 IX86_BUILTIN_VPROTQ,
28632 IX86_BUILTIN_VPROTB_IMM,
28633 IX86_BUILTIN_VPROTW_IMM,
28634 IX86_BUILTIN_VPROTD_IMM,
28635 IX86_BUILTIN_VPROTQ_IMM,
28636
28637 IX86_BUILTIN_VPSHLB,
28638 IX86_BUILTIN_VPSHLW,
28639 IX86_BUILTIN_VPSHLD,
28640 IX86_BUILTIN_VPSHLQ,
28641 IX86_BUILTIN_VPSHAB,
28642 IX86_BUILTIN_VPSHAW,
28643 IX86_BUILTIN_VPSHAD,
28644 IX86_BUILTIN_VPSHAQ,
28645
28646 IX86_BUILTIN_VFRCZSS,
28647 IX86_BUILTIN_VFRCZSD,
28648 IX86_BUILTIN_VFRCZPS,
28649 IX86_BUILTIN_VFRCZPD,
28650 IX86_BUILTIN_VFRCZPS256,
28651 IX86_BUILTIN_VFRCZPD256,
28652
28653 IX86_BUILTIN_VPCOMEQUB,
28654 IX86_BUILTIN_VPCOMNEUB,
28655 IX86_BUILTIN_VPCOMLTUB,
28656 IX86_BUILTIN_VPCOMLEUB,
28657 IX86_BUILTIN_VPCOMGTUB,
28658 IX86_BUILTIN_VPCOMGEUB,
28659 IX86_BUILTIN_VPCOMFALSEUB,
28660 IX86_BUILTIN_VPCOMTRUEUB,
28661
28662 IX86_BUILTIN_VPCOMEQUW,
28663 IX86_BUILTIN_VPCOMNEUW,
28664 IX86_BUILTIN_VPCOMLTUW,
28665 IX86_BUILTIN_VPCOMLEUW,
28666 IX86_BUILTIN_VPCOMGTUW,
28667 IX86_BUILTIN_VPCOMGEUW,
28668 IX86_BUILTIN_VPCOMFALSEUW,
28669 IX86_BUILTIN_VPCOMTRUEUW,
28670
28671 IX86_BUILTIN_VPCOMEQUD,
28672 IX86_BUILTIN_VPCOMNEUD,
28673 IX86_BUILTIN_VPCOMLTUD,
28674 IX86_BUILTIN_VPCOMLEUD,
28675 IX86_BUILTIN_VPCOMGTUD,
28676 IX86_BUILTIN_VPCOMGEUD,
28677 IX86_BUILTIN_VPCOMFALSEUD,
28678 IX86_BUILTIN_VPCOMTRUEUD,
28679
28680 IX86_BUILTIN_VPCOMEQUQ,
28681 IX86_BUILTIN_VPCOMNEUQ,
28682 IX86_BUILTIN_VPCOMLTUQ,
28683 IX86_BUILTIN_VPCOMLEUQ,
28684 IX86_BUILTIN_VPCOMGTUQ,
28685 IX86_BUILTIN_VPCOMGEUQ,
28686 IX86_BUILTIN_VPCOMFALSEUQ,
28687 IX86_BUILTIN_VPCOMTRUEUQ,
28688
28689 IX86_BUILTIN_VPCOMEQB,
28690 IX86_BUILTIN_VPCOMNEB,
28691 IX86_BUILTIN_VPCOMLTB,
28692 IX86_BUILTIN_VPCOMLEB,
28693 IX86_BUILTIN_VPCOMGTB,
28694 IX86_BUILTIN_VPCOMGEB,
28695 IX86_BUILTIN_VPCOMFALSEB,
28696 IX86_BUILTIN_VPCOMTRUEB,
28697
28698 IX86_BUILTIN_VPCOMEQW,
28699 IX86_BUILTIN_VPCOMNEW,
28700 IX86_BUILTIN_VPCOMLTW,
28701 IX86_BUILTIN_VPCOMLEW,
28702 IX86_BUILTIN_VPCOMGTW,
28703 IX86_BUILTIN_VPCOMGEW,
28704 IX86_BUILTIN_VPCOMFALSEW,
28705 IX86_BUILTIN_VPCOMTRUEW,
28706
28707 IX86_BUILTIN_VPCOMEQD,
28708 IX86_BUILTIN_VPCOMNED,
28709 IX86_BUILTIN_VPCOMLTD,
28710 IX86_BUILTIN_VPCOMLED,
28711 IX86_BUILTIN_VPCOMGTD,
28712 IX86_BUILTIN_VPCOMGED,
28713 IX86_BUILTIN_VPCOMFALSED,
28714 IX86_BUILTIN_VPCOMTRUED,
28715
28716 IX86_BUILTIN_VPCOMEQQ,
28717 IX86_BUILTIN_VPCOMNEQ,
28718 IX86_BUILTIN_VPCOMLTQ,
28719 IX86_BUILTIN_VPCOMLEQ,
28720 IX86_BUILTIN_VPCOMGTQ,
28721 IX86_BUILTIN_VPCOMGEQ,
28722 IX86_BUILTIN_VPCOMFALSEQ,
28723 IX86_BUILTIN_VPCOMTRUEQ,
28724
28725 /* LWP instructions. */
28726 IX86_BUILTIN_LLWPCB,
28727 IX86_BUILTIN_SLWPCB,
28728 IX86_BUILTIN_LWPVAL32,
28729 IX86_BUILTIN_LWPVAL64,
28730 IX86_BUILTIN_LWPINS32,
28731 IX86_BUILTIN_LWPINS64,
28732
28733 IX86_BUILTIN_CLZS,
28734
28735 /* RTM */
28736 IX86_BUILTIN_XBEGIN,
28737 IX86_BUILTIN_XEND,
28738 IX86_BUILTIN_XABORT,
28739 IX86_BUILTIN_XTEST,
28740
28741 /* BMI instructions. */
28742 IX86_BUILTIN_BEXTR32,
28743 IX86_BUILTIN_BEXTR64,
28744 IX86_BUILTIN_CTZS,
28745
28746 /* TBM instructions. */
28747 IX86_BUILTIN_BEXTRI32,
28748 IX86_BUILTIN_BEXTRI64,
28749
28750 /* BMI2 instructions. */
28751 IX86_BUILTIN_BZHI32,
28752 IX86_BUILTIN_BZHI64,
28753 IX86_BUILTIN_PDEP32,
28754 IX86_BUILTIN_PDEP64,
28755 IX86_BUILTIN_PEXT32,
28756 IX86_BUILTIN_PEXT64,
28757
28758 /* ADX instructions. */
28759 IX86_BUILTIN_ADDCARRYX32,
28760 IX86_BUILTIN_ADDCARRYX64,
28761
28762 /* FSGSBASE instructions. */
28763 IX86_BUILTIN_RDFSBASE32,
28764 IX86_BUILTIN_RDFSBASE64,
28765 IX86_BUILTIN_RDGSBASE32,
28766 IX86_BUILTIN_RDGSBASE64,
28767 IX86_BUILTIN_WRFSBASE32,
28768 IX86_BUILTIN_WRFSBASE64,
28769 IX86_BUILTIN_WRGSBASE32,
28770 IX86_BUILTIN_WRGSBASE64,
28771
28772 /* RDRND instructions. */
28773 IX86_BUILTIN_RDRAND16_STEP,
28774 IX86_BUILTIN_RDRAND32_STEP,
28775 IX86_BUILTIN_RDRAND64_STEP,
28776
28777 /* RDSEED instructions. */
28778 IX86_BUILTIN_RDSEED16_STEP,
28779 IX86_BUILTIN_RDSEED32_STEP,
28780 IX86_BUILTIN_RDSEED64_STEP,
28781
28782 /* F16C instructions. */
28783 IX86_BUILTIN_CVTPH2PS,
28784 IX86_BUILTIN_CVTPH2PS256,
28785 IX86_BUILTIN_CVTPS2PH,
28786 IX86_BUILTIN_CVTPS2PH256,
28787
28788 /* CFString built-in for darwin */
28789 IX86_BUILTIN_CFSTRING,
28790
28791 /* Builtins to get CPU type and supported features. */
28792 IX86_BUILTIN_CPU_INIT,
28793 IX86_BUILTIN_CPU_IS,
28794 IX86_BUILTIN_CPU_SUPPORTS,
28795
28796 /* Read/write FLAGS register built-ins. */
28797 IX86_BUILTIN_READ_FLAGS,
28798 IX86_BUILTIN_WRITE_FLAGS,
28799
28800 IX86_BUILTIN_MAX
28801 };
28802
28803 /* Table for the ix86 builtin decls. */
28804 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
28805
28806 /* Table of all of the builtin functions that are possible with different ISAs
28807 but are waiting to be built until a function is declared to use that
28808 ISA. */
28809 struct builtin_isa {
28810 const char *name; /* function name */
28811 enum ix86_builtin_func_type tcode; /* type to use in the declaration */
28812 HOST_WIDE_INT isa; /* isa_flags this builtin is defined for */
28813 bool const_p; /* true if the declaration is constant */
28814 bool set_and_not_built_p; /* true if recorded here but the decl is not yet built */
28815 };
28816
28817 static struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
28818
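/* A sketch of how a deferred builtin sits in this table, using the RTM
   entry registered later in this file (see bdesc_special_args below).
   When RTM is not in the default ISA, def_builtin leaves roughly:

     ix86_builtins[(int) IX86_BUILTIN_XBEGIN] = NULL_TREE;
     ix86_builtins_isa[(int) IX86_BUILTIN_XBEGIN].isa = OPTION_MASK_ISA_RTM;
     ix86_builtins_isa[(int) IX86_BUILTIN_XBEGIN].name = "__builtin_ia32_xbegin";
     ix86_builtins_isa[(int) IX86_BUILTIN_XBEGIN].tcode = UNSIGNED_FTYPE_VOID;
     ix86_builtins_isa[(int) IX86_BUILTIN_XBEGIN].set_and_not_built_p = true;

   and ix86_add_new_builtins builds the real decl once OPTION_MASK_ISA_RTM
   becomes live.  (Illustration only; the values are taken from the RTM row
   of bdesc_special_args further down.)  */
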
28819
28820 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Save the
28821 MASK of isa_flags to use in the ix86_builtins_isa array. Store the
28822 function decl in the ix86_builtins array. Return the function decl, or
28823 NULL_TREE if the builtin was not added.
28824
28825 If the front end has a special hook for builtin functions, delay adding
28826 builtin functions that aren't in the current ISA until the ISA is changed
28827 with function specific optimization. Doing so can save about 300K for the
28828 default compiler. When the builtin is expanded, check at that time whether
28829 it is valid.
28830
28831 If the front end doesn't have a special hook, record all builtins, even
28832 those whose instruction set is not in the current ISA, in case the user
28833 uses function specific options for a different ISA; this way we don't get
28834 scope errors if a builtin is added in the middle of a function scope. */
28835
28836 static inline tree
28837 def_builtin (HOST_WIDE_INT mask, const char *name,
28838 enum ix86_builtin_func_type tcode,
28839 enum ix86_builtins code)
28840 {
28841 tree decl = NULL_TREE;
28842
28843 if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
28844 {
28845 ix86_builtins_isa[(int) code].isa = mask;
28846
28847 mask &= ~OPTION_MASK_ISA_64BIT;
28848 if (mask == 0
28849 || (mask & ix86_isa_flags) != 0
28850 || (lang_hooks.builtin_function
28851 == lang_hooks.builtin_function_ext_scope))
28853 {
28854 tree type = ix86_get_builtin_func_type (tcode);
28855 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
28856 NULL, NULL_TREE);
28857 ix86_builtins[(int) code] = decl;
28858 ix86_builtins_isa[(int) code].set_and_not_built_p = false;
28859 }
28860 else
28861 {
28862 ix86_builtins[(int) code] = NULL_TREE;
28863 ix86_builtins_isa[(int) code].tcode = tcode;
28864 ix86_builtins_isa[(int) code].name = name;
28865 ix86_builtins_isa[(int) code].const_p = false;
28866 ix86_builtins_isa[(int) code].set_and_not_built_p = true;
28867 }
28868 }
28869
28870 return decl;
28871 }
28872
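/* Worked example of the OPTION_MASK_ISA_64BIT guard above (a sketch; the
   loop that feeds table entries into def_builtin lives elsewhere in this
   file).  The 64-bit-only fxsave64 row of bdesc_special_args below amounts
   to a call like:

     def_builtin (OPTION_MASK_ISA_FXSR | OPTION_MASK_ISA_64BIT,
                  "__builtin_ia32_fxsave64", VOID_FTYPE_PVOID,
                  IX86_BUILTIN_FXSAVE64);

   On a !TARGET_64BIT target the guard fails, nothing is recorded, and the
   call simply returns NULL_TREE, so the builtin does not exist for -m32.  */
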
28873 /* Like def_builtin, but also marks the function decl "const". */
28874
28875 static inline tree
28876 def_builtin_const (HOST_WIDE_INT mask, const char *name,
28877 enum ix86_builtin_func_type tcode, enum ix86_builtins code)
28878 {
28879 tree decl = def_builtin (mask, name, tcode, code);
28880 if (decl)
28881 TREE_READONLY (decl) = 1;
28882 else
28883 ix86_builtins_isa[(int) code].const_p = true;
28884
28885 return decl;
28886 }
28887
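/* Typical use, sketched from the "__builtin_ia32_addpd" row of bdesc_args
   below (it is assumed here that bdesc_args rows are registered through
   this wrapper; the loop doing so is elsewhere in this file):

     def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_addpd",
                        V2DF_FTYPE_V2DF_V2DF, IX86_BUILTIN_ADDPD);

   If the decl is built immediately it is marked TREE_READONLY; if it is
   deferred, const_p is remembered and applied later by
   ix86_add_new_builtins.  */
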
28888 /* Add any new builtin functions for a given ISA that may not have been
28889 declared. This saves a bit of space compared to adding all of the
28890 declarations to the tree, even those that are never used. */
28891
28892 static void
28893 ix86_add_new_builtins (HOST_WIDE_INT isa)
28894 {
28895 int i;
28896
28897 for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
28898 {
28899 if ((ix86_builtins_isa[i].isa & isa) != 0
28900 && ix86_builtins_isa[i].set_and_not_built_p)
28901 {
28902 tree decl, type;
28903
28904 /* Don't define the builtin again. */
28905 ix86_builtins_isa[i].set_and_not_built_p = false;
28906
28907 type = ix86_get_builtin_func_type (ix86_builtins_isa[i].tcode);
28908 decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
28909 type, i, BUILT_IN_MD, NULL,
28910 NULL_TREE);
28911
28912 ix86_builtins[i] = decl;
28913 if (ix86_builtins_isa[i].const_p)
28914 TREE_READONLY (decl) = 1;
28915 }
28916 }
28917 }
28918
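/* The user-visible effect, as a sketch (the call into ix86_add_new_builtins
   when a target attribute or pragma changes the ISA is made elsewhere in
   this file): a function can opt into an ISA and use its builtins even when
   the translation unit is compiled without it, e.g.

     __attribute__((target ("rtm")))
     static unsigned int
     start_transaction (void)
     {
       return __builtin_ia32_xbegin ();
     }

   compiles without -mrtm because the xbegin builtin is declared on demand
   here.  */
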
28919 /* Bits for builtin_description.flag. */
28920
28921 /* Set when we don't support the comparison natively, and should
28922 swap the comparison operands in order to support it. */
28923 #define BUILTIN_DESC_SWAP_OPERANDS 1
28924
28925 struct builtin_description
28926 {
28927 const HOST_WIDE_INT mask;
28928 const enum insn_code icode;
28929 const char *const name;
28930 const enum ix86_builtins code;
28931 const enum rtx_code comparison;
28932 const int flag;
28933 };
28934
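/* The tables below are arrays of this struct.  Reading the first bdesc_comi
   entry field by field (annotation added for illustration):

     mask        OPTION_MASK_ISA_SSE        ISA that enables the builtin
     icode       CODE_FOR_sse_comi          insn pattern used to expand it
     name        "__builtin_ia32_comieq"    name visible to users
     code        IX86_BUILTIN_COMIEQSS      index into ix86_builtins[]
     comparison  UNEQ                       rtx comparison code to generate
     flag        0                          BUILTIN_DESC_* bits

   In bdesc_special_args and bdesc_args the flag field instead carries an
   ix86_builtin_func_type cast to int, and in bdesc_pcmpestr/bdesc_pcmpistr
   a flags-register mode.  */
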
28935 static const struct builtin_description bdesc_comi[] =
28936 {
28937 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
28938 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
28939 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
28940 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
28941 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
28942 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
28943 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
28944 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
28945 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
28946 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
28947 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
28948 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
28949 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
28950 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
28951 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
28952 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
28953 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
28954 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
28955 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
28956 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
28957 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
28958 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
28959 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
28960 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
28961 };
28962
28963 static const struct builtin_description bdesc_pcmpestr[] =
28964 {
28965 /* SSE4.2 */
28966 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
28967 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
28968 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
28969 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
28970 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
28971 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
28972 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
28973 };
28974
28975 static const struct builtin_description bdesc_pcmpistr[] =
28976 {
28977 /* SSE4.2 */
28978 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
28979 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
28980 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
28981 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
28982 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
28983 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
28984 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
28985 };
28986
28987 /* Special builtins with a variable number of arguments. */
28988 static const struct builtin_description bdesc_special_args[] =
28989 {
28990 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC, UNKNOWN, (int) UINT64_FTYPE_VOID },
28991 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP, UNKNOWN, (int) UINT64_FTYPE_PUNSIGNED },
28992 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_pause, "__builtin_ia32_pause", IX86_BUILTIN_PAUSE, UNKNOWN, (int) VOID_FTYPE_VOID },
28993
28994 /* 80387 (used internally for atomic compound assignment). */
28995 { 0, CODE_FOR_fnstenv, "__builtin_ia32_fnstenv", IX86_BUILTIN_FNSTENV, UNKNOWN, (int) VOID_FTYPE_PVOID },
28996 { 0, CODE_FOR_fldenv, "__builtin_ia32_fldenv", IX86_BUILTIN_FLDENV, UNKNOWN, (int) VOID_FTYPE_PCVOID },
28997 { 0, CODE_FOR_fnstsw, "__builtin_ia32_fnstsw", IX86_BUILTIN_FNSTSW, UNKNOWN, (int) VOID_FTYPE_PUSHORT },
28998 { 0, CODE_FOR_fnclex, "__builtin_ia32_fnclex", IX86_BUILTIN_FNCLEX, UNKNOWN, (int) VOID_FTYPE_VOID },
28999
29000 /* MMX */
29001 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
29002
29003 /* 3DNow! */
29004 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
29005
29006 /* FXSR, XSAVE, XSAVEOPT, XSAVEC and XSAVES. */
29007 { OPTION_MASK_ISA_FXSR, CODE_FOR_nothing, "__builtin_ia32_fxsave", IX86_BUILTIN_FXSAVE, UNKNOWN, (int) VOID_FTYPE_PVOID },
29008 { OPTION_MASK_ISA_FXSR, CODE_FOR_nothing, "__builtin_ia32_fxrstor", IX86_BUILTIN_FXRSTOR, UNKNOWN, (int) VOID_FTYPE_PVOID },
29009 { OPTION_MASK_ISA_XSAVE, CODE_FOR_nothing, "__builtin_ia32_xsave", IX86_BUILTIN_XSAVE, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29010 { OPTION_MASK_ISA_XSAVE, CODE_FOR_nothing, "__builtin_ia32_xrstor", IX86_BUILTIN_XRSTOR, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29011 { OPTION_MASK_ISA_XSAVEOPT, CODE_FOR_nothing, "__builtin_ia32_xsaveopt", IX86_BUILTIN_XSAVEOPT, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29012 { OPTION_MASK_ISA_XSAVES, CODE_FOR_nothing, "__builtin_ia32_xsaves", IX86_BUILTIN_XSAVES, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29013 { OPTION_MASK_ISA_XSAVES, CODE_FOR_nothing, "__builtin_ia32_xrstors", IX86_BUILTIN_XRSTORS, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29014 { OPTION_MASK_ISA_XSAVEC, CODE_FOR_nothing, "__builtin_ia32_xsavec", IX86_BUILTIN_XSAVEC, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29015
29016 { OPTION_MASK_ISA_FXSR | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_fxsave64", IX86_BUILTIN_FXSAVE64, UNKNOWN, (int) VOID_FTYPE_PVOID },
29017 { OPTION_MASK_ISA_FXSR | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_fxrstor64", IX86_BUILTIN_FXRSTOR64, UNKNOWN, (int) VOID_FTYPE_PVOID },
29018 { OPTION_MASK_ISA_XSAVE | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsave64", IX86_BUILTIN_XSAVE64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29019 { OPTION_MASK_ISA_XSAVE | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xrstor64", IX86_BUILTIN_XRSTOR64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29020 { OPTION_MASK_ISA_XSAVEOPT | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsaveopt64", IX86_BUILTIN_XSAVEOPT64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29021 { OPTION_MASK_ISA_XSAVES | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsaves64", IX86_BUILTIN_XSAVES64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29022 { OPTION_MASK_ISA_XSAVES | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xrstors64", IX86_BUILTIN_XRSTORS64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29023 { OPTION_MASK_ISA_XSAVEC | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsavec64", IX86_BUILTIN_XSAVEC64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29024
29025 /* SSE */
29026 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storeups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
29027 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
29028 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
29029
29030 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
29031 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
29032 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
29033 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
29034
29035 /* SSE or 3DNow!A */
29036 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
29037 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntq, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PULONGLONG_ULONGLONG },
29038
29039 /* SSE2 */
29040 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
29041 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
29042 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storeupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
29043 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storedquv16qi, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
29044 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
29045 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
29046 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntisi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
29047 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_movntidi, "__builtin_ia32_movnti64", IX86_BUILTIN_MOVNTI64, UNKNOWN, (int) VOID_FTYPE_PLONGLONG_LONGLONG },
29048 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
29049 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loaddquv16qi, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
29050
29051 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
29052 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
29053
29054 /* SSE3 */
29055 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
29056
29057 /* SSE4.1 */
29058 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },
29059
29060 /* SSE4A */
29061 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
29062 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
29063
29064 /* AVX */
29065 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
29066 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },
29067
29068 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4sf, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
29069 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4df, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
29070 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv8sf, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
29071 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v4df, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
29072 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v8sf, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },
29073
29074 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
29075 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
29076 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
29077 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
29078 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loaddquv32qi, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
29079 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storedquv32qi, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
29080 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
29081
29082 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
29083 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
29084 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
29085
29086 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DI },
29087 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SI },
29088 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DI },
29089 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SI },
29090 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DI_V2DF },
29091 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SI_V4SF },
29092 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DI_V4DF },
29093 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SI_V8SF },
29094
29095 /* AVX2 */
29096 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_movntdqa, "__builtin_ia32_movntdqa256", IX86_BUILTIN_MOVNTDQA256, UNKNOWN, (int) V4DI_FTYPE_PV4DI },
29097 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd, "__builtin_ia32_maskloadd", IX86_BUILTIN_MASKLOADD, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI },
29098 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq, "__builtin_ia32_maskloadq", IX86_BUILTIN_MASKLOADQ, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI },
29099 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd256, "__builtin_ia32_maskloadd256", IX86_BUILTIN_MASKLOADD256, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI },
29100 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq256, "__builtin_ia32_maskloadq256", IX86_BUILTIN_MASKLOADQ256, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI },
29101 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored, "__builtin_ia32_maskstored", IX86_BUILTIN_MASKSTORED, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_V4SI },
29102 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq, "__builtin_ia32_maskstoreq", IX86_BUILTIN_MASKSTOREQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_V2DI },
29103 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored256, "__builtin_ia32_maskstored256", IX86_BUILTIN_MASKSTORED256, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_V8SI },
29104 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq256, "__builtin_ia32_maskstoreq256", IX86_BUILTIN_MASKSTOREQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_V4DI },
29105
29106 /* AVX512F */
29107 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev16sf_mask, "__builtin_ia32_compressstoresf512_mask", IX86_BUILTIN_COMPRESSPSSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
29108 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev16si_mask, "__builtin_ia32_compressstoresi512_mask", IX86_BUILTIN_PCOMPRESSDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
29109 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev8df_mask, "__builtin_ia32_compressstoredf512_mask", IX86_BUILTIN_COMPRESSPDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
29110 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev8di_mask, "__builtin_ia32_compressstoredi512_mask", IX86_BUILTIN_PCOMPRESSQSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
29111 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_mask, "__builtin_ia32_expandloadsf512_mask", IX86_BUILTIN_EXPANDPSLOAD512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
29112 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandloadsf512_maskz", IX86_BUILTIN_EXPANDPSLOAD512Z, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
29113 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_mask, "__builtin_ia32_expandloadsi512_mask", IX86_BUILTIN_PEXPANDDLOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
29114 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandloadsi512_maskz", IX86_BUILTIN_PEXPANDDLOAD512Z, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
29115 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_mask, "__builtin_ia32_expandloaddf512_mask", IX86_BUILTIN_EXPANDPDLOAD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
29116 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expandloaddf512_maskz", IX86_BUILTIN_EXPANDPDLOAD512Z, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
29117 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_mask, "__builtin_ia32_expandloaddi512_mask", IX86_BUILTIN_PEXPANDQLOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
29118 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expandloaddi512_maskz", IX86_BUILTIN_PEXPANDQLOAD512Z, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
29119 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loaddquv16si_mask, "__builtin_ia32_loaddqusi512_mask", IX86_BUILTIN_LOADDQUSI512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
29120 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loaddquv8di_mask, "__builtin_ia32_loaddqudi512_mask", IX86_BUILTIN_LOADDQUDI512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
29121 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadupd512_mask, "__builtin_ia32_loadupd512_mask", IX86_BUILTIN_LOADUPD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
29122 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadups512_mask, "__builtin_ia32_loadups512_mask", IX86_BUILTIN_LOADUPS512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
29123 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_loadaps512_mask", IX86_BUILTIN_LOADAPS512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
29124 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32load512_mask", IX86_BUILTIN_MOVDQA32LOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
29125 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_loadapd512_mask", IX86_BUILTIN_LOADAPD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
29126 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64load512_mask", IX86_BUILTIN_MOVDQA64LOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
29127 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv16sf, "__builtin_ia32_movntps512", IX86_BUILTIN_MOVNTPS512, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V16SF },
29128 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv8df, "__builtin_ia32_movntpd512", IX86_BUILTIN_MOVNTPD512, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V8DF },
29129 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv8di, "__builtin_ia32_movntdq512", IX86_BUILTIN_MOVNTDQ512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI },
29130 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntdqa, "__builtin_ia32_movntdqa512", IX86_BUILTIN_MOVNTDQA512, UNKNOWN, (int) V8DI_FTYPE_PV8DI },
29131 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storedquv16si_mask, "__builtin_ia32_storedqusi512_mask", IX86_BUILTIN_STOREDQUSI512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
29132 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storedquv8di_mask, "__builtin_ia32_storedqudi512_mask", IX86_BUILTIN_STOREDQUDI512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
29133 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storeupd512_mask, "__builtin_ia32_storeupd512_mask", IX86_BUILTIN_STOREUPD512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
29134 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8si2_mask_store, "__builtin_ia32_pmovusqd512mem_mask", IX86_BUILTIN_PMOVUSQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
29135 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8si2_mask_store, "__builtin_ia32_pmovsqd512mem_mask", IX86_BUILTIN_PMOVSQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
29136 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8si2_mask_store, "__builtin_ia32_pmovqd512mem_mask", IX86_BUILTIN_PMOVQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
29137 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovusqw512mem_mask", IX86_BUILTIN_PMOVUSQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
29138 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovsqw512mem_mask", IX86_BUILTIN_PMOVSQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
29139 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovqw512mem_mask", IX86_BUILTIN_PMOVQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
29140 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovusdw512mem_mask", IX86_BUILTIN_PMOVUSDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
29141 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovsdw512mem_mask", IX86_BUILTIN_PMOVSDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
29142 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovdw512mem_mask", IX86_BUILTIN_PMOVDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
29143 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovqb512mem_mask", IX86_BUILTIN_PMOVQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
29144 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovusqb512mem_mask", IX86_BUILTIN_PMOVUSQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
29145 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovsqb512mem_mask", IX86_BUILTIN_PMOVSQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
29146 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovusdb512mem_mask", IX86_BUILTIN_PMOVUSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
29147 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovsdb512mem_mask", IX86_BUILTIN_PMOVSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
29148 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovdb512mem_mask", IX86_BUILTIN_PMOVDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
29149 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storeups512_mask, "__builtin_ia32_storeups512_mask", IX86_BUILTIN_STOREUPS512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
29150 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev16sf_mask, "__builtin_ia32_storeaps512_mask", IX86_BUILTIN_STOREAPS512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
29151 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev16si_mask, "__builtin_ia32_movdqa32store512_mask", IX86_BUILTIN_MOVDQA32STORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
29152 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev8df_mask, "__builtin_ia32_storeapd512_mask", IX86_BUILTIN_STOREAPD512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
29153 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev8di_mask, "__builtin_ia32_movdqa64store512_mask", IX86_BUILTIN_MOVDQA64STORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
29154
29155 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcb, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB, UNKNOWN, (int) VOID_FTYPE_PVOID },
29156 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcb, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB, UNKNOWN, (int) PVOID_FTYPE_VOID },
29157 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalsi3, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32, UNKNOWN, (int) VOID_FTYPE_UINT_UINT_UINT },
29158 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvaldi3, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64, UNKNOWN, (int) VOID_FTYPE_UINT64_UINT_UINT },
29159 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinssi3, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32, UNKNOWN, (int) UCHAR_FTYPE_UINT_UINT_UINT },
29160 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinsdi3, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT64_UINT_UINT },
29161
29162 /* FSGSBASE */
29163 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasesi, "__builtin_ia32_rdfsbase32", IX86_BUILTIN_RDFSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
29164 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasedi, "__builtin_ia32_rdfsbase64", IX86_BUILTIN_RDFSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
29165 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasesi, "__builtin_ia32_rdgsbase32", IX86_BUILTIN_RDGSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
29166 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasedi, "__builtin_ia32_rdgsbase64", IX86_BUILTIN_RDGSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
29167 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasesi, "__builtin_ia32_wrfsbase32", IX86_BUILTIN_WRFSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
29168 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasedi, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
29169 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasesi, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
29170 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasedi, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
29171
29172 /* RTM */
29173 { OPTION_MASK_ISA_RTM, CODE_FOR_xbegin, "__builtin_ia32_xbegin", IX86_BUILTIN_XBEGIN, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
29174 { OPTION_MASK_ISA_RTM, CODE_FOR_xend, "__builtin_ia32_xend", IX86_BUILTIN_XEND, UNKNOWN, (int) VOID_FTYPE_VOID },
29175 { OPTION_MASK_ISA_RTM, CODE_FOR_xtest, "__builtin_ia32_xtest", IX86_BUILTIN_XTEST, UNKNOWN, (int) INT_FTYPE_VOID },
29176 };
29177
29178 /* Builtins with a variable number of arguments. */
29179 static const struct builtin_description bdesc_args[] =
29180 {
29181 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_bsr, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI, UNKNOWN, (int) INT_FTYPE_INT },
29182 { OPTION_MASK_ISA_64BIT, CODE_FOR_bsr_rex64, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI, UNKNOWN, (int) INT64_FTYPE_INT64 },
29183 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC, UNKNOWN, (int) UINT64_FTYPE_INT },
29184 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlqi3, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
29185 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlhi3, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
29186 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrqi3, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
29187 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrhi3, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
29188
29189 /* MMX */
29190 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29191 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29192 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29193 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29194 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29195 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29196
29197 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29198 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29199 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29200 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29201 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29202 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29203 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29204 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29205
29206 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29207 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29208
29209 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29210 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29211 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29212 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29213
29214 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29215 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29216 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29217 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29218 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29219 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29220
29221 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29222 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29223 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29224 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29225 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29226 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29227
29228 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
29229 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
29230 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
29231
29232 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },
29233
29234 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
29235 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
29236 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
29237 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
29238 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
29239 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
29240
29241 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
29242 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
29243 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
29244 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
29245 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
29246 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
29247
29248 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
29249 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
29250 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
29251 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
29252
29253 /* 3DNow! */
29254 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
29255 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
29256 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
29257 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },
29258
29259 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29260 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29261 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29262 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
29263 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
29264 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
29265 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29266 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29267 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29268 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29269 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29270 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29271 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29272 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29273 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29274
29275 /* 3DNow!A */
29276 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
29277 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
29278 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
29279 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
29280 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29281 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29282
29283 /* SSE */
29284 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
29285 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29286 { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29287 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29288 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29289 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29290 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
29291 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
29292 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
29293 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
29294 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
29295 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
29296
29297 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29298
29299 { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29300 { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29301 { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29302 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29303 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29304 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29305 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29306 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29307
29308 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
29309 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
29310 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
29311 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
29312 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
29313 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
29314 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
29315 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
29316 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
29317 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
29318 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
29319 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
29320 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
29321 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
29322 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
29323 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
29324 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
29325 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
29326 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
29327 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
29328
29329 { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29330 { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29331 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29332 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29333
29334 { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29335 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29336 { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29337 { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29338
29339 { OPTION_MASK_ISA_SSE, CODE_FOR_copysignv4sf3, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29340
29341 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29342 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29343 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29344 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_highv4sf, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29345 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_lowv4sf, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29346
29347 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
29348 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
29349 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_DI },
29350
29351 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },
29352
29353 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
29354 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
29355 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
29356
29357 { OPTION_MASK_ISA_SSE, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
29358 { OPTION_MASK_ISA_SSE, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },
29359
29360 /* SSE MMX or 3DNow!A */
29361 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29362 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29363 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29364
29365 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29366 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29367 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29368 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29369
29370 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
29371 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },
29372
29373 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },
29374
29375 /* SSE2 */
29376 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29377
29378 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
29379 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
29380 { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
29381 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
29382 { OPTION_MASK_ISA_SSE2, CODE_FOR_floatv4siv4sf2, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
29383
29384 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
29385 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
29386 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
29387 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
29388 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
29389
29390 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },
29391
29392 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
29393 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
29394 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
29395 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
29396
29397 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_fix_notruncv4sfv4si, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
29398 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
29399 { OPTION_MASK_ISA_SSE2, CODE_FOR_fix_truncv4sfv4si2, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
29400
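/* Illustrative note (not part of the original source): each entry below wires
   a __builtin_ia32_* name to a named expander, so e.g. the addpd entry makes

       typedef double __v2df __attribute__ ((__vector_size__ (16)));
       __v2df add_pd (__v2df a, __v2df b) { return __builtin_ia32_addpd (a, b); }

   expand through CODE_FOR_addv2df3 as an ordinary V2DF addition when SSE2 is
   enabled.  The helper name add_pd is only an example.  */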
29401 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29402 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29403 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29404 { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29405 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29406 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29407 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29408 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29409
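/* Editorial note: the "greater" compares below have no expander of their own;
   cmpgtpd/cmpgepd reuse the LT/LE mask compare with the operands swapped
   (the ..._SWAP type codes), and the negated forms map onto the unordered
   codes UNGE/UNGT, as recorded in the comparison field of each entry.  */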
29410 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
29411 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
29412 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
29413 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
29414 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
29415 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
29416 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
29417 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
29418 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
29419 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
29420 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
29421 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
29422 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
29423 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
29424 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
29425 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
29426 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
29427 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
29428 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
29429 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
29430
29431 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29432 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29433 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29434 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29435
29436 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29437 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29438 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29439 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29440
29441 { OPTION_MASK_ISA_SSE2, CODE_FOR_copysignv2df3, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29442
29443 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29444 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2df, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29445 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2df, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29446
29447 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
29448
29449 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29450 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29451 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29452 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29453 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29454 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29455 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29456 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29457
29458 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29459 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29460 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29461 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29462 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29463 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29464 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29465 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29466
29467 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29468 { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29469
29470 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29471 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29472 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29473 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29474
29475 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29476 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29477
29478 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29479 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29480 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29481 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29482 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29483 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29484
29485 { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29486 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29487 { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29488 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29489
29490 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv16qi, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29491 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv8hi, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29492 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv4si, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29493 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2di, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29494 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv16qi, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29495 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv8hi, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29496 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv4si, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29497 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2di, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29498
29499 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
29500 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
29501 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
29502
29503 { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29504 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },
29505
29506 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
29507 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_widen_umult_even_v4si, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
29508
29509 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },
29510
29511 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
29512 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
29513 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
29514 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },
29515
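/* Editorial note: the shift builtins below share the generic ashl/lshr/ashr
   expanders; the ..._SI_COUNT type codes mark the immediate-count forms and
   the ..._V*I_COUNT codes the forms that take the count in an XMM register,
   while pslldqi128/psrldqi128 go through the V1TI patterns with the
   ..._INT_CONVERT type to shift the full 128-bit value.  */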
29516 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlv1ti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
29517 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
29518 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
29519 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
29520 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
29521 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
29522 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
29523
29524 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrv1ti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
29525 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
29526 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
29527 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
29528 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
29529 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
29530 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
29531
29532 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
29533 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
29534 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
29535 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
29536
29537 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
29538 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
29539 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
29540
29541 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },
29542
29543 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
29544
29545 /* SSE2 MMX */
29546 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
29547 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
29548
29549 /* SSE3 */
29550 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29551 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29552
29553 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29554 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29555 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29556 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29557 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29558 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29559
29560 /* SSSE3 */
29561 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
29562 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
29563 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
29564 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
29565 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
29566 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },
29567
29568 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29569 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29570 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29571 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29572 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29573 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29574 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29575 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29576 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29577 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29578 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29579 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29580 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
29581 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
29582 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29583 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29584 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29585 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29586 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29587 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29588 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29589 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29590 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29591 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29592
29593 /* SSSE3. */
29594 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT },
29595 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT },
29596
29597 /* SSE4.1 */
29598 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29599 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29600 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
29601 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
29602 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29603 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29604 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29605 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
29606 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
29607 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },
29608
29609 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
29610 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
29611 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
29612 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
29613 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
29614 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
29615 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
29616 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
29617 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
29618 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
29619 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
29620 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
29621 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
29622
29623 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
29624 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29625 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29626 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29627 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29628 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29629 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29630 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29631 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29632 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29633 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
29634 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29635
29636 /* SSE4.1 */
29637 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
29638 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
29639 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29640 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29641
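/* Editorial note: the floor/ceil/trunc/rint entries below reuse the
   sse4_1_roundpd/roundps expanders and pass the rounding-mode constant
   (ROUND_FLOOR, ROUND_CEIL, ...) through the comparison field, cast to
   enum rtx_code; the expansion code is expected to read it back as an
   immediate rounding mode rather than as a comparison.  */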
29642 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_floorpd", IX86_BUILTIN_FLOORPD, (enum rtx_code) ROUND_FLOOR, (int) V2DF_FTYPE_V2DF_ROUND },
29643 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_ceilpd", IX86_BUILTIN_CEILPD, (enum rtx_code) ROUND_CEIL, (int) V2DF_FTYPE_V2DF_ROUND },
29644 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_truncpd", IX86_BUILTIN_TRUNCPD, (enum rtx_code) ROUND_TRUNC, (int) V2DF_FTYPE_V2DF_ROUND },
29645 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_rintpd", IX86_BUILTIN_RINTPD, (enum rtx_code) ROUND_MXCSR, (int) V2DF_FTYPE_V2DF_ROUND },
29646
29647 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_floorpd_vec_pack_sfix", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
29648 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_ceilpd_vec_pack_sfix", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
29649
29650 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2, "__builtin_ia32_roundpd_az", IX86_BUILTIN_ROUNDPD_AZ, UNKNOWN, (int) V2DF_FTYPE_V2DF },
29651 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
29652
29653 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_floorps", IX86_BUILTIN_FLOORPS, (enum rtx_code) ROUND_FLOOR, (int) V4SF_FTYPE_V4SF_ROUND },
29654 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_ceilps", IX86_BUILTIN_CEILPS, (enum rtx_code) ROUND_CEIL, (int) V4SF_FTYPE_V4SF_ROUND },
29655 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_truncps", IX86_BUILTIN_TRUNCPS, (enum rtx_code) ROUND_TRUNC, (int) V4SF_FTYPE_V4SF_ROUND },
29656 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_rintps", IX86_BUILTIN_RINTPS, (enum rtx_code) ROUND_MXCSR, (int) V4SF_FTYPE_V4SF_ROUND },
29657
29658 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_floorps_sfix", IX86_BUILTIN_FLOORPS_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V4SF_ROUND },
29659 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_ceilps_sfix", IX86_BUILTIN_CEILPS_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V4SF_ROUND },
29660
29661 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2, "__builtin_ia32_roundps_az", IX86_BUILTIN_ROUNDPS_AZ, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29662 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2_sfix, "__builtin_ia32_roundps_az_sfix", IX86_BUILTIN_ROUNDPS_AZ_SFIX, UNKNOWN, (int) V4SI_FTYPE_V4SF },
29663
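/* Editorial note: for the ptest entries the comparison field selects which
   flag the result comes from: EQ for ptestz (ZF), LTU for ptestc (CF), and
   GTU for ptestnzc (set when both ZF and CF are clear); the AVX vtest and
   ptest256 entries further down use the same encoding.  */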
29664 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
29665 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
29666 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
29667
29668 /* SSE4.2 */
29669 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29670 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
29671 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
29672 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
29673 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
29674
29675 /* SSE4A */
29676 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
29677 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
29678 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
29679 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29680
29681 /* AES */
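/* Editorial note: entries with a null name (the AES and PCLMUL ones here,
   and FABSQ/COPYSIGNQ above) appear to be declared elsewhere under their
   user-visible names; this table then only supplies the expansion data
   keyed by the IX86_BUILTIN_* code.  */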
29682 { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
29683 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
29684
29685 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29686 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29687 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29688 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29689
29690 /* PCLMUL */
29691 { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
29692
29693 /* AVX */
29694 { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29695 { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29696 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29697 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29698 { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29699 { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29700 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29701 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29702 { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29703 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29704 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29705 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29706 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29707 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29708 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29709 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29710 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29711 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29712 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29713 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29714 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29715 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29716 { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29717 { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29718 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29719 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29720
29721 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
29722 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
29723 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
29724 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
29725
29726 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
29727 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
29728 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
29729 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
29730 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
29731 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
29732 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
29733 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29734 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29735 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29736 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29737 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
29738 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
29739 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
29740 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
29741 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
29742 { OPTION_MASK_ISA_AVX, CODE_FOR_floatv4siv4df2, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
29743 { OPTION_MASK_ISA_AVX, CODE_FOR_floatv8siv8sf2, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
29744 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
29745 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_fix_notruncv8sfv8si, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
29746 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
29747 { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv4dfv4si2, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
29748 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
29749 { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv8sfv8si2, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
29750 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
29751 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
29752 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
29753 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
29754 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
29755 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
29756 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
29757 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
29758 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
29759 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },
29760
29761 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29762 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29763 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
29764
29765 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
29766 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29767 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29768 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29769 { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29770
29771 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29772
29773 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
29774 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
29775
29776 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_floorpd256", IX86_BUILTIN_FLOORPD256, (enum rtx_code) ROUND_FLOOR, (int) V4DF_FTYPE_V4DF_ROUND },
29777 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_ceilpd256", IX86_BUILTIN_CEILPD256, (enum rtx_code) ROUND_CEIL, (int) V4DF_FTYPE_V4DF_ROUND },
29778 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_truncpd256", IX86_BUILTIN_TRUNCPD256, (enum rtx_code) ROUND_TRUNC, (int) V4DF_FTYPE_V4DF_ROUND },
29779 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_rintpd256", IX86_BUILTIN_RINTPD256, (enum rtx_code) ROUND_MXCSR, (int) V4DF_FTYPE_V4DF_ROUND },
29780
29781 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2, "__builtin_ia32_roundpd_az256", IX86_BUILTIN_ROUNDPD_AZ256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
29782 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix256", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
29783
29784 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_floorpd_vec_pack_sfix256", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
29785 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_ceilpd_vec_pack_sfix256", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
29786
29787 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_floorps256", IX86_BUILTIN_FLOORPS256, (enum rtx_code) ROUND_FLOOR, (int) V8SF_FTYPE_V8SF_ROUND },
29788 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_ceilps256", IX86_BUILTIN_CEILPS256, (enum rtx_code) ROUND_CEIL, (int) V8SF_FTYPE_V8SF_ROUND },
29789 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_truncps256", IX86_BUILTIN_TRUNCPS256, (enum rtx_code) ROUND_TRUNC, (int) V8SF_FTYPE_V8SF_ROUND },
29790 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_rintps256", IX86_BUILTIN_RINTPS256, (enum rtx_code) ROUND_MXCSR, (int) V8SF_FTYPE_V8SF_ROUND },
29791
29792 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_floorps_sfix256", IX86_BUILTIN_FLOORPS_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V8SF_ROUND },
29793 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_ceilps_sfix256", IX86_BUILTIN_CEILPS_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V8SF_ROUND },
29794
29795 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2, "__builtin_ia32_roundps_az256", IX86_BUILTIN_ROUNDPS_AZ256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29796 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2_sfix, "__builtin_ia32_roundps_az_sfix256", IX86_BUILTIN_ROUNDPS_AZ_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
29797
29798 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29799 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29800 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29801 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29802
29803 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
29804 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
29805 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
29806 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8si, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
29807 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8sf, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
29808 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v4df, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },
29809
29810 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
29811 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
29812 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
29813 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
29814 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
29815 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
29816 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
29817 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
29818 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
29819 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
29820 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
29821 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
29822 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
29823 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
29824 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
29825
29826 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
29827 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },
29828
29829 { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv8sf3, "__builtin_ia32_copysignps256", IX86_BUILTIN_CPYSGNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29830 { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv4df3, "__builtin_ia32_copysignpd256", IX86_BUILTIN_CPYSGNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29831
29832 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_pack_sfix_v4df, "__builtin_ia32_vec_pack_sfix256", IX86_BUILTIN_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
29833
29834 /* AVX2 */
29835 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_mpsadbw, "__builtin_ia32_mpsadbw256", IX86_BUILTIN_MPSADBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_INT },
29836 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv32qi2, "__builtin_ia32_pabsb256", IX86_BUILTIN_PABSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI },
29837 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv16hi2, "__builtin_ia32_pabsw256", IX86_BUILTIN_PABSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI },
29838 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv8si2, "__builtin_ia32_pabsd256", IX86_BUILTIN_PABSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI },
29839 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packssdw, "__builtin_ia32_packssdw256", IX86_BUILTIN_PACKSSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
29840 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packsswb, "__builtin_ia32_packsswb256", IX86_BUILTIN_PACKSSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
29841 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packusdw, "__builtin_ia32_packusdw256", IX86_BUILTIN_PACKUSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
29842 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packuswb, "__builtin_ia32_packuswb256", IX86_BUILTIN_PACKUSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
29843 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv32qi3, "__builtin_ia32_paddb256", IX86_BUILTIN_PADDB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29844 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv16hi3, "__builtin_ia32_paddw256", IX86_BUILTIN_PADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29845 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv8si3, "__builtin_ia32_paddd256", IX86_BUILTIN_PADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29846 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv4di3, "__builtin_ia32_paddq256", IX86_BUILTIN_PADDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29847 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv32qi3, "__builtin_ia32_paddsb256", IX86_BUILTIN_PADDSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29848 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv16hi3, "__builtin_ia32_paddsw256", IX86_BUILTIN_PADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29849 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv32qi3, "__builtin_ia32_paddusb256", IX86_BUILTIN_PADDUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29850 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv16hi3, "__builtin_ia32_paddusw256", IX86_BUILTIN_PADDUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29851 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_palignrv2ti, "__builtin_ia32_palignr256", IX86_BUILTIN_PALIGNR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_CONVERT },
29852 { OPTION_MASK_ISA_AVX2, CODE_FOR_andv4di3, "__builtin_ia32_andsi256", IX86_BUILTIN_AND256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29853 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_andnotv4di3, "__builtin_ia32_andnotsi256", IX86_BUILTIN_ANDNOT256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29854 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv32qi3, "__builtin_ia32_pavgb256", IX86_BUILTIN_PAVGB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29855 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv16hi3, "__builtin_ia32_pavgw256", IX86_BUILTIN_PAVGW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29856 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendvb, "__builtin_ia32_pblendvb256", IX86_BUILTIN_PBLENDVB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI },
29857 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendw, "__builtin_ia32_pblendw256", IX86_BUILTIN_PBLENDVW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_INT },
29858 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv32qi3, "__builtin_ia32_pcmpeqb256", IX86_BUILTIN_PCMPEQB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29859 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv16hi3, "__builtin_ia32_pcmpeqw256", IX86_BUILTIN_PCMPEQW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29860 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv8si3, "__builtin_ia32_pcmpeqd256", IX86_BUILTIN_PCMPEQD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29861 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv4di3, "__builtin_ia32_pcmpeqq256", IX86_BUILTIN_PCMPEQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29862 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv32qi3, "__builtin_ia32_pcmpgtb256", IX86_BUILTIN_PCMPGTB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29863 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv16hi3, "__builtin_ia32_pcmpgtw256", IX86_BUILTIN_PCMPGTW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29864 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv8si3, "__builtin_ia32_pcmpgtd256", IX86_BUILTIN_PCMPGTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29865 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv4di3, "__builtin_ia32_pcmpgtq256", IX86_BUILTIN_PCMPGTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29866 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddwv16hi3, "__builtin_ia32_phaddw256", IX86_BUILTIN_PHADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29867 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phadddv8si3, "__builtin_ia32_phaddd256", IX86_BUILTIN_PHADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29868 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddswv16hi3, "__builtin_ia32_phaddsw256", IX86_BUILTIN_PHADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29869 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubwv16hi3, "__builtin_ia32_phsubw256", IX86_BUILTIN_PHSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29870 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubdv8si3, "__builtin_ia32_phsubd256", IX86_BUILTIN_PHSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29871 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubswv16hi3, "__builtin_ia32_phsubsw256", IX86_BUILTIN_PHSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29872 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddubsw256, "__builtin_ia32_pmaddubsw256", IX86_BUILTIN_PMADDUBSW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
29873 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddwd, "__builtin_ia32_pmaddwd256", IX86_BUILTIN_PMADDWD256, UNKNOWN, (int) V8SI_FTYPE_V16HI_V16HI },
29874 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv32qi3, "__builtin_ia32_pmaxsb256", IX86_BUILTIN_PMAXSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29875 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv16hi3, "__builtin_ia32_pmaxsw256", IX86_BUILTIN_PMAXSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29876 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv8si3 , "__builtin_ia32_pmaxsd256", IX86_BUILTIN_PMAXSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29877 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv32qi3, "__builtin_ia32_pmaxub256", IX86_BUILTIN_PMAXUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29878 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv16hi3, "__builtin_ia32_pmaxuw256", IX86_BUILTIN_PMAXUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29879 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv8si3 , "__builtin_ia32_pmaxud256", IX86_BUILTIN_PMAXUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29880 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv32qi3, "__builtin_ia32_pminsb256", IX86_BUILTIN_PMINSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29881 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv16hi3, "__builtin_ia32_pminsw256", IX86_BUILTIN_PMINSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29882 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv8si3 , "__builtin_ia32_pminsd256", IX86_BUILTIN_PMINSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29883 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv32qi3, "__builtin_ia32_pminub256", IX86_BUILTIN_PMINUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29884 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv16hi3, "__builtin_ia32_pminuw256", IX86_BUILTIN_PMINUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29885 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv8si3 , "__builtin_ia32_pminud256", IX86_BUILTIN_PMINUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29886 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmovmskb, "__builtin_ia32_pmovmskb256", IX86_BUILTIN_PMOVMSKB256, UNKNOWN, (int) INT_FTYPE_V32QI },
29887 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv16qiv16hi2, "__builtin_ia32_pmovsxbw256", IX86_BUILTIN_PMOVSXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
29888 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8qiv8si2 , "__builtin_ia32_pmovsxbd256", IX86_BUILTIN_PMOVSXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
29889 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4qiv4di2 , "__builtin_ia32_pmovsxbq256", IX86_BUILTIN_PMOVSXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
29890 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8hiv8si2 , "__builtin_ia32_pmovsxwd256", IX86_BUILTIN_PMOVSXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
29891 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4hiv4di2 , "__builtin_ia32_pmovsxwq256", IX86_BUILTIN_PMOVSXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
29892 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4siv4di2 , "__builtin_ia32_pmovsxdq256", IX86_BUILTIN_PMOVSXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
29893 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv16qiv16hi2, "__builtin_ia32_pmovzxbw256", IX86_BUILTIN_PMOVZXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
29894 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8qiv8si2 , "__builtin_ia32_pmovzxbd256", IX86_BUILTIN_PMOVZXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
29895 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4qiv4di2 , "__builtin_ia32_pmovzxbq256", IX86_BUILTIN_PMOVZXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
29896 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8hiv8si2 , "__builtin_ia32_pmovzxwd256", IX86_BUILTIN_PMOVZXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
29897 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4hiv4di2 , "__builtin_ia32_pmovzxwq256", IX86_BUILTIN_PMOVZXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
29898 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4siv4di2 , "__builtin_ia32_pmovzxdq256", IX86_BUILTIN_PMOVZXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
29899 { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_smult_even_v8si, "__builtin_ia32_pmuldq256", IX86_BUILTIN_PMULDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
29900 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmulhrswv16hi3 , "__builtin_ia32_pmulhrsw256", IX86_BUILTIN_PMULHRSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29901 { OPTION_MASK_ISA_AVX2, CODE_FOR_umulv16hi3_highpart, "__builtin_ia32_pmulhuw256" , IX86_BUILTIN_PMULHUW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29902 { OPTION_MASK_ISA_AVX2, CODE_FOR_smulv16hi3_highpart, "__builtin_ia32_pmulhw256" , IX86_BUILTIN_PMULHW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29903 { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv16hi3, "__builtin_ia32_pmullw256" , IX86_BUILTIN_PMULLW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29904 { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv8si3, "__builtin_ia32_pmulld256" , IX86_BUILTIN_PMULLD256 , UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29905 { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_umult_even_v8si, "__builtin_ia32_pmuludq256", IX86_BUILTIN_PMULUDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
29906 { OPTION_MASK_ISA_AVX2, CODE_FOR_iorv4di3, "__builtin_ia32_por256", IX86_BUILTIN_POR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29907 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psadbw, "__builtin_ia32_psadbw256", IX86_BUILTIN_PSADBW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
29908 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufbv32qi3, "__builtin_ia32_pshufb256", IX86_BUILTIN_PSHUFB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29909 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufdv3, "__builtin_ia32_pshufd256", IX86_BUILTIN_PSHUFD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT },
29910 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufhwv3, "__builtin_ia32_pshufhw256", IX86_BUILTIN_PSHUFHW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
29911 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshuflwv3, "__builtin_ia32_pshuflw256", IX86_BUILTIN_PSHUFLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
29912 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv32qi3, "__builtin_ia32_psignb256", IX86_BUILTIN_PSIGNB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29913 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv16hi3, "__builtin_ia32_psignw256", IX86_BUILTIN_PSIGNW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29914 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv8si3 , "__builtin_ia32_psignd256", IX86_BUILTIN_PSIGND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29915 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlv2ti3, "__builtin_ia32_pslldqi256", IX86_BUILTIN_PSLLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
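  /* The shift builtins below come in pairs sharing one insn pattern: the
     "...i" forms (*_SI_COUNT / *_INT_COUNT types) take the count as a plain
     integer, the others take it from the low quadword of a vector operand
     (*_V8HI_COUNT / *_V4SI_COUNT / *_V2DI_COUNT).  */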
29916 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllwi256", IX86_BUILTIN_PSLLWI256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
29917 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllw256", IX86_BUILTIN_PSLLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
29918 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslldi256", IX86_BUILTIN_PSLLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
29919 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslld256", IX86_BUILTIN_PSLLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
29920 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllqi256", IX86_BUILTIN_PSLLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
29921 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllq256", IX86_BUILTIN_PSLLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
29922 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psrawi256", IX86_BUILTIN_PSRAWI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
29923 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psraw256", IX86_BUILTIN_PSRAW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
29924 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psradi256", IX86_BUILTIN_PSRADI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
29925 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psrad256", IX86_BUILTIN_PSRAD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
29926 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrv2ti3, "__builtin_ia32_psrldqi256", IX86_BUILTIN_PSRLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
29927 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlwi256", IX86_BUILTIN_PSRLWI256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
29928 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlw256", IX86_BUILTIN_PSRLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
29929 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrldi256", IX86_BUILTIN_PSRLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
29930 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrld256", IX86_BUILTIN_PSRLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
29931 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlqi256", IX86_BUILTIN_PSRLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
29932 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlq256", IX86_BUILTIN_PSRLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
29933 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv32qi3, "__builtin_ia32_psubb256", IX86_BUILTIN_PSUBB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29934 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv16hi3, "__builtin_ia32_psubw256", IX86_BUILTIN_PSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29935 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv8si3, "__builtin_ia32_psubd256", IX86_BUILTIN_PSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29936 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv4di3, "__builtin_ia32_psubq256", IX86_BUILTIN_PSUBQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29937 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv32qi3, "__builtin_ia32_psubsb256", IX86_BUILTIN_PSUBSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29938 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv16hi3, "__builtin_ia32_psubsw256", IX86_BUILTIN_PSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29939 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv32qi3, "__builtin_ia32_psubusb256", IX86_BUILTIN_PSUBUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29940 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv16hi3, "__builtin_ia32_psubusw256", IX86_BUILTIN_PSUBUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29941 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv32qi, "__builtin_ia32_punpckhbw256", IX86_BUILTIN_PUNPCKHBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29942 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv16hi, "__builtin_ia32_punpckhwd256", IX86_BUILTIN_PUNPCKHWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29943 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv8si, "__builtin_ia32_punpckhdq256", IX86_BUILTIN_PUNPCKHDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29944 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv4di, "__builtin_ia32_punpckhqdq256", IX86_BUILTIN_PUNPCKHQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29945 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv32qi, "__builtin_ia32_punpcklbw256", IX86_BUILTIN_PUNPCKLBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29946 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv16hi, "__builtin_ia32_punpcklwd256", IX86_BUILTIN_PUNPCKLWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29947 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv8si, "__builtin_ia32_punpckldq256", IX86_BUILTIN_PUNPCKLDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29948 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv4di, "__builtin_ia32_punpcklqdq256", IX86_BUILTIN_PUNPCKLQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29949 { OPTION_MASK_ISA_AVX2, CODE_FOR_xorv4di3, "__builtin_ia32_pxor256", IX86_BUILTIN_PXOR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29950 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4sf, "__builtin_ia32_vbroadcastss_ps", IX86_BUILTIN_VBROADCASTSS_PS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29951 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv8sf, "__builtin_ia32_vbroadcastss_ps256", IX86_BUILTIN_VBROADCASTSS_PS256, UNKNOWN, (int) V8SF_FTYPE_V4SF },
29952 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4df, "__builtin_ia32_vbroadcastsd_pd256", IX86_BUILTIN_VBROADCASTSD_PD256, UNKNOWN, (int) V4DF_FTYPE_V2DF },
29953 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vbroadcasti128_v4di, "__builtin_ia32_vbroadcastsi256", IX86_BUILTIN_VBROADCASTSI256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
29954 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv4si, "__builtin_ia32_pblendd128", IX86_BUILTIN_PBLENDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
29955 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv8si, "__builtin_ia32_pblendd256", IX86_BUILTIN_PBLENDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
29956 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv32qi, "__builtin_ia32_pbroadcastb256", IX86_BUILTIN_PBROADCASTB256, UNKNOWN, (int) V32QI_FTYPE_V16QI },
29957 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16hi, "__builtin_ia32_pbroadcastw256", IX86_BUILTIN_PBROADCASTW256, UNKNOWN, (int) V16HI_FTYPE_V8HI },
29958 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8si, "__builtin_ia32_pbroadcastd256", IX86_BUILTIN_PBROADCASTD256, UNKNOWN, (int) V8SI_FTYPE_V4SI },
29959 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4di, "__builtin_ia32_pbroadcastq256", IX86_BUILTIN_PBROADCASTQ256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
29960 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16qi, "__builtin_ia32_pbroadcastb128", IX86_BUILTIN_PBROADCASTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
29961 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8hi, "__builtin_ia32_pbroadcastw128", IX86_BUILTIN_PBROADCASTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
29962 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4si, "__builtin_ia32_pbroadcastd128", IX86_BUILTIN_PBROADCASTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
29963 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv2di, "__builtin_ia32_pbroadcastq128", IX86_BUILTIN_PBROADCASTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
29964 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8si, "__builtin_ia32_permvarsi256", IX86_BUILTIN_VPERMVARSI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29965 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8sf, "__builtin_ia32_permvarsf256", IX86_BUILTIN_VPERMVARSF256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
29966 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4df, "__builtin_ia32_permdf256", IX86_BUILTIN_VPERMDF256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
29967 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4di, "__builtin_ia32_permdi256", IX86_BUILTIN_VPERMDI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT },
29968 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv2ti, "__builtin_ia32_permti256", IX86_BUILTIN_VPERMTI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT },
29969 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_extracti128, "__builtin_ia32_extract128i256", IX86_BUILTIN_VEXTRACT128I256, UNKNOWN, (int) V2DI_FTYPE_V4DI_INT },
29970 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_inserti128, "__builtin_ia32_insert128i256", IX86_BUILTIN_VINSERT128I256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_INT },
29971 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4di, "__builtin_ia32_psllv4di", IX86_BUILTIN_PSLLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29972 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv2di, "__builtin_ia32_psllv2di", IX86_BUILTIN_PSLLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29973 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv8si, "__builtin_ia32_psllv8si", IX86_BUILTIN_PSLLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29974 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4si, "__builtin_ia32_psllv4si", IX86_BUILTIN_PSLLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29975 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv8si, "__builtin_ia32_psrav8si", IX86_BUILTIN_PSRAVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29976 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv4si, "__builtin_ia32_psrav4si", IX86_BUILTIN_PSRAVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29977 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4di, "__builtin_ia32_psrlv4di", IX86_BUILTIN_PSRLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29978 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv2di, "__builtin_ia32_psrlv2di", IX86_BUILTIN_PSRLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29979 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv8si, "__builtin_ia32_psrlv8si", IX86_BUILTIN_PSRLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29980 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4si, "__builtin_ia32_psrlv4si", IX86_BUILTIN_PSRLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29981
29982 { OPTION_MASK_ISA_LZCNT, CODE_FOR_clzhi2_lzcnt, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
29983
29984 /* BMI */
29985 { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_si, "__builtin_ia32_bextr_u32", IX86_BUILTIN_BEXTR32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
29986 { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_di, "__builtin_ia32_bextr_u64", IX86_BUILTIN_BEXTR64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
29987 { OPTION_MASK_ISA_BMI, CODE_FOR_ctzhi2, "__builtin_ctzs", IX86_BUILTIN_CTZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
29988
29989 /* TBM */
29990 { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_si, "__builtin_ia32_bextri_u32", IX86_BUILTIN_BEXTRI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
29991 { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_di, "__builtin_ia32_bextri_u64", IX86_BUILTIN_BEXTRI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
29992
29993 /* F16C */
29994 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int) V4SF_FTYPE_V8HI },
29995 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps256, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256, UNKNOWN, (int) V8SF_FTYPE_V8HI },
29996 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT },
29997 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph256, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT },
29998
29999 /* BMI2 */
30000 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_si3, "__builtin_ia32_bzhi_si", IX86_BUILTIN_BZHI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
30001 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_di3, "__builtin_ia32_bzhi_di", IX86_BUILTIN_BZHI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
30002 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_si3, "__builtin_ia32_pdep_si", IX86_BUILTIN_PDEP32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
30003 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_di3, "__builtin_ia32_pdep_di", IX86_BUILTIN_PDEP64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
30004 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_si3, "__builtin_ia32_pext_si", IX86_BUILTIN_PEXT32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
30005 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_di3, "__builtin_ia32_pext_di", IX86_BUILTIN_PEXT64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
30006
30007 /* AVX512F */
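  /* The "_mask" builtins take two extra trailing operands, a merge source
     and a write mask (the trailing _HI/_QI in the type name for 16- and
     8-element vectors); "_maskz" forms zero the masked-off elements
     instead of merging.  */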
30008 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_alignv16si_mask, "__builtin_ia32_alignd512_mask", IX86_BUILTIN_ALIGND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI },
30009 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_alignv8di_mask, "__builtin_ia32_alignq512_mask", IX86_BUILTIN_ALIGNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI },
30010 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv16si, "__builtin_ia32_blendmd_512_mask", IX86_BUILTIN_BLENDMD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30011 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv8df, "__builtin_ia32_blendmpd_512_mask", IX86_BUILTIN_BLENDMPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30012 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv16sf, "__builtin_ia32_blendmps_512_mask", IX86_BUILTIN_BLENDMPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30013 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv8di, "__builtin_ia32_blendmq_512_mask", IX86_BUILTIN_BLENDMQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30014 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv16sf_mask, "__builtin_ia32_broadcastf32x4_512", IX86_BUILTIN_BROADCASTF32X4_512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_HI },
30015 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv8df_mask, "__builtin_ia32_broadcastf64x4_512", IX86_BUILTIN_BROADCASTF64X4_512, UNKNOWN, (int) V8DF_FTYPE_V4DF_V8DF_QI },
30016 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv16si_mask, "__builtin_ia32_broadcasti32x4_512", IX86_BUILTIN_BROADCASTI32X4_512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_HI },
30017 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv8di_mask, "__builtin_ia32_broadcasti64x4_512", IX86_BUILTIN_BROADCASTI64X4_512, UNKNOWN, (int) V8DI_FTYPE_V4DI_V8DI_QI },
30018 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv8df_mask, "__builtin_ia32_broadcastsd512", IX86_BUILTIN_BROADCASTSD512, UNKNOWN, (int) V8DF_FTYPE_V2DF_V8DF_QI },
30019 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv16sf_mask, "__builtin_ia32_broadcastss512", IX86_BUILTIN_BROADCASTSS512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_HI },
30020 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv16si3_mask, "__builtin_ia32_cmpd512_mask", IX86_BUILTIN_CMPD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_INT_HI },
30021 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv8di3_mask, "__builtin_ia32_cmpq512_mask", IX86_BUILTIN_CMPQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_INT_QI },
30022 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv8df_mask, "__builtin_ia32_compressdf512_mask", IX86_BUILTIN_COMPRESSPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30023 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv16sf_mask, "__builtin_ia32_compresssf512_mask", IX86_BUILTIN_COMPRESSPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30024 { OPTION_MASK_ISA_AVX512F, CODE_FOR_floatv8siv8df2_mask, "__builtin_ia32_cvtdq2pd512_mask", IX86_BUILTIN_CVTDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_QI },
30025 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtps2ph512_mask, "__builtin_ia32_vcvtps2ph512_mask", IX86_BUILTIN_CVTPS2PH512, UNKNOWN, (int) V16HI_FTYPE_V16SF_INT_V16HI_HI },
30026 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufloatv8siv8df_mask, "__builtin_ia32_cvtudq2pd512_mask", IX86_BUILTIN_CVTUDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_QI },
30027 { OPTION_MASK_ISA_AVX512F, CODE_FOR_cvtusi2sd32, "__builtin_ia32_cvtusi2sd32", IX86_BUILTIN_CVTUSI2SD32, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT },
30028 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_mask, "__builtin_ia32_expanddf512_mask", IX86_BUILTIN_EXPANDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30029 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expanddf512_maskz", IX86_BUILTIN_EXPANDPD512Z, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30030 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_mask, "__builtin_ia32_expandsf512_mask", IX86_BUILTIN_EXPANDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30031 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandsf512_maskz", IX86_BUILTIN_EXPANDPS512Z, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30032 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextractf32x4_mask, "__builtin_ia32_extractf32x4_mask", IX86_BUILTIN_EXTRACTF32X4, UNKNOWN, (int) V4SF_FTYPE_V16SF_INT_V4SF_QI },
30033 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextractf64x4_mask, "__builtin_ia32_extractf64x4_mask", IX86_BUILTIN_EXTRACTF64X4, UNKNOWN, (int) V4DF_FTYPE_V8DF_INT_V4DF_QI },
30034 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextracti32x4_mask, "__builtin_ia32_extracti32x4_mask", IX86_BUILTIN_EXTRACTI32X4, UNKNOWN, (int) V4SI_FTYPE_V16SI_INT_V4SI_QI },
30035 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextracti64x4_mask, "__builtin_ia32_extracti64x4_mask", IX86_BUILTIN_EXTRACTI64X4, UNKNOWN, (int) V4DI_FTYPE_V8DI_INT_V4DI_QI },
30036 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinsertf32x4_mask, "__builtin_ia32_insertf32x4_mask", IX86_BUILTIN_INSERTF32X4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V4SF_INT_V16SF_HI },
30037 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinsertf64x4_mask, "__builtin_ia32_insertf64x4_mask", IX86_BUILTIN_INSERTF64X4, UNKNOWN, (int) V8DF_FTYPE_V8DF_V4DF_INT_V8DF_QI },
30038 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinserti32x4_mask, "__builtin_ia32_inserti32x4_mask", IX86_BUILTIN_INSERTI32X4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_INT_V16SI_HI },
30039 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinserti64x4_mask, "__builtin_ia32_inserti64x4_mask", IX86_BUILTIN_INSERTI64X4, UNKNOWN, (int) V8DI_FTYPE_V8DI_V4DI_INT_V8DI_QI },
30040 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_movapd512_mask", IX86_BUILTIN_MOVAPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30041 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_movaps512_mask", IX86_BUILTIN_MOVAPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30042 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movddup512_mask, "__builtin_ia32_movddup512_mask", IX86_BUILTIN_MOVDDUP512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30043 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32_512_mask", IX86_BUILTIN_MOVDQA32_512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30044 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64_512_mask", IX86_BUILTIN_MOVDQA64_512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30045 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movshdup512_mask, "__builtin_ia32_movshdup512_mask", IX86_BUILTIN_MOVSHDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30046 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movsldup512_mask, "__builtin_ia32_movsldup512_mask", IX86_BUILTIN_MOVSLDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30047 { OPTION_MASK_ISA_AVX512F, CODE_FOR_absv16si2_mask, "__builtin_ia32_pabsd512_mask", IX86_BUILTIN_PABSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30048 { OPTION_MASK_ISA_AVX512F, CODE_FOR_absv8di2_mask, "__builtin_ia32_pabsq512_mask", IX86_BUILTIN_PABSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30049 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv16si3_mask, "__builtin_ia32_paddd512_mask", IX86_BUILTIN_PADDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30050 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv8di3_mask, "__builtin_ia32_paddq512_mask", IX86_BUILTIN_PADDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30051 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andv16si3_mask, "__builtin_ia32_pandd512_mask", IX86_BUILTIN_PANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30052 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_andnotv16si3_mask, "__builtin_ia32_pandnd512_mask", IX86_BUILTIN_PANDND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30053 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_andnotv8di3_mask, "__builtin_ia32_pandnq512_mask", IX86_BUILTIN_PANDNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30054 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andv8di3_mask, "__builtin_ia32_pandq512_mask", IX86_BUILTIN_PANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30055 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv16si_mask, "__builtin_ia32_pbroadcastd512", IX86_BUILTIN_PBROADCASTD512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_HI },
30056 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dup_gprv16si_mask, "__builtin_ia32_pbroadcastd512_gpr_mask", IX86_BUILTIN_PBROADCASTD512_GPR, UNKNOWN, (int) V16SI_FTYPE_SI_V16SI_HI },
30057 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv8di, "__builtin_ia32_broadcastmb512", IX86_BUILTIN_PBROADCASTMB512, UNKNOWN, (int) V8DI_FTYPE_QI },
30058 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv16si, "__builtin_ia32_broadcastmw512", IX86_BUILTIN_PBROADCASTMW512, UNKNOWN, (int) V16SI_FTYPE_HI },
30059 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv8di_mask, "__builtin_ia32_pbroadcastq512", IX86_BUILTIN_PBROADCASTQ512, UNKNOWN, (int) V8DI_FTYPE_V2DI_V8DI_QI },
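  /* The _gpr_mask form broadcasts a DImode general register and so is
     restricted to 64-bit targets; the _mem_mask variant below presumably
     serves targets without 64-bit GPRs by loading the scalar from memory.  */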
30060 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vec_dup_gprv8di_mask, "__builtin_ia32_pbroadcastq512_gpr_mask", IX86_BUILTIN_PBROADCASTQ512_GPR, UNKNOWN, (int) V8DI_FTYPE_DI_V8DI_QI },
30061 { OPTION_MASK_ISA_AVX512F & ~OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vec_dup_memv8di_mask, "__builtin_ia32_pbroadcastq512_mem_mask", IX86_BUILTIN_PBROADCASTQ512_MEM, UNKNOWN, (int) V8DI_FTYPE_DI_V8DI_QI },
30062 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_eqv16si3_mask, "__builtin_ia32_pcmpeqd512_mask", IX86_BUILTIN_PCMPEQD512_MASK, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
30063 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_eqv8di3_mask, "__builtin_ia32_pcmpeqq512_mask", IX86_BUILTIN_PCMPEQQ512_MASK, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
30064 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_gtv16si3_mask, "__builtin_ia32_pcmpgtd512_mask", IX86_BUILTIN_PCMPGTD512_MASK, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
30065 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_gtv8di3_mask, "__builtin_ia32_pcmpgtq512_mask", IX86_BUILTIN_PCMPGTQ512_MASK, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
30066 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv16si_mask, "__builtin_ia32_compresssi512_mask", IX86_BUILTIN_PCOMPRESSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30067 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv8di_mask, "__builtin_ia32_compressdi512_mask", IX86_BUILTIN_PCOMPRESSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30068 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_mask, "__builtin_ia32_expandsi512_mask", IX86_BUILTIN_PEXPANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30069 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandsi512_maskz", IX86_BUILTIN_PEXPANDD512Z, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30070 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_mask, "__builtin_ia32_expanddi512_mask", IX86_BUILTIN_PEXPANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30071 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expanddi512_maskz", IX86_BUILTIN_PEXPANDQ512Z, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30072 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv16si3_mask, "__builtin_ia32_pmaxsd512_mask", IX86_BUILTIN_PMAXSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30073 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv8di3_mask, "__builtin_ia32_pmaxsq512_mask", IX86_BUILTIN_PMAXSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30074 { OPTION_MASK_ISA_AVX512F, CODE_FOR_umaxv16si3_mask, "__builtin_ia32_pmaxud512_mask", IX86_BUILTIN_PMAXUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30075 { OPTION_MASK_ISA_AVX512F, CODE_FOR_umaxv8di3_mask, "__builtin_ia32_pmaxuq512_mask", IX86_BUILTIN_PMAXUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30076 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv16si3_mask, "__builtin_ia32_pminsd512_mask", IX86_BUILTIN_PMINSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30077 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv8di3_mask, "__builtin_ia32_pminsq512_mask", IX86_BUILTIN_PMINSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30078 { OPTION_MASK_ISA_AVX512F, CODE_FOR_uminv16si3_mask, "__builtin_ia32_pminud512_mask", IX86_BUILTIN_PMINUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30079 { OPTION_MASK_ISA_AVX512F, CODE_FOR_uminv8di3_mask, "__builtin_ia32_pminuq512_mask", IX86_BUILTIN_PMINUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30080 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16qi2_mask, "__builtin_ia32_pmovdb512_mask", IX86_BUILTIN_PMOVDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
30081 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16hi2_mask, "__builtin_ia32_pmovdw512_mask", IX86_BUILTIN_PMOVDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
30082 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div16qi2_mask, "__builtin_ia32_pmovqb512_mask", IX86_BUILTIN_PMOVQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
30083 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8si2_mask, "__builtin_ia32_pmovqd512_mask", IX86_BUILTIN_PMOVQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
30084 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8hi2_mask, "__builtin_ia32_pmovqw512_mask", IX86_BUILTIN_PMOVQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
30085 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask, "__builtin_ia32_pmovsdb512_mask", IX86_BUILTIN_PMOVSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
30086 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16hi2_mask, "__builtin_ia32_pmovsdw512_mask", IX86_BUILTIN_PMOVSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
30087 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask, "__builtin_ia32_pmovsqb512_mask", IX86_BUILTIN_PMOVSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
30088 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8si2_mask, "__builtin_ia32_pmovsqd512_mask", IX86_BUILTIN_PMOVSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
30089 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8hi2_mask, "__builtin_ia32_pmovsqw512_mask", IX86_BUILTIN_PMOVSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
30090 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv16qiv16si2_mask, "__builtin_ia32_pmovsxbd512_mask", IX86_BUILTIN_PMOVSXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_HI },
30091 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8qiv8di2_mask, "__builtin_ia32_pmovsxbq512_mask", IX86_BUILTIN_PMOVSXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_QI },
30092 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8siv8di2_mask, "__builtin_ia32_pmovsxdq512_mask", IX86_BUILTIN_PMOVSXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_QI },
30093 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv16hiv16si2_mask, "__builtin_ia32_pmovsxwd512_mask", IX86_BUILTIN_PMOVSXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_HI },
30094 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8hiv8di2_mask, "__builtin_ia32_pmovsxwq512_mask", IX86_BUILTIN_PMOVSXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_QI },
30095 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask, "__builtin_ia32_pmovusdb512_mask", IX86_BUILTIN_PMOVUSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
30096 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16hi2_mask, "__builtin_ia32_pmovusdw512_mask", IX86_BUILTIN_PMOVUSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
30097 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div16qi2_mask, "__builtin_ia32_pmovusqb512_mask", IX86_BUILTIN_PMOVUSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
30098 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8si2_mask, "__builtin_ia32_pmovusqd512_mask", IX86_BUILTIN_PMOVUSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
30099 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8hi2_mask, "__builtin_ia32_pmovusqw512_mask", IX86_BUILTIN_PMOVUSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
30100 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv16qiv16si2_mask, "__builtin_ia32_pmovzxbd512_mask", IX86_BUILTIN_PMOVZXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_HI },
30101 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8qiv8di2_mask, "__builtin_ia32_pmovzxbq512_mask", IX86_BUILTIN_PMOVZXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_QI },
30102 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8siv8di2_mask, "__builtin_ia32_pmovzxdq512_mask", IX86_BUILTIN_PMOVZXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_QI },
30103 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv16hiv16si2_mask, "__builtin_ia32_pmovzxwd512_mask", IX86_BUILTIN_PMOVZXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_HI },
30104 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8hiv8di2_mask, "__builtin_ia32_pmovzxwq512_mask", IX86_BUILTIN_PMOVZXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_QI },
30105 { OPTION_MASK_ISA_AVX512F, CODE_FOR_vec_widen_smult_even_v16si_mask, "__builtin_ia32_pmuldq512_mask", IX86_BUILTIN_PMULDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_QI },
30106 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv16si3_mask, "__builtin_ia32_pmulld512_mask" , IX86_BUILTIN_PMULLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30107 { OPTION_MASK_ISA_AVX512F, CODE_FOR_vec_widen_umult_even_v16si_mask, "__builtin_ia32_pmuludq512_mask", IX86_BUILTIN_PMULUDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_QI },
30108 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorv16si3_mask, "__builtin_ia32_pord512_mask", IX86_BUILTIN_PORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30109 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorv8di3_mask, "__builtin_ia32_porq512_mask", IX86_BUILTIN_PORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30110 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolv16si_mask, "__builtin_ia32_prold512_mask", IX86_BUILTIN_PROLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
30111 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolv8di_mask, "__builtin_ia32_prolq512_mask", IX86_BUILTIN_PROLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
30112 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolvv16si_mask, "__builtin_ia32_prolvd512_mask", IX86_BUILTIN_PROLVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30113 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolvv8di_mask, "__builtin_ia32_prolvq512_mask", IX86_BUILTIN_PROLVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30114 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorv16si_mask, "__builtin_ia32_prord512_mask", IX86_BUILTIN_PRORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
30115 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorv8di_mask, "__builtin_ia32_prorq512_mask", IX86_BUILTIN_PRORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
30116 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorvv16si_mask, "__builtin_ia32_prorvd512_mask", IX86_BUILTIN_PRORVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30117 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorvv8di_mask, "__builtin_ia32_prorvq512_mask", IX86_BUILTIN_PRORVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30118 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pshufdv3_mask, "__builtin_ia32_pshufd512_mask", IX86_BUILTIN_PSHUFD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
30119 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslld512_mask", IX86_BUILTIN_PSLLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
30120 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslldi512_mask", IX86_BUILTIN_PSLLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
30121 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllq512_mask", IX86_BUILTIN_PSLLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
30122 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllqi512_mask", IX86_BUILTIN_PSLLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
30123 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashlvv16si_mask, "__builtin_ia32_psllv16si_mask", IX86_BUILTIN_PSLLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30124 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashlvv8di_mask, "__builtin_ia32_psllv8di_mask", IX86_BUILTIN_PSLLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30125 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psrad512_mask", IX86_BUILTIN_PSRAD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
30126 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psradi512_mask", IX86_BUILTIN_PSRADI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
30127 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraq512_mask", IX86_BUILTIN_PSRAQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
30128 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraqi512_mask", IX86_BUILTIN_PSRAQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
30129 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashrvv16si_mask, "__builtin_ia32_psrav16si_mask", IX86_BUILTIN_PSRAVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30130 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashrvv8di_mask, "__builtin_ia32_psrav8di_mask", IX86_BUILTIN_PSRAVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30131 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrld512_mask", IX86_BUILTIN_PSRLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
30132 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrldi512_mask", IX86_BUILTIN_PSRLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
30133 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlq512_mask", IX86_BUILTIN_PSRLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
30134 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlqi512_mask", IX86_BUILTIN_PSRLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
30135 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_lshrvv16si_mask, "__builtin_ia32_psrlv16si_mask", IX86_BUILTIN_PSRLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30136 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_lshrvv8di_mask, "__builtin_ia32_psrlv8di_mask", IX86_BUILTIN_PSRLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30137 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv16si3_mask, "__builtin_ia32_psubd512_mask", IX86_BUILTIN_PSUBD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30138 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv8di3_mask, "__builtin_ia32_psubq512_mask", IX86_BUILTIN_PSUBQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30139 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testmv16si3_mask, "__builtin_ia32_ptestmd512", IX86_BUILTIN_PTESTMD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
30140 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testmv8di3_mask, "__builtin_ia32_ptestmq512", IX86_BUILTIN_PTESTMQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
30141 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testnmv16si3_mask, "__builtin_ia32_ptestnmd512", IX86_BUILTIN_PTESTNMD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
30142 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testnmv8di3_mask, "__builtin_ia32_ptestnmq512", IX86_BUILTIN_PTESTNMQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
30143 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_highv16si_mask, "__builtin_ia32_punpckhdq512_mask", IX86_BUILTIN_PUNPCKHDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30144 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_highv8di_mask, "__builtin_ia32_punpckhqdq512_mask", IX86_BUILTIN_PUNPCKHQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30145 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_lowv16si_mask, "__builtin_ia32_punpckldq512_mask", IX86_BUILTIN_PUNPCKLDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30146 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_lowv8di_mask, "__builtin_ia32_punpcklqdq512_mask", IX86_BUILTIN_PUNPCKLQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30147 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorv16si3_mask, "__builtin_ia32_pxord512_mask", IX86_BUILTIN_PXORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30148 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorv8di3_mask, "__builtin_ia32_pxorq512_mask", IX86_BUILTIN_PXORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30149 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rcp14v8df_mask, "__builtin_ia32_rcp14pd512_mask", IX86_BUILTIN_RCP14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30150 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rcp14v16sf_mask, "__builtin_ia32_rcp14ps512_mask", IX86_BUILTIN_RCP14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30151 { OPTION_MASK_ISA_AVX512F, CODE_FOR_srcp14v2df, "__builtin_ia32_rcp14sd", IX86_BUILTIN_RCP14SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
30152 { OPTION_MASK_ISA_AVX512F, CODE_FOR_srcp14v4sf, "__builtin_ia32_rcp14ss", IX86_BUILTIN_RCP14SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
30153 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v8df_mask, "__builtin_ia32_rsqrt14pd512_mask", IX86_BUILTIN_RSQRT14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30154 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v16sf_mask, "__builtin_ia32_rsqrt14ps512_mask", IX86_BUILTIN_RSQRT14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30155 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v2df, "__builtin_ia32_rsqrt14sd", IX86_BUILTIN_RSQRT14SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
30156 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v4sf, "__builtin_ia32_rsqrt14ss", IX86_BUILTIN_RSQRT14SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
30157 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shufpd512_mask, "__builtin_ia32_shufpd512_mask", IX86_BUILTIN_SHUFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI },
30158 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shufps512_mask, "__builtin_ia32_shufps512_mask", IX86_BUILTIN_SHUFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI },
30159 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_f32x4_mask, "__builtin_ia32_shuf_f32x4_mask", IX86_BUILTIN_SHUF_F32x4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI },
30160 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_f64x2_mask, "__builtin_ia32_shuf_f64x2_mask", IX86_BUILTIN_SHUF_F64x2, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI },
30161 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_i32x4_mask, "__builtin_ia32_shuf_i32x4_mask", IX86_BUILTIN_SHUF_I32x4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI },
30162 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_i64x2_mask, "__builtin_ia32_shuf_i64x2_mask", IX86_BUILTIN_SHUF_I64x2, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI },
30163 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ucmpv16si3_mask, "__builtin_ia32_ucmpd512_mask", IX86_BUILTIN_UCMPD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_INT_HI },
30164 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ucmpv8di3_mask, "__builtin_ia32_ucmpq512_mask", IX86_BUILTIN_UCMPQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_INT_QI },
30165 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpckhpd512_mask, "__builtin_ia32_unpckhpd512_mask", IX86_BUILTIN_UNPCKHPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
30166 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpckhps512_mask, "__builtin_ia32_unpckhps512_mask", IX86_BUILTIN_UNPCKHPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
30167 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpcklpd512_mask, "__builtin_ia32_unpcklpd512_mask", IX86_BUILTIN_UNPCKLPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
30168 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpcklps512_mask, "__builtin_ia32_unpcklps512_mask", IX86_BUILTIN_UNPCKLPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
30169 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_clzv16si2_mask, "__builtin_ia32_vplzcntd_512_mask", IX86_BUILTIN_VPCLZCNTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30170 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_clzv8di2_mask, "__builtin_ia32_vplzcntq_512_mask", IX86_BUILTIN_VPCLZCNTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30171 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_conflictv16si_mask, "__builtin_ia32_vpconflictsi_512_mask", IX86_BUILTIN_VPCONFLICTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30172 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_conflictv8di_mask, "__builtin_ia32_vpconflictdi_512_mask", IX86_BUILTIN_VPCONFLICTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30173 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permv8df_mask, "__builtin_ia32_permdf512_mask", IX86_BUILTIN_VPERMDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI },
30174 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permv8di_mask, "__builtin_ia32_permdi512_mask", IX86_BUILTIN_VPERMDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
30175 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv16si3_mask, "__builtin_ia32_vpermi2vard512_mask", IX86_BUILTIN_VPERMI2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30176 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv8df3_mask, "__builtin_ia32_vpermi2varpd512_mask", IX86_BUILTIN_VPERMI2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
30177 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv16sf3_mask, "__builtin_ia32_vpermi2varps512_mask", IX86_BUILTIN_VPERMI2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
30178 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv8di3_mask, "__builtin_ia32_vpermi2varq512_mask", IX86_BUILTIN_VPERMI2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30179 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilv8df_mask, "__builtin_ia32_vpermilpd512_mask", IX86_BUILTIN_VPERMILPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI },
30180 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilv16sf_mask, "__builtin_ia32_vpermilps512_mask", IX86_BUILTIN_VPERMILPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI },
30181 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilvarv8df3_mask, "__builtin_ia32_vpermilvarpd512_mask", IX86_BUILTIN_VPERMILVARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
30182 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilvarv16sf3_mask, "__builtin_ia32_vpermilvarps512_mask", IX86_BUILTIN_VPERMILVARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
30183 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16si3_mask, "__builtin_ia32_vpermt2vard512_mask", IX86_BUILTIN_VPERMT2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30184 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16si3_maskz, "__builtin_ia32_vpermt2vard512_maskz", IX86_BUILTIN_VPERMT2VARD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30185 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8df3_mask, "__builtin_ia32_vpermt2varpd512_mask", IX86_BUILTIN_VPERMT2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_QI },
30186 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8df3_maskz, "__builtin_ia32_vpermt2varpd512_maskz", IX86_BUILTIN_VPERMT2VARPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_QI },
30187 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16sf3_mask, "__builtin_ia32_vpermt2varps512_mask", IX86_BUILTIN_VPERMT2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_HI },
30188 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16sf3_maskz, "__builtin_ia32_vpermt2varps512_maskz", IX86_BUILTIN_VPERMT2VARPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_HI },
30189 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8di3_mask, "__builtin_ia32_vpermt2varq512_mask", IX86_BUILTIN_VPERMT2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30190 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8di3_maskz, "__builtin_ia32_vpermt2varq512_maskz", IX86_BUILTIN_VPERMT2VARQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30191 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv8df_mask, "__builtin_ia32_permvardf512_mask", IX86_BUILTIN_VPERMVARDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
30192 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv8di_mask, "__builtin_ia32_permvardi512_mask", IX86_BUILTIN_VPERMVARDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30193 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv16sf_mask, "__builtin_ia32_permvarsf512_mask", IX86_BUILTIN_VPERMVARSF512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
30194 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv16si_mask, "__builtin_ia32_permvarsi512_mask", IX86_BUILTIN_VPERMVARSI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30195 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv16si_mask, "__builtin_ia32_pternlogd512_mask", IX86_BUILTIN_VTERNLOGD512_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI },
30196 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv16si_maskz, "__builtin_ia32_pternlogd512_maskz", IX86_BUILTIN_VTERNLOGD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI },
30197 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv8di_mask, "__builtin_ia32_pternlogq512_mask", IX86_BUILTIN_VTERNLOGQ512_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI },
30198 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv8di_maskz, "__builtin_ia32_pternlogq512_maskz", IX86_BUILTIN_VTERNLOGQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI },
30199
30200 { OPTION_MASK_ISA_AVX512F, CODE_FOR_copysignv16sf3, "__builtin_ia32_copysignps512", IX86_BUILTIN_CPYSGNPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF },
30201 { OPTION_MASK_ISA_AVX512F, CODE_FOR_copysignv8df3, "__builtin_ia32_copysignpd512", IX86_BUILTIN_CPYSGNPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF },
30202 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv8df2, "__builtin_ia32_sqrtpd512", IX86_BUILTIN_SQRTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF },
30203 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sqrtv16sf2, "__builtin_ia32_sqrtps512", IX86_BUILTIN_SQRTPS_NR512, UNKNOWN, (int) V16SF_FTYPE_V16SF },
30204 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v16sf, "__builtin_ia32_exp2ps", IX86_BUILTIN_EXP2PS, UNKNOWN, (int) V16SF_FTYPE_V16SF },
30205 { OPTION_MASK_ISA_AVX512F, CODE_FOR_roundv8df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix512", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512, UNKNOWN, (int) V16SI_FTYPE_V8DF_V8DF },
30206 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_floorpd_vec_pack_sfix512", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_FLOOR, (int) V16SI_FTYPE_V8DF_V8DF_ROUND },
30207 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_ceilpd_vec_pack_sfix512", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_CEIL, (int) V16SI_FTYPE_V8DF_V8DF_ROUND },
30208
30209 /* Mask arithmetic operations */
30210 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andhi3, "__builtin_ia32_kandhi", IX86_BUILTIN_KAND16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30211 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kandnhi, "__builtin_ia32_kandnhi", IX86_BUILTIN_KANDN16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30212 { OPTION_MASK_ISA_AVX512F, CODE_FOR_one_cmplhi2, "__builtin_ia32_knothi", IX86_BUILTIN_KNOT16, UNKNOWN, (int) HI_FTYPE_HI },
30213 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorhi3, "__builtin_ia32_korhi", IX86_BUILTIN_KOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30214 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kortestchi, "__builtin_ia32_kortestchi", IX86_BUILTIN_KORTESTC16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30215 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kortestzhi, "__builtin_ia32_kortestzhi", IX86_BUILTIN_KORTESTZ16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30216 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kunpckhi, "__builtin_ia32_kunpckhi", IX86_BUILTIN_KUNPCKBW, UNKNOWN, (int) HI_FTYPE_HI_HI },
30217 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kxnorhi, "__builtin_ia32_kxnorhi", IX86_BUILTIN_KXNOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30218 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorhi3, "__builtin_ia32_kxorhi", IX86_BUILTIN_KXOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30219 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kmovw, "__builtin_ia32_kmov16", IX86_BUILTIN_KMOV16, UNKNOWN, (int) HI_FTYPE_HI },
30220
30221 /* SHA */
30222 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1msg1, 0, IX86_BUILTIN_SHA1MSG1, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30223 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1msg2, 0, IX86_BUILTIN_SHA1MSG2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30224 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1nexte, 0, IX86_BUILTIN_SHA1NEXTE, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30225 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1rnds4, 0, IX86_BUILTIN_SHA1RNDS4, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
30226 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256msg1, 0, IX86_BUILTIN_SHA256MSG1, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30227 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256msg2, 0, IX86_BUILTIN_SHA256MSG2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30228 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256rnds2, 0, IX86_BUILTIN_SHA256RNDS2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI },
30229 };
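/* Illustrative usage sketch (editorial addition, not part of the GCC
   sources): the "Mask arithmetic operations" rows above describe the
   16-bit opmask builtins; __builtin_ia32_kandhi, for example, carries the
   HI_FTYPE_HI_HI signature, so in user code compiled with -mavx512f a
   mask AND looks roughly like this (the function name is hypothetical;
   unsigned short mirrors the __mmask16 type):

     static inline unsigned short
     sketch_kand16 (unsigned short a, unsigned short b)
     {
       return (unsigned short) __builtin_ia32_kandhi (a, b);
     }

   The AVX-512 mask intrinsics are expected to be thin wrappers around
   these builtins.  */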
30230
30231 /* Builtins with rounding support. */
30232 static const struct builtin_description bdesc_round_args[] =
30233 {
30234 /* AVX512F */
30235 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv8df3_mask_round, "__builtin_ia32_addpd512_mask", IX86_BUILTIN_ADDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30236 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv16sf3_mask_round, "__builtin_ia32_addps512_mask", IX86_BUILTIN_ADDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30237 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmaddv2df3_round, "__builtin_ia32_addsd_round", IX86_BUILTIN_ADDSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30238 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmaddv4sf3_round, "__builtin_ia32_addss_round", IX86_BUILTIN_ADDSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30239 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv8df3_mask_round, "__builtin_ia32_cmppd512_mask", IX86_BUILTIN_CMPPD512, UNKNOWN, (int) QI_FTYPE_V8DF_V8DF_INT_QI_INT },
30240 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv16sf3_mask_round, "__builtin_ia32_cmpps512_mask", IX86_BUILTIN_CMPPS512, UNKNOWN, (int) HI_FTYPE_V16SF_V16SF_INT_HI_INT },
30241 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmcmpv2df3_mask_round, "__builtin_ia32_cmpsd_mask", IX86_BUILTIN_CMPSD_MASK, UNKNOWN, (int) QI_FTYPE_V2DF_V2DF_INT_QI_INT },
30242 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmcmpv4sf3_mask_round, "__builtin_ia32_cmpss_mask", IX86_BUILTIN_CMPSS_MASK, UNKNOWN, (int) QI_FTYPE_V4SF_V4SF_INT_QI_INT },
30243 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_comi_round, "__builtin_ia32_vcomisd", IX86_BUILTIN_COMIDF, UNKNOWN, (int) INT_FTYPE_V2DF_V2DF_INT_INT },
30244 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_comi_round, "__builtin_ia32_vcomiss", IX86_BUILTIN_COMISF, UNKNOWN, (int) INT_FTYPE_V4SF_V4SF_INT_INT },
30245 { OPTION_MASK_ISA_AVX512F, CODE_FOR_floatv16siv16sf2_mask_round, "__builtin_ia32_cvtdq2ps512_mask", IX86_BUILTIN_CVTDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT },
30246 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtpd2dq512_mask_round, "__builtin_ia32_cvtpd2dq512_mask", IX86_BUILTIN_CVTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
30247 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtpd2ps512_mask_round, "__builtin_ia32_cvtpd2ps512_mask", IX86_BUILTIN_CVTPD2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DF_V8SF_QI_INT },
30248 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ufix_notruncv8dfv8si_mask_round, "__builtin_ia32_cvtpd2udq512_mask", IX86_BUILTIN_CVTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
30249 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtph2ps512_mask_round, "__builtin_ia32_vcvtph2ps512_mask", IX86_BUILTIN_CVTPH2PS512, UNKNOWN, (int) V16SF_FTYPE_V16HI_V16SF_HI_INT },
30250 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2dq512_mask", IX86_BUILTIN_CVTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
30251 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtps2pd512_mask_round, "__builtin_ia32_cvtps2pd512_mask", IX86_BUILTIN_CVTPS2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SF_V8DF_QI_INT },
30252 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ufix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2udq512_mask", IX86_BUILTIN_CVTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
30253 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtsd2ss_round, "__builtin_ia32_cvtsd2ss_round", IX86_BUILTIN_CVTSD2SS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF_INT },
30254 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq_round, "__builtin_ia32_cvtsi2sd64", IX86_BUILTIN_CVTSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT64_INT },
30255 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvtsi2ss_round, "__builtin_ia32_cvtsi2ss32", IX86_BUILTIN_CVTSI2SS32, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_INT },
30256 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq_round, "__builtin_ia32_cvtsi2ss64", IX86_BUILTIN_CVTSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT64_INT },
30257 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtss2sd_round, "__builtin_ia32_cvtss2sd_round", IX86_BUILTIN_CVTSS2SD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF_INT },
30258 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2dq512_mask", IX86_BUILTIN_CVTTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
30259 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2udq512_mask", IX86_BUILTIN_CVTTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
30260 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2dq512_mask", IX86_BUILTIN_CVTTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
30261 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2udq512_mask", IX86_BUILTIN_CVTTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
30262 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufloatv16siv16sf2_mask_round, "__builtin_ia32_cvtudq2ps512_mask", IX86_BUILTIN_CVTUDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT },
30263 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_cvtusi2sd64_round, "__builtin_ia32_cvtusi2sd64", IX86_BUILTIN_CVTUSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT64_INT },
30264 { OPTION_MASK_ISA_AVX512F, CODE_FOR_cvtusi2ss32_round, "__builtin_ia32_cvtusi2ss32", IX86_BUILTIN_CVTUSI2SS32, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT_INT },
30265 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_cvtusi2ss64_round, "__builtin_ia32_cvtusi2ss64", IX86_BUILTIN_CVTUSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT64_INT },
30266 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_divv8df3_mask_round, "__builtin_ia32_divpd512_mask", IX86_BUILTIN_DIVPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30267 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_divv16sf3_mask_round, "__builtin_ia32_divps512_mask", IX86_BUILTIN_DIVPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30268 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmdivv2df3_round, "__builtin_ia32_divsd_round", IX86_BUILTIN_DIVSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30269 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmdivv4sf3_round, "__builtin_ia32_divss_round", IX86_BUILTIN_DIVSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30270 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv8df_mask_round, "__builtin_ia32_fixupimmpd512_mask", IX86_BUILTIN_FIXUPIMMPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT },
30271 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv8df_maskz_round, "__builtin_ia32_fixupimmpd512_maskz", IX86_BUILTIN_FIXUPIMMPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT },
30272 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv16sf_mask_round, "__builtin_ia32_fixupimmps512_mask", IX86_BUILTIN_FIXUPIMMPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT },
30273 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv16sf_maskz_round, "__builtin_ia32_fixupimmps512_maskz", IX86_BUILTIN_FIXUPIMMPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT },
30274 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv2df_mask_round, "__builtin_ia32_fixupimmsd_mask", IX86_BUILTIN_FIXUPIMMSD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT },
30275 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv2df_maskz_round, "__builtin_ia32_fixupimmsd_maskz", IX86_BUILTIN_FIXUPIMMSD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT },
30276 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv4sf_mask_round, "__builtin_ia32_fixupimmss_mask", IX86_BUILTIN_FIXUPIMMSS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT },
30277 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv4sf_maskz_round, "__builtin_ia32_fixupimmss_maskz", IX86_BUILTIN_FIXUPIMMSS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT },
30278 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getexpv8df_mask_round, "__builtin_ia32_getexppd512_mask", IX86_BUILTIN_GETEXPPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
30279 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getexpv16sf_mask_round, "__builtin_ia32_getexpps512_mask", IX86_BUILTIN_GETEXPPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
30280 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sgetexpv2df_round, "__builtin_ia32_getexpsd128_round", IX86_BUILTIN_GETEXPSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30281 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sgetexpv4sf_round, "__builtin_ia32_getexpss128_round", IX86_BUILTIN_GETEXPSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30282 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv8df_mask_round, "__builtin_ia32_getmantpd512_mask", IX86_BUILTIN_GETMANTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT },
30283 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv16sf_mask_round, "__builtin_ia32_getmantps512_mask", IX86_BUILTIN_GETMANTPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT },
30284 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv2df_round, "__builtin_ia32_getmantsd_round", IX86_BUILTIN_GETMANTSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
30285 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv4sf_round, "__builtin_ia32_getmantss_round", IX86_BUILTIN_GETMANTSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
30286 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv8df3_mask_round, "__builtin_ia32_maxpd512_mask", IX86_BUILTIN_MAXPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30287 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv16sf3_mask_round, "__builtin_ia32_maxps512_mask", IX86_BUILTIN_MAXPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30288 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsmaxv2df3_round, "__builtin_ia32_maxsd_round", IX86_BUILTIN_MAXSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30289 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsmaxv4sf3_round, "__builtin_ia32_maxss_round", IX86_BUILTIN_MAXSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30290 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv8df3_mask_round, "__builtin_ia32_minpd512_mask", IX86_BUILTIN_MINPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30291 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv16sf3_mask_round, "__builtin_ia32_minps512_mask", IX86_BUILTIN_MINPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30292 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsminv2df3_round, "__builtin_ia32_minsd_round", IX86_BUILTIN_MINSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30293 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsminv4sf3_round, "__builtin_ia32_minss_round", IX86_BUILTIN_MINSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30294 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv8df3_mask_round, "__builtin_ia32_mulpd512_mask", IX86_BUILTIN_MULPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30295 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv16sf3_mask_round, "__builtin_ia32_mulps512_mask", IX86_BUILTIN_MULPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30296 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmmulv2df3_round, "__builtin_ia32_mulsd_round", IX86_BUILTIN_MULSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30297 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmmulv4sf3_round, "__builtin_ia32_mulss_round", IX86_BUILTIN_MULSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30298 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev8df_mask_round, "__builtin_ia32_rndscalepd_mask", IX86_BUILTIN_RNDSCALEPD, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT },
30299 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev16sf_mask_round, "__builtin_ia32_rndscaleps_mask", IX86_BUILTIN_RNDSCALEPS, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT },
30300 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev2df_round, "__builtin_ia32_rndscalesd_round", IX86_BUILTIN_RNDSCALESD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
30301 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev4sf_round, "__builtin_ia32_rndscaless_round", IX86_BUILTIN_RNDSCALESS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
30302 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_scalefv8df_mask_round, "__builtin_ia32_scalefpd512_mask", IX86_BUILTIN_SCALEFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30303 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_scalefv16sf_mask_round, "__builtin_ia32_scalefps512_mask", IX86_BUILTIN_SCALEFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30304 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmscalefv2df_round, "__builtin_ia32_scalefsd_round", IX86_BUILTIN_SCALEFSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30305 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmscalefv4sf_round, "__builtin_ia32_scalefss_round", IX86_BUILTIN_SCALEFSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30306 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv8df2_mask_round, "__builtin_ia32_sqrtpd512_mask", IX86_BUILTIN_SQRTPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
30307 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv16sf2_mask_round, "__builtin_ia32_sqrtps512_mask", IX86_BUILTIN_SQRTPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
30308 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsqrtv2df2_round, "__builtin_ia32_sqrtsd_round", IX86_BUILTIN_SQRTSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30309 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsqrtv4sf2_round, "__builtin_ia32_sqrtss_round", IX86_BUILTIN_SQRTSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30310 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv8df3_mask_round, "__builtin_ia32_subpd512_mask", IX86_BUILTIN_SUBPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30311 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv16sf3_mask_round, "__builtin_ia32_subps512_mask", IX86_BUILTIN_SUBPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30312 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsubv2df3_round, "__builtin_ia32_subsd_round", IX86_BUILTIN_SUBSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30313 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsubv4sf3_round, "__builtin_ia32_subss_round", IX86_BUILTIN_SUBSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30314 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtsd2si_round, "__builtin_ia32_vcvtsd2si32", IX86_BUILTIN_VCVTSD2SI32, UNKNOWN, (int) INT_FTYPE_V2DF_INT },
30315 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq_round, "__builtin_ia32_vcvtsd2si64", IX86_BUILTIN_VCVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF_INT },
30316 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtsd2usi_round, "__builtin_ia32_vcvtsd2usi32", IX86_BUILTIN_VCVTSD2USI32, UNKNOWN, (int) UINT_FTYPE_V2DF_INT },
30317 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvtsd2usiq_round, "__builtin_ia32_vcvtsd2usi64", IX86_BUILTIN_VCVTSD2USI64, UNKNOWN, (int) UINT64_FTYPE_V2DF_INT },
30318 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvtss2si_round, "__builtin_ia32_vcvtss2si32", IX86_BUILTIN_VCVTSS2SI32, UNKNOWN, (int) INT_FTYPE_V4SF_INT },
30319 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq_round, "__builtin_ia32_vcvtss2si64", IX86_BUILTIN_VCVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF_INT },
30320 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtss2usi_round, "__builtin_ia32_vcvtss2usi32", IX86_BUILTIN_VCVTSS2USI32, UNKNOWN, (int) UINT_FTYPE_V4SF_INT },
30321 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvtss2usiq_round, "__builtin_ia32_vcvtss2usi64", IX86_BUILTIN_VCVTSS2USI64, UNKNOWN, (int) UINT64_FTYPE_V4SF_INT },
30322 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvttsd2si_round, "__builtin_ia32_vcvttsd2si32", IX86_BUILTIN_VCVTTSD2SI32, UNKNOWN, (int) INT_FTYPE_V2DF_INT },
30323 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq_round, "__builtin_ia32_vcvttsd2si64", IX86_BUILTIN_VCVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF_INT },
30324 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvttsd2usi_round, "__builtin_ia32_vcvttsd2usi32", IX86_BUILTIN_VCVTTSD2USI32, UNKNOWN, (int) UINT_FTYPE_V2DF_INT },
30325 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvttsd2usiq_round, "__builtin_ia32_vcvttsd2usi64", IX86_BUILTIN_VCVTTSD2USI64, UNKNOWN, (int) UINT64_FTYPE_V2DF_INT },
30326 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvttss2si_round, "__builtin_ia32_vcvttss2si32", IX86_BUILTIN_VCVTTSS2SI32, UNKNOWN, (int) INT_FTYPE_V4SF_INT },
30327 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq_round, "__builtin_ia32_vcvttss2si64", IX86_BUILTIN_VCVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF_INT },
30328 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvttss2usi_round, "__builtin_ia32_vcvttss2usi32", IX86_BUILTIN_VCVTTSS2USI32, UNKNOWN, (int) UINT_FTYPE_V4SF_INT },
30329 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvttss2usiq_round, "__builtin_ia32_vcvttss2usi64", IX86_BUILTIN_VCVTTSS2USI64, UNKNOWN, (int) UINT64_FTYPE_V4SF_INT },
30330 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_mask_round, "__builtin_ia32_vfmaddpd512_mask", IX86_BUILTIN_VFMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30331 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_mask3_round, "__builtin_ia32_vfmaddpd512_mask3", IX86_BUILTIN_VFMADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30332 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_maskz_round, "__builtin_ia32_vfmaddpd512_maskz", IX86_BUILTIN_VFMADDPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30333 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_mask_round, "__builtin_ia32_vfmaddps512_mask", IX86_BUILTIN_VFMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30334 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_mask3_round, "__builtin_ia32_vfmaddps512_mask3", IX86_BUILTIN_VFMADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30335 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_maskz_round, "__builtin_ia32_vfmaddps512_maskz", IX86_BUILTIN_VFMADDPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30336 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fmai_vmfmadd_v2df_round, "__builtin_ia32_vfmaddsd3_round", IX86_BUILTIN_VFMADDSD3_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_INT },
30337 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fmai_vmfmadd_v4sf_round, "__builtin_ia32_vfmaddss3_round", IX86_BUILTIN_VFMADDSS3_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_INT },
30338 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_mask_round, "__builtin_ia32_vfmaddsubpd512_mask", IX86_BUILTIN_VFMADDSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30339 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_mask3_round, "__builtin_ia32_vfmaddsubpd512_mask3", IX86_BUILTIN_VFMADDSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30340 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_maskz_round, "__builtin_ia32_vfmaddsubpd512_maskz", IX86_BUILTIN_VFMADDSUBPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30341 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_mask_round, "__builtin_ia32_vfmaddsubps512_mask", IX86_BUILTIN_VFMADDSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30342 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_mask3_round, "__builtin_ia32_vfmaddsubps512_mask3", IX86_BUILTIN_VFMADDSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30343 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_maskz_round, "__builtin_ia32_vfmaddsubps512_maskz", IX86_BUILTIN_VFMADDSUBPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30344 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsubadd_v8df_mask3_round, "__builtin_ia32_vfmsubaddpd512_mask3", IX86_BUILTIN_VFMSUBADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30345 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsubadd_v16sf_mask3_round, "__builtin_ia32_vfmsubaddps512_mask3", IX86_BUILTIN_VFMSUBADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30346 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v8df_mask3_round, "__builtin_ia32_vfmsubpd512_mask3", IX86_BUILTIN_VFMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30347 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v16sf_mask3_round, "__builtin_ia32_vfmsubps512_mask3", IX86_BUILTIN_VFMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30348 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v8df_mask_round, "__builtin_ia32_vfnmaddpd512_mask", IX86_BUILTIN_VFNMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30349 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v16sf_mask_round, "__builtin_ia32_vfnmaddps512_mask", IX86_BUILTIN_VFNMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30350 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v8df_mask_round, "__builtin_ia32_vfnmsubpd512_mask", IX86_BUILTIN_VFNMSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30351 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v8df_mask3_round, "__builtin_ia32_vfnmsubpd512_mask3", IX86_BUILTIN_VFNMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30352 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v16sf_mask_round, "__builtin_ia32_vfnmsubps512_mask", IX86_BUILTIN_VFNMSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30353 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v16sf_mask3_round, "__builtin_ia32_vfnmsubps512_mask3", IX86_BUILTIN_VFNMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30354
30355 /* AVX512ER */
30356 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v8df_mask_round, "__builtin_ia32_exp2pd_mask", IX86_BUILTIN_EXP2PD_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
30357 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v16sf_mask_round, "__builtin_ia32_exp2ps_mask", IX86_BUILTIN_EXP2PS_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
30358 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rcp28v8df_mask_round, "__builtin_ia32_rcp28pd_mask", IX86_BUILTIN_RCP28PD, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
30359 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rcp28v16sf_mask_round, "__builtin_ia32_rcp28ps_mask", IX86_BUILTIN_RCP28PS, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
30360 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrcp28v2df_round, "__builtin_ia32_rcp28sd_round", IX86_BUILTIN_RCP28SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30361 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrcp28v4sf_round, "__builtin_ia32_rcp28ss_round", IX86_BUILTIN_RCP28SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30362 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rsqrt28v8df_mask_round, "__builtin_ia32_rsqrt28pd_mask", IX86_BUILTIN_RSQRT28PD, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
30363 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rsqrt28v16sf_mask_round, "__builtin_ia32_rsqrt28ps_mask", IX86_BUILTIN_RSQRT28PS, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
30364 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrsqrt28v2df_round, "__builtin_ia32_rsqrt28sd_round", IX86_BUILTIN_RSQRT28SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30365 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrsqrt28v4sf_round, "__builtin_ia32_rsqrt28ss_round", IX86_BUILTIN_RSQRT28SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30366 };
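/* Illustrative usage sketch (editorial addition, not part of the GCC
   sources): every row above ends in a *_INT function type whose last
   operand is the rounding-mode immediate.  __builtin_ia32_addpd512_mask,
   for instance, is V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT, so a masked add in
   user code compiled with -mavx512f looks roughly like this (the names
   are hypothetical; the literal 4 is assumed to correspond to
   _MM_FROUND_CUR_DIRECTION):

     typedef double sketch_v8df __attribute__ ((vector_size (64)));

     static inline sketch_v8df
     sketch_masked_add_pd512 (sketch_v8df a, sketch_v8df b,
                              sketch_v8df src, unsigned char mask)
     {
       return __builtin_ia32_addpd512_mask (a, b, src, mask, 4);
     }

   Lanes whose mask bit is clear are taken from SRC; the remaining lanes
   hold A + B rounded according to the final operand.  */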
30367
30368 /* FMA4 and XOP. */
30369 #define MULTI_ARG_4_DF2_DI_I V2DF_FTYPE_V2DF_V2DF_V2DI_INT
30370 #define MULTI_ARG_4_DF2_DI_I1 V4DF_FTYPE_V4DF_V4DF_V4DI_INT
30371 #define MULTI_ARG_4_SF2_SI_I V4SF_FTYPE_V4SF_V4SF_V4SI_INT
30372 #define MULTI_ARG_4_SF2_SI_I1 V8SF_FTYPE_V8SF_V8SF_V8SI_INT
30373 #define MULTI_ARG_3_SF V4SF_FTYPE_V4SF_V4SF_V4SF
30374 #define MULTI_ARG_3_DF V2DF_FTYPE_V2DF_V2DF_V2DF
30375 #define MULTI_ARG_3_SF2 V8SF_FTYPE_V8SF_V8SF_V8SF
30376 #define MULTI_ARG_3_DF2 V4DF_FTYPE_V4DF_V4DF_V4DF
30377 #define MULTI_ARG_3_DI V2DI_FTYPE_V2DI_V2DI_V2DI
30378 #define MULTI_ARG_3_SI V4SI_FTYPE_V4SI_V4SI_V4SI
30379 #define MULTI_ARG_3_SI_DI V4SI_FTYPE_V4SI_V4SI_V2DI
30380 #define MULTI_ARG_3_HI V8HI_FTYPE_V8HI_V8HI_V8HI
30381 #define MULTI_ARG_3_HI_SI V8HI_FTYPE_V8HI_V8HI_V4SI
30382 #define MULTI_ARG_3_QI V16QI_FTYPE_V16QI_V16QI_V16QI
30383 #define MULTI_ARG_3_DI2 V4DI_FTYPE_V4DI_V4DI_V4DI
30384 #define MULTI_ARG_3_SI2 V8SI_FTYPE_V8SI_V8SI_V8SI
30385 #define MULTI_ARG_3_HI2 V16HI_FTYPE_V16HI_V16HI_V16HI
30386 #define MULTI_ARG_3_QI2 V32QI_FTYPE_V32QI_V32QI_V32QI
30387 #define MULTI_ARG_2_SF V4SF_FTYPE_V4SF_V4SF
30388 #define MULTI_ARG_2_DF V2DF_FTYPE_V2DF_V2DF
30389 #define MULTI_ARG_2_DI V2DI_FTYPE_V2DI_V2DI
30390 #define MULTI_ARG_2_SI V4SI_FTYPE_V4SI_V4SI
30391 #define MULTI_ARG_2_HI V8HI_FTYPE_V8HI_V8HI
30392 #define MULTI_ARG_2_QI V16QI_FTYPE_V16QI_V16QI
30393 #define MULTI_ARG_2_DI_IMM V2DI_FTYPE_V2DI_SI
30394 #define MULTI_ARG_2_SI_IMM V4SI_FTYPE_V4SI_SI
30395 #define MULTI_ARG_2_HI_IMM V8HI_FTYPE_V8HI_SI
30396 #define MULTI_ARG_2_QI_IMM V16QI_FTYPE_V16QI_SI
30397 #define MULTI_ARG_2_DI_CMP V2DI_FTYPE_V2DI_V2DI_CMP
30398 #define MULTI_ARG_2_SI_CMP V4SI_FTYPE_V4SI_V4SI_CMP
30399 #define MULTI_ARG_2_HI_CMP V8HI_FTYPE_V8HI_V8HI_CMP
30400 #define MULTI_ARG_2_QI_CMP V16QI_FTYPE_V16QI_V16QI_CMP
30401 #define MULTI_ARG_2_SF_TF V4SF_FTYPE_V4SF_V4SF_TF
30402 #define MULTI_ARG_2_DF_TF V2DF_FTYPE_V2DF_V2DF_TF
30403 #define MULTI_ARG_2_DI_TF V2DI_FTYPE_V2DI_V2DI_TF
30404 #define MULTI_ARG_2_SI_TF V4SI_FTYPE_V4SI_V4SI_TF
30405 #define MULTI_ARG_2_HI_TF V8HI_FTYPE_V8HI_V8HI_TF
30406 #define MULTI_ARG_2_QI_TF V16QI_FTYPE_V16QI_V16QI_TF
30407 #define MULTI_ARG_1_SF V4SF_FTYPE_V4SF
30408 #define MULTI_ARG_1_DF V2DF_FTYPE_V2DF
30409 #define MULTI_ARG_1_SF2 V8SF_FTYPE_V8SF
30410 #define MULTI_ARG_1_DF2 V4DF_FTYPE_V4DF
30411 #define MULTI_ARG_1_DI V2DI_FTYPE_V2DI
30412 #define MULTI_ARG_1_SI V4SI_FTYPE_V4SI
30413 #define MULTI_ARG_1_HI V8HI_FTYPE_V8HI
30414 #define MULTI_ARG_1_QI V16QI_FTYPE_V16QI
30415 #define MULTI_ARG_1_SI_DI V2DI_FTYPE_V4SI
30416 #define MULTI_ARG_1_HI_DI V2DI_FTYPE_V8HI
30417 #define MULTI_ARG_1_HI_SI V4SI_FTYPE_V8HI
30418 #define MULTI_ARG_1_QI_DI V2DI_FTYPE_V16QI
30419 #define MULTI_ARG_1_QI_SI V4SI_FTYPE_V16QI
30420 #define MULTI_ARG_1_QI_HI V8HI_FTYPE_V16QI
30421
30422 static const struct builtin_description bdesc_multi_arg[] =
30423 {
30424 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v4sf,
30425 "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS,
30426 UNKNOWN, (int)MULTI_ARG_3_SF },
30427 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v2df,
30428 "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD,
30429 UNKNOWN, (int)MULTI_ARG_3_DF },
30430
30431 { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v4sf,
30432 "__builtin_ia32_vfmaddss3", IX86_BUILTIN_VFMADDSS3,
30433 UNKNOWN, (int)MULTI_ARG_3_SF },
30434 { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v2df,
30435 "__builtin_ia32_vfmaddsd3", IX86_BUILTIN_VFMADDSD3,
30436 UNKNOWN, (int)MULTI_ARG_3_DF },
30437
30438 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4sf,
30439 "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS,
30440 UNKNOWN, (int)MULTI_ARG_3_SF },
30441 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v2df,
30442 "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD,
30443 UNKNOWN, (int)MULTI_ARG_3_DF },
30444 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v8sf,
30445 "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256,
30446 UNKNOWN, (int)MULTI_ARG_3_SF2 },
30447 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4df,
30448 "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256,
30449 UNKNOWN, (int)MULTI_ARG_3_DF2 },
30450
30451 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4sf,
30452 "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS,
30453 UNKNOWN, (int)MULTI_ARG_3_SF },
30454 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v2df,
30455 "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD,
30456 UNKNOWN, (int)MULTI_ARG_3_DF },
30457 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v8sf,
30458 "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256,
30459 UNKNOWN, (int)MULTI_ARG_3_SF2 },
30460 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4df,
30461 "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256,
30462 UNKNOWN, (int)MULTI_ARG_3_DF2 },
30463
30464 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV, UNKNOWN, (int)MULTI_ARG_3_DI },
30465 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI, UNKNOWN, (int)MULTI_ARG_3_DI },
30466 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4si, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI, UNKNOWN, (int)MULTI_ARG_3_SI },
30467 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8hi, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI, UNKNOWN, (int)MULTI_ARG_3_HI },
30468 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16qi, "__builtin_ia32_vpcmov_v16qi",IX86_BUILTIN_VPCMOV_V16QI,UNKNOWN, (int)MULTI_ARG_3_QI },
30469 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2df, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF, UNKNOWN, (int)MULTI_ARG_3_DF },
30470 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4sf, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF, UNKNOWN, (int)MULTI_ARG_3_SF },
30471
30472 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
30473 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
30474 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8si256, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256, UNKNOWN, (int)MULTI_ARG_3_SI2 },
30475 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16hi256, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256, UNKNOWN, (int)MULTI_ARG_3_HI2 },
30476 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v32qi256, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256, UNKNOWN, (int)MULTI_ARG_3_QI2 },
30477 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4df256, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
30478 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8sf256, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
30479
30480 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pperm, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM, UNKNOWN, (int)MULTI_ARG_3_QI },
30481
30482 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssww, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
30483 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsww, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
30484 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsswd, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
30485 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacswd, "__builtin_ia32_vpmacswd", IX86_BUILTIN_VPMACSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
30486 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdd, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
30487 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdd, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
30488 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdql, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
30489 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdqh, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
30490 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdql, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
30491 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdqh, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
30492 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcsswd, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
30493 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcswd, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
30494
30495 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv2di3, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ, UNKNOWN, (int)MULTI_ARG_2_DI },
30496 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv4si3, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD, UNKNOWN, (int)MULTI_ARG_2_SI },
30497 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv8hi3, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW, UNKNOWN, (int)MULTI_ARG_2_HI },
30498 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv16qi3, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB, UNKNOWN, (int)MULTI_ARG_2_QI },
30499 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv2di3, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM, UNKNOWN, (int)MULTI_ARG_2_DI_IMM },
30500 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv4si3, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM, UNKNOWN, (int)MULTI_ARG_2_SI_IMM },
30501 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv8hi3, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM, UNKNOWN, (int)MULTI_ARG_2_HI_IMM },
30502 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv16qi3, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM, UNKNOWN, (int)MULTI_ARG_2_QI_IMM },
30503 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav2di3, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ, UNKNOWN, (int)MULTI_ARG_2_DI },
30504 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav4si3, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD, UNKNOWN, (int)MULTI_ARG_2_SI },
30505 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav8hi3, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW, UNKNOWN, (int)MULTI_ARG_2_HI },
30506 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav16qi3, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB, UNKNOWN, (int)MULTI_ARG_2_QI },
30507 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv2di3, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ, UNKNOWN, (int)MULTI_ARG_2_DI },
30508 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv4si3, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD, UNKNOWN, (int)MULTI_ARG_2_SI },
30509 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv8hi3, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW, UNKNOWN, (int)MULTI_ARG_2_HI },
30510 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv16qi3, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB, UNKNOWN, (int)MULTI_ARG_2_QI },
30511
30512 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS, UNKNOWN, (int)MULTI_ARG_1_SF },
30513 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD, UNKNOWN, (int)MULTI_ARG_1_DF },
30514 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4sf2, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF },
30515 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv2df2, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF },
30516 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv8sf2, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256, UNKNOWN, (int)MULTI_ARG_1_SF2 },
30517 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4df2, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256, UNKNOWN, (int)MULTI_ARG_1_DF2 },
30518
30519 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbw, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
30520 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbd, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
30521 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbq, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
30522 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwd, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
30523 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwq, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
30524 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadddq, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
30525 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubw, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
30526 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubd, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
30527 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubq, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
30528 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwd, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
30529 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwq, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
30530 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddudq, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
30531 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubbw, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
30532 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubwd, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
30533 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubdq, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
30534
30535 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
30536 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
30537 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
30538 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
30539 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
30540 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
30541 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },
30542
30543 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
30544 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
30545 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
30546 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
30547 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
30548 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
30549 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },
30550
30551 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
30552 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
30553 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomneqd", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
30554 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
30555 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
30556 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
30557 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },
30558
30559 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
30560 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
30561 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
30562 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
30563 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
30564 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
30565 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
30566
30567 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
30568 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
30569 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
30570 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
30571 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
30572 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
30573 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
30574
30575 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
30576 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
30577 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
30578 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
30579 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
30580 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
30581 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
30582
30583 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
30584 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
30585 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
30586 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
30587 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
30588 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
30589 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
30590
30591 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
30592 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
30593 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
30594 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
30595 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
30596 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
30597 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
30598
30599 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
30600 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
30601 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
30602 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
30603 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseub",IX86_BUILTIN_VPCOMFALSEUB,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
30604 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalseuw",IX86_BUILTIN_VPCOMFALSEUW,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
30605 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalseud",IX86_BUILTIN_VPCOMFALSEUD,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
30606 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseuq",IX86_BUILTIN_VPCOMFALSEUQ,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
30607
30608 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
30609 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
30610 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
30611 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
30612 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
30613 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
30614 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
30615 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
30616
30617 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v2df3, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I },
30618 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I },
30619 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I1 },
30620 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I1 },
30621
30622 };
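/* Illustrative usage sketch (editorial addition, not part of the GCC
   sources): the MULTI_ARG_*_IMM types above take the shift/rotate count
   as a plain SI operand that is expected to be a compile-time constant.
   __builtin_ia32_vprotqi is listed with MULTI_ARG_2_DI_IMM, i.e.
   V2DI_FTYPE_V2DI_SI, so rotating each 64-bit lane left by a constant in
   user code compiled with -mxop looks roughly like this (names are
   hypothetical):

     typedef long long sketch_v2di __attribute__ ((vector_size (16)));

     static inline sketch_v2di
     sketch_rotl64_by_17 (sketch_v2di x)
     {
       return __builtin_ia32_vprotqi (x, 17);
     }

   The variable-count form of the same operation is the separate
   __builtin_ia32_vprotq builtin, whose second operand is itself a V2DI
   vector.  */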
30623 \f
30624 /* TM vector builtins. */
30625
30626 /* Reuse the existing x86-specific `struct builtin_description' because
30627    we're lazy.  Add casts to make them fit.  */
30628 static const struct builtin_description bdesc_tm[] =
30629 {
30630 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WM64", (enum ix86_builtins) BUILT_IN_TM_STORE_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
30631 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaRM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
30632 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaWM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
30633 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
30634 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaRM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
30635 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
30636 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RfWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
30637
30638 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WM128", (enum ix86_builtins) BUILT_IN_TM_STORE_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
30639 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaRM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
30640 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaWM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
30641 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
30642 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaRM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
30643 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
30644 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RfWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
30645
30646 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WM256", (enum ix86_builtins) BUILT_IN_TM_STORE_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
30647 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaRM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
30648 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaWM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
30649 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
30650 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaRM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
30651 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
30652 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RfWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
30653
30654 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_LM64", (enum ix86_builtins) BUILT_IN_TM_LOG_M64, UNKNOWN, VOID_FTYPE_PCVOID },
30655 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_LM128", (enum ix86_builtins) BUILT_IN_TM_LOG_M128, UNKNOWN, VOID_FTYPE_PCVOID },
30656 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_LM256", (enum ix86_builtins) BUILT_IN_TM_LOG_M256, UNKNOWN, VOID_FTYPE_PCVOID },
30657 };
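/* Illustrative sketch (editorial addition, not part of the GCC sources):
   these entries pair the transactional-memory vector builtins with the
   libitm entry points of the same name minus the "__builtin_" prefix
   (see ix86_init_tm_builtins below).  __builtin__ITM_RM64, for example,
   has type V2SI_FTYPE_PCV2SI, i.e. a transactional 64-bit vector load,
   and a direct call in code compiled with -fgnu-tm would look roughly
   like this (names hypothetical; in practice such calls are generated by
   the TM lowering passes rather than written by hand):

     typedef int sketch_v2si __attribute__ ((vector_size (8)));

     static inline sketch_v2si
     sketch_tm_load_v2si (const sketch_v2si *p)
     {
       return __builtin__ITM_RM64 (p);
     }
   */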
30658
30659 /* TM callbacks. */
30660
30661 /* Return the builtin decl needed to load a vector of TYPE. */
30662
30663 static tree
30664 ix86_builtin_tm_load (tree type)
30665 {
30666 if (TREE_CODE (type) == VECTOR_TYPE)
30667 {
30668 switch (tree_to_uhwi (TYPE_SIZE (type)))
30669 {
30670 case 64:
30671 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M64);
30672 case 128:
30673 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M128);
30674 case 256:
30675 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M256);
30676 }
30677 }
30678 return NULL_TREE;
30679 }
30680
30681 /* Return the builtin decl needed to store a vector of TYPE. */
30682
30683 static tree
30684 ix86_builtin_tm_store (tree type)
30685 {
30686 if (TREE_CODE (type) == VECTOR_TYPE)
30687 {
30688 switch (tree_to_uhwi (TYPE_SIZE (type)))
30689 {
30690 case 64:
30691 return builtin_decl_explicit (BUILT_IN_TM_STORE_M64);
30692 case 128:
30693 return builtin_decl_explicit (BUILT_IN_TM_STORE_M128);
30694 case 256:
30695 return builtin_decl_explicit (BUILT_IN_TM_STORE_M256);
30696 }
30697 }
30698 return NULL_TREE;
30699 }
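
/* For example, for a 16-byte vector type such as V4SF the two helpers above
   return the decls registered for BUILT_IN_TM_LOAD_M128 and
   BUILT_IN_TM_STORE_M128 (the "__builtin__ITM_RM128" and "__builtin__ITM_WM128"
   entries in bdesc_tm), so the TM lowering can use a single 128-bit
   transactional access for the whole vector.  */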
30700 \f
30701 /* Initialize the transactional memory vector load/store builtins. */
30702
30703 static void
30704 ix86_init_tm_builtins (void)
30705 {
30706 enum ix86_builtin_func_type ftype;
30707 const struct builtin_description *d;
30708 size_t i;
30709 tree decl;
30710 tree attrs_load, attrs_type_load, attrs_store, attrs_type_store;
30711 tree attrs_log, attrs_type_log;
30712
30713 if (!flag_tm)
30714 return;
30715
30716 /* If there are no builtins defined, we must be compiling in a
30717 language without trans-mem support. */
30718 if (!builtin_decl_explicit_p (BUILT_IN_TM_LOAD_1))
30719 return;
30720
30721 /* Use whatever attributes a normal TM load has. */
30722 decl = builtin_decl_explicit (BUILT_IN_TM_LOAD_1);
30723 attrs_load = DECL_ATTRIBUTES (decl);
30724 attrs_type_load = TYPE_ATTRIBUTES (TREE_TYPE (decl));
30725 /* Use whatever attributes a normal TM store has. */
30726 decl = builtin_decl_explicit (BUILT_IN_TM_STORE_1);
30727 attrs_store = DECL_ATTRIBUTES (decl);
30728 attrs_type_store = TYPE_ATTRIBUTES (TREE_TYPE (decl));
30729 /* Use whatever attributes a normal TM log has. */
30730 decl = builtin_decl_explicit (BUILT_IN_TM_LOG);
30731 attrs_log = DECL_ATTRIBUTES (decl);
30732 attrs_type_log = TYPE_ATTRIBUTES (TREE_TYPE (decl));
30733
30734 for (i = 0, d = bdesc_tm;
30735 i < ARRAY_SIZE (bdesc_tm);
30736 i++, d++)
30737 {
30738 if ((d->mask & ix86_isa_flags) != 0
30739 || (lang_hooks.builtin_function
30740 == lang_hooks.builtin_function_ext_scope))
30741 {
30742 tree type, attrs, attrs_type;
30743 enum built_in_function code = (enum built_in_function) d->code;
30744
30745 ftype = (enum ix86_builtin_func_type) d->flag;
30746 type = ix86_get_builtin_func_type (ftype);
30747
30748 if (BUILTIN_TM_LOAD_P (code))
30749 {
30750 attrs = attrs_load;
30751 attrs_type = attrs_type_load;
30752 }
30753 else if (BUILTIN_TM_STORE_P (code))
30754 {
30755 attrs = attrs_store;
30756 attrs_type = attrs_type_store;
30757 }
30758 else
30759 {
30760 attrs = attrs_log;
30761 attrs_type = attrs_type_log;
30762 }
30763 decl = add_builtin_function (d->name, type, code, BUILT_IN_NORMAL,
30764 /* The builtin without the prefix for
30765 calling it directly. */
30766 d->name + strlen ("__builtin_"),
30767 attrs);
30768 /* add_builtin_function() will set the DECL_ATTRIBUTES, now
30769 set the TYPE_ATTRIBUTES. */
30770 decl_attributes (&TREE_TYPE (decl), attrs_type, ATTR_FLAG_BUILT_IN);
30771
30772 set_builtin_decl (code, decl, false);
30773 }
30774 }
30775 }
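
/* Note on the registration above: each bdesc_tm entry is added under its
   "__builtin_"-prefixed name, and the same string with that prefix stripped
   (for instance "_ITM_RM128" for "__builtin__ITM_RM128") is passed as the
   library name, so the call that is ultimately emitted targets the
   corresponding libitm entry point.  */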
30776
30777 /* Set up all the MMX/SSE builtins, even builtins for instructions that are not
30778 in the current target ISA, so that the user can compile particular modules
30779 with target-specific options that differ from the command-line
30780 options. */
30781 static void
30782 ix86_init_mmx_sse_builtins (void)
30783 {
30784 const struct builtin_description * d;
30785 enum ix86_builtin_func_type ftype;
30786 size_t i;
30787
30788 /* Add all special builtins with variable number of operands. */
30789 for (i = 0, d = bdesc_special_args;
30790 i < ARRAY_SIZE (bdesc_special_args);
30791 i++, d++)
30792 {
30793 if (d->name == 0)
30794 continue;
30795
30796 ftype = (enum ix86_builtin_func_type) d->flag;
30797 def_builtin (d->mask, d->name, ftype, d->code);
30798 }
30799
30800 /* Add all builtins with variable number of operands. */
30801 for (i = 0, d = bdesc_args;
30802 i < ARRAY_SIZE (bdesc_args);
30803 i++, d++)
30804 {
30805 if (d->name == 0)
30806 continue;
30807
30808 ftype = (enum ix86_builtin_func_type) d->flag;
30809 def_builtin_const (d->mask, d->name, ftype, d->code);
30810 }
30811
30812 /* Add all builtins with rounding. */
30813 for (i = 0, d = bdesc_round_args;
30814 i < ARRAY_SIZE (bdesc_round_args);
30815 i++, d++)
30816 {
30817 if (d->name == 0)
30818 continue;
30819
30820 ftype = (enum ix86_builtin_func_type) d->flag;
30821 def_builtin_const (d->mask, d->name, ftype, d->code);
30822 }
30823
30824 /* pcmpestr[im] insns. */
30825 for (i = 0, d = bdesc_pcmpestr;
30826 i < ARRAY_SIZE (bdesc_pcmpestr);
30827 i++, d++)
30828 {
30829 if (d->code == IX86_BUILTIN_PCMPESTRM128)
30830 ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT;
30831 else
30832 ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT;
30833 def_builtin_const (d->mask, d->name, ftype, d->code);
30834 }
30835
30836 /* pcmpistr[im] insns. */
30837 for (i = 0, d = bdesc_pcmpistr;
30838 i < ARRAY_SIZE (bdesc_pcmpistr);
30839 i++, d++)
30840 {
30841 if (d->code == IX86_BUILTIN_PCMPISTRM128)
30842 ftype = V16QI_FTYPE_V16QI_V16QI_INT;
30843 else
30844 ftype = INT_FTYPE_V16QI_V16QI_INT;
30845 def_builtin_const (d->mask, d->name, ftype, d->code);
30846 }
30847
30848 /* comi/ucomi insns. */
30849 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
30850 {
30851 if (d->mask == OPTION_MASK_ISA_SSE2)
30852 ftype = INT_FTYPE_V2DF_V2DF;
30853 else
30854 ftype = INT_FTYPE_V4SF_V4SF;
30855 def_builtin_const (d->mask, d->name, ftype, d->code);
30856 }
30857
30858 /* SSE */
30859 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
30860 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);
30861 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr",
30862 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR);
30863
30864 /* SSE or 3DNow!A */
30865 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
30866 "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR,
30867 IX86_BUILTIN_MASKMOVQ);
30868
30869 /* SSE2 */
30870 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu",
30871 VOID_FTYPE_V16QI_V16QI_PCHAR, IX86_BUILTIN_MASKMOVDQU);
30872
30873 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush",
30874 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH);
30875 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence",
30876 VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE);
30877
30878 /* SSE3. */
30879 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor",
30880 VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITOR);
30881 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait",
30882 VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT);
30883
30884 /* AES */
30885 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128",
30886 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128);
30887 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128",
30888 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128);
30889 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128",
30890 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128);
30891 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128",
30892 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128);
30893 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128",
30894 V2DI_FTYPE_V2DI, IX86_BUILTIN_AESIMC128);
30895 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128",
30896 V2DI_FTYPE_V2DI_INT, IX86_BUILTIN_AESKEYGENASSIST128);
30897
30898 /* PCLMUL */
30899 def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128",
30900 V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128);
30901
30902 /* RDRND */
30903 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand16_step",
30904 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDRAND16_STEP);
30905 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand32_step",
30906 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDRAND32_STEP);
30907 def_builtin (OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT,
30908 "__builtin_ia32_rdrand64_step", INT_FTYPE_PULONGLONG,
30909 IX86_BUILTIN_RDRAND64_STEP);
30910
30911 /* AVX2 */
30912 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2df",
30913 V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_V2DF_INT,
30914 IX86_BUILTIN_GATHERSIV2DF);
30915
30916 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4df",
30917 V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_V4DF_INT,
30918 IX86_BUILTIN_GATHERSIV4DF);
30919
30920 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2df",
30921 V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_V2DF_INT,
30922 IX86_BUILTIN_GATHERDIV2DF);
30923
30924 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4df",
30925 V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_V4DF_INT,
30926 IX86_BUILTIN_GATHERDIV4DF);
30927
30928 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4sf",
30929 V4SF_FTYPE_V4SF_PCFLOAT_V4SI_V4SF_INT,
30930 IX86_BUILTIN_GATHERSIV4SF);
30931
30932 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8sf",
30933 V8SF_FTYPE_V8SF_PCFLOAT_V8SI_V8SF_INT,
30934 IX86_BUILTIN_GATHERSIV8SF);
30935
30936 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf",
30937 V4SF_FTYPE_V4SF_PCFLOAT_V2DI_V4SF_INT,
30938 IX86_BUILTIN_GATHERDIV4SF);
30939
30940 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf256",
30941 V4SF_FTYPE_V4SF_PCFLOAT_V4DI_V4SF_INT,
30942 IX86_BUILTIN_GATHERDIV8SF);
30943
30944 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2di",
30945 V2DI_FTYPE_V2DI_PCINT64_V4SI_V2DI_INT,
30946 IX86_BUILTIN_GATHERSIV2DI);
30947
30948 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4di",
30949 V4DI_FTYPE_V4DI_PCINT64_V4SI_V4DI_INT,
30950 IX86_BUILTIN_GATHERSIV4DI);
30951
30952 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2di",
30953 V2DI_FTYPE_V2DI_PCINT64_V2DI_V2DI_INT,
30954 IX86_BUILTIN_GATHERDIV2DI);
30955
30956 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4di",
30957 V4DI_FTYPE_V4DI_PCINT64_V4DI_V4DI_INT,
30958 IX86_BUILTIN_GATHERDIV4DI);
30959
30960 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4si",
30961 V4SI_FTYPE_V4SI_PCINT_V4SI_V4SI_INT,
30962 IX86_BUILTIN_GATHERSIV4SI);
30963
30964 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8si",
30965 V8SI_FTYPE_V8SI_PCINT_V8SI_V8SI_INT,
30966 IX86_BUILTIN_GATHERSIV8SI);
30967
30968 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si",
30969 V4SI_FTYPE_V4SI_PCINT_V2DI_V4SI_INT,
30970 IX86_BUILTIN_GATHERDIV4SI);
30971
30972 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si256",
30973 V4SI_FTYPE_V4SI_PCINT_V4DI_V4SI_INT,
30974 IX86_BUILTIN_GATHERDIV8SI);
30975
30976 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4df ",
30977 V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_V4DF_INT,
30978 IX86_BUILTIN_GATHERALTSIV4DF);
30979
30980 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4sf256 ",
30981 V8SF_FTYPE_V8SF_PCFLOAT_V4DI_V8SF_INT,
30982 IX86_BUILTIN_GATHERALTDIV8SF);
30983
30984 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4di ",
30985 V4DI_FTYPE_V4DI_PCINT64_V8SI_V4DI_INT,
30986 IX86_BUILTIN_GATHERALTSIV4DI);
30987
30988 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4si256 ",
30989 V8SI_FTYPE_V8SI_PCINT_V4DI_V8SI_INT,
30990 IX86_BUILTIN_GATHERALTDIV8SI);
30991
30992 /* AVX512F */
30993 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16sf",
30994 V16SF_FTYPE_V16SF_PCFLOAT_V16SI_HI_INT,
30995 IX86_BUILTIN_GATHER3SIV16SF);
30996
30997 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8df",
30998 V8DF_FTYPE_V8DF_PCDOUBLE_V8SI_QI_INT,
30999 IX86_BUILTIN_GATHER3SIV8DF);
31000
31001 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16sf",
31002 V8SF_FTYPE_V8SF_PCFLOAT_V8DI_QI_INT,
31003 IX86_BUILTIN_GATHER3DIV16SF);
31004
31005 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8df",
31006 V8DF_FTYPE_V8DF_PCDOUBLE_V8DI_QI_INT,
31007 IX86_BUILTIN_GATHER3DIV8DF);
31008
31009 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16si",
31010 V16SI_FTYPE_V16SI_PCINT_V16SI_HI_INT,
31011 IX86_BUILTIN_GATHER3SIV16SI);
31012
31013 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8di",
31014 V8DI_FTYPE_V8DI_PCINT64_V8SI_QI_INT,
31015 IX86_BUILTIN_GATHER3SIV8DI);
31016
31017 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16si",
31018 V8SI_FTYPE_V8SI_PCINT_V8DI_QI_INT,
31019 IX86_BUILTIN_GATHER3DIV16SI);
31020
31021 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8di",
31022 V8DI_FTYPE_V8DI_PCINT64_V8DI_QI_INT,
31023 IX86_BUILTIN_GATHER3DIV8DI);
31024
31025 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8df ",
31026 V8DF_FTYPE_V8DF_PCDOUBLE_V16SI_QI_INT,
31027 IX86_BUILTIN_GATHER3ALTSIV8DF);
31028
31029 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8sf ",
31030 V16SF_FTYPE_V16SF_PCFLOAT_V8DI_HI_INT,
31031 IX86_BUILTIN_GATHER3ALTDIV16SF);
31032
31033 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8di ",
31034 V8DI_FTYPE_V8DI_PCINT64_V16SI_QI_INT,
31035 IX86_BUILTIN_GATHER3ALTSIV8DI);
31036
31037 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8si ",
31038 V16SI_FTYPE_V16SI_PCINT_V8DI_HI_INT,
31039 IX86_BUILTIN_GATHER3ALTDIV16SI);
31040
31041 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16sf",
31042 VOID_FTYPE_PFLOAT_HI_V16SI_V16SF_INT,
31043 IX86_BUILTIN_SCATTERSIV16SF);
31044
31045 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8df",
31046 VOID_FTYPE_PDOUBLE_QI_V8SI_V8DF_INT,
31047 IX86_BUILTIN_SCATTERSIV8DF);
31048
31049 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16sf",
31050 VOID_FTYPE_PFLOAT_QI_V8DI_V8SF_INT,
31051 IX86_BUILTIN_SCATTERDIV16SF);
31052
31053 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv8df",
31054 VOID_FTYPE_PDOUBLE_QI_V8DI_V8DF_INT,
31055 IX86_BUILTIN_SCATTERDIV8DF);
31056
31057 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16si",
31058 VOID_FTYPE_PINT_HI_V16SI_V16SI_INT,
31059 IX86_BUILTIN_SCATTERSIV16SI);
31060
31061 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8di",
31062 VOID_FTYPE_PLONGLONG_QI_V8SI_V8DI_INT,
31063 IX86_BUILTIN_SCATTERSIV8DI);
31064
31065 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16si",
31066 VOID_FTYPE_PINT_QI_V8DI_V8SI_INT,
31067 IX86_BUILTIN_SCATTERDIV16SI);
31068
31069 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv8di",
31070 VOID_FTYPE_PLONGLONG_QI_V8DI_V8DI_INT,
31071 IX86_BUILTIN_SCATTERDIV8DI);
31072
31073 /* AVX512PF */
31074 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdpd",
31075 VOID_FTYPE_QI_V8SI_PCINT64_INT_INT,
31076 IX86_BUILTIN_GATHERPFDPD);
31077 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdps",
31078 VOID_FTYPE_HI_V16SI_PCINT_INT_INT,
31079 IX86_BUILTIN_GATHERPFDPS);
31080 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqpd",
31081 VOID_FTYPE_QI_V8DI_PCINT64_INT_INT,
31082 IX86_BUILTIN_GATHERPFQPD);
31083 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqps",
31084 VOID_FTYPE_QI_V8DI_PCINT_INT_INT,
31085 IX86_BUILTIN_GATHERPFQPS);
31086 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdpd",
31087 VOID_FTYPE_QI_V8SI_PCINT64_INT_INT,
31088 IX86_BUILTIN_SCATTERPFDPD);
31089 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdps",
31090 VOID_FTYPE_HI_V16SI_PCINT_INT_INT,
31091 IX86_BUILTIN_SCATTERPFDPS);
31092 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqpd",
31093 VOID_FTYPE_QI_V8DI_PCINT64_INT_INT,
31094 IX86_BUILTIN_SCATTERPFQPD);
31095 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqps",
31096 VOID_FTYPE_QI_V8DI_PCINT_INT_INT,
31097 IX86_BUILTIN_SCATTERPFQPS);
31098
31099 /* SHA */
31100 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg1",
31101 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG1);
31102 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg2",
31103 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG2);
31104 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1nexte",
31105 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1NEXTE);
31106 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1rnds4",
31107 V4SI_FTYPE_V4SI_V4SI_INT, IX86_BUILTIN_SHA1RNDS4);
31108 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256msg1",
31109 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG1);
31110 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256msg2",
31111 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG2);
31112 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256rnds2",
31113 V4SI_FTYPE_V4SI_V4SI_V4SI, IX86_BUILTIN_SHA256RNDS2);
31114
31115 /* RTM. */
31116 def_builtin (OPTION_MASK_ISA_RTM, "__builtin_ia32_xabort",
31117 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_XABORT);
31118
31119 /* MMX access to the vec_init patterns. */
31120 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si",
31121 V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);
31122
31123 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi",
31124 V4HI_FTYPE_HI_HI_HI_HI,
31125 IX86_BUILTIN_VEC_INIT_V4HI);
31126
31127 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi",
31128 V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI,
31129 IX86_BUILTIN_VEC_INIT_V8QI);
31130
31131 /* Access to the vec_extract patterns. */
31132 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df",
31133 DOUBLE_FTYPE_V2DF_INT, IX86_BUILTIN_VEC_EXT_V2DF);
31134 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di",
31135 DI_FTYPE_V2DI_INT, IX86_BUILTIN_VEC_EXT_V2DI);
31136 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf",
31137 FLOAT_FTYPE_V4SF_INT, IX86_BUILTIN_VEC_EXT_V4SF);
31138 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si",
31139 SI_FTYPE_V4SI_INT, IX86_BUILTIN_VEC_EXT_V4SI);
31140 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi",
31141 HI_FTYPE_V8HI_INT, IX86_BUILTIN_VEC_EXT_V8HI);
31142
31143 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
31144 "__builtin_ia32_vec_ext_v4hi",
31145 HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI);
31146
31147 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si",
31148 SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI);
31149
31150 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi",
31151 QI_FTYPE_V16QI_INT, IX86_BUILTIN_VEC_EXT_V16QI);
31152
31153 /* Access to the vec_set patterns. */
31154 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT,
31155 "__builtin_ia32_vec_set_v2di",
31156 V2DI_FTYPE_V2DI_DI_INT, IX86_BUILTIN_VEC_SET_V2DI);
31157
31158 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf",
31159 V4SF_FTYPE_V4SF_FLOAT_INT, IX86_BUILTIN_VEC_SET_V4SF);
31160
31161 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si",
31162 V4SI_FTYPE_V4SI_SI_INT, IX86_BUILTIN_VEC_SET_V4SI);
31163
31164 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi",
31165 V8HI_FTYPE_V8HI_HI_INT, IX86_BUILTIN_VEC_SET_V8HI);
31166
31167 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
31168 "__builtin_ia32_vec_set_v4hi",
31169 V4HI_FTYPE_V4HI_HI_INT, IX86_BUILTIN_VEC_SET_V4HI);
31170
31171 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi",
31172 V16QI_FTYPE_V16QI_QI_INT, IX86_BUILTIN_VEC_SET_V16QI);
31173
31174 /* RDSEED */
31175 def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_hi_step",
31176 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDSEED16_STEP);
31177 def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_si_step",
31178 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDSEED32_STEP);
31179 def_builtin (OPTION_MASK_ISA_RDSEED | OPTION_MASK_ISA_64BIT,
31180 "__builtin_ia32_rdseed_di_step",
31181 INT_FTYPE_PULONGLONG, IX86_BUILTIN_RDSEED64_STEP);
31182
31183 /* ADCX */
31184 def_builtin (0, "__builtin_ia32_addcarryx_u32",
31185 UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED, IX86_BUILTIN_ADDCARRYX32);
31186 def_builtin (OPTION_MASK_ISA_64BIT,
31187 "__builtin_ia32_addcarryx_u64",
31188 UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG,
31189 IX86_BUILTIN_ADDCARRYX64);
31190
31191 /* Read/write FLAGS. */
31192 def_builtin (~OPTION_MASK_ISA_64BIT, "__builtin_ia32_readeflags_u32",
31193 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS);
31194 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_ia32_readeflags_u64",
31195 UINT64_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS);
31196 def_builtin (~OPTION_MASK_ISA_64BIT, "__builtin_ia32_writeeflags_u32",
31197 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_WRITE_FLAGS);
31198 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_ia32_writeeflags_u64",
31199 VOID_FTYPE_UINT64, IX86_BUILTIN_WRITE_FLAGS);
31200
31201 /* CLFLUSHOPT. */
31202 def_builtin (OPTION_MASK_ISA_CLFLUSHOPT, "__builtin_ia32_clflushopt",
31203 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSHOPT);
31204
31205 /* Add FMA4 multi-arg argument instructions */
31206 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
31207 {
31208 if (d->name == 0)
31209 continue;
31210
31211 ftype = (enum ix86_builtin_func_type) d->flag;
31212 def_builtin_const (d->mask, d->name, ftype, d->code);
31213 }
31214 }
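
/* The builtins registered above are what the intrinsic wrapper headers expand
   to.  As a rough sketch (the real wrappers live in the <immintrin.h> family
   of headers, not here), the RDRND step builtin defined above, with signature
   INT_FTYPE_PUNSIGNED, would back a wrapper along the lines of:

     static __inline int
     _rdrand32_step (unsigned int *p)
     {
       return __builtin_ia32_rdrand32_step (p);
     }
*/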
31215
31216 /* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL
31217 to return a pointer to VERSION_DECL if the outcome of the expression
31218 formed by PREDICATE_CHAIN is true. This function will be called during
31219 version dispatch to decide which function version to execute. It returns
31220 the basic block at the end, to which more conditions can be added. */
31221
31222 static basic_block
31223 add_condition_to_bb (tree function_decl, tree version_decl,
31224 tree predicate_chain, basic_block new_bb)
31225 {
31226 gimple return_stmt;
31227 tree convert_expr, result_var;
31228 gimple convert_stmt;
31229 gimple call_cond_stmt;
31230 gimple if_else_stmt;
31231
31232 basic_block bb1, bb2, bb3;
31233 edge e12, e23;
31234
31235 tree cond_var, and_expr_var = NULL_TREE;
31236 gimple_seq gseq;
31237
31238 tree predicate_decl, predicate_arg;
31239
31240 push_cfun (DECL_STRUCT_FUNCTION (function_decl));
31241
31242 gcc_assert (new_bb != NULL);
31243 gseq = bb_seq (new_bb);
31244
31245
31246 convert_expr = build1 (CONVERT_EXPR, ptr_type_node,
31247 build_fold_addr_expr (version_decl));
31248 result_var = create_tmp_var (ptr_type_node, NULL);
31249 convert_stmt = gimple_build_assign (result_var, convert_expr);
31250 return_stmt = gimple_build_return (result_var);
31251
31252 if (predicate_chain == NULL_TREE)
31253 {
31254 gimple_seq_add_stmt (&gseq, convert_stmt);
31255 gimple_seq_add_stmt (&gseq, return_stmt);
31256 set_bb_seq (new_bb, gseq);
31257 gimple_set_bb (convert_stmt, new_bb);
31258 gimple_set_bb (return_stmt, new_bb);
31259 pop_cfun ();
31260 return new_bb;
31261 }
31262
31263 while (predicate_chain != NULL)
31264 {
31265 cond_var = create_tmp_var (integer_type_node, NULL);
31266 predicate_decl = TREE_PURPOSE (predicate_chain);
31267 predicate_arg = TREE_VALUE (predicate_chain);
31268 call_cond_stmt = gimple_build_call (predicate_decl, 1, predicate_arg);
31269 gimple_call_set_lhs (call_cond_stmt, cond_var);
31270
31271 gimple_set_block (call_cond_stmt, DECL_INITIAL (function_decl));
31272 gimple_set_bb (call_cond_stmt, new_bb);
31273 gimple_seq_add_stmt (&gseq, call_cond_stmt);
31274
31275 predicate_chain = TREE_CHAIN (predicate_chain);
31276
31277 if (and_expr_var == NULL)
31278 and_expr_var = cond_var;
31279 else
31280 {
31281 gimple assign_stmt;
31282 /* Combine the conditions with MIN_EXPR; the result is zero if any of them is zero:
31283 and_expr_var = min_expr <cond_var, and_expr_var>. */
31284 assign_stmt = gimple_build_assign (and_expr_var,
31285 build2 (MIN_EXPR, integer_type_node,
31286 cond_var, and_expr_var));
31287
31288 gimple_set_block (assign_stmt, DECL_INITIAL (function_decl));
31289 gimple_set_bb (assign_stmt, new_bb);
31290 gimple_seq_add_stmt (&gseq, assign_stmt);
31291 }
31292 }
31293
31294 if_else_stmt = gimple_build_cond (GT_EXPR, and_expr_var,
31295 integer_zero_node,
31296 NULL_TREE, NULL_TREE);
31297 gimple_set_block (if_else_stmt, DECL_INITIAL (function_decl));
31298 gimple_set_bb (if_else_stmt, new_bb);
31299 gimple_seq_add_stmt (&gseq, if_else_stmt);
31300
31301 gimple_seq_add_stmt (&gseq, convert_stmt);
31302 gimple_seq_add_stmt (&gseq, return_stmt);
31303 set_bb_seq (new_bb, gseq);
31304
31305 bb1 = new_bb;
31306 e12 = split_block (bb1, if_else_stmt);
31307 bb2 = e12->dest;
31308 e12->flags &= ~EDGE_FALLTHRU;
31309 e12->flags |= EDGE_TRUE_VALUE;
31310
31311 e23 = split_block (bb2, return_stmt);
31312
31313 gimple_set_bb (convert_stmt, bb2);
31314 gimple_set_bb (return_stmt, bb2);
31315
31316 bb3 = e23->dest;
31317 make_edge (bb1, bb3, EDGE_FALSE_VALUE);
31318
31319 remove_edge (e23);
31320 make_edge (bb2, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
31321
31322 pop_cfun ();
31323
31324 return bb3;
31325 }
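
/* In source-level terms, each call to add_condition_to_bb appends one
   dispatch test to the resolver.  A sketch for a hypothetical version
   foo.arch_corei7 guarded by a single predicate:

     c_1 = __builtin_cpu_is ("corei7");
     if (c_1 > 0)
       return (void *) &foo.arch_corei7;

   with control falling through to the next condition otherwise.  When a
   version has several predicates, their results are combined with MIN_EXPR,
   so the branch is taken only if all of them are nonzero.  */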
31326
31327 /* This parses the attribute arguments to target in DECL and determines
31328 the right builtin to use to match the platform specification.
31329 It returns the priority value for this version decl. If PREDICATE_LIST
31330 is not NULL, it stores the list of cpu features that need to be checked
31331 before dispatching this function. */
31332
31333 static unsigned int
31334 get_builtin_code_for_version (tree decl, tree *predicate_list)
31335 {
31336 tree attrs;
31337 struct cl_target_option cur_target;
31338 tree target_node;
31339 struct cl_target_option *new_target;
31340 const char *arg_str = NULL;
31341 const char *attrs_str = NULL;
31342 char *tok_str = NULL;
31343 char *token;
31344
31345 /* Priority of i386 features, greater value is higher priority. This is
31346 used to decide the order in which function dispatch must happen. For
31347 instance, a version specialized for SSE4.2 should be checked for dispatch
31348 before a version for SSE3, as SSE4.2 implies SSE3. */
31349 enum feature_priority
31350 {
31351 P_ZERO = 0,
31352 P_MMX,
31353 P_SSE,
31354 P_SSE2,
31355 P_SSE3,
31356 P_SSSE3,
31357 P_PROC_SSSE3,
31358 P_SSE4_A,
31359 P_PROC_SSE4_A,
31360 P_SSE4_1,
31361 P_SSE4_2,
31362 P_PROC_SSE4_2,
31363 P_POPCNT,
31364 P_AVX,
31365 P_PROC_AVX,
31366 P_FMA4,
31367 P_XOP,
31368 P_PROC_XOP,
31369 P_FMA,
31370 P_PROC_FMA,
31371 P_AVX2,
31372 P_PROC_AVX2
31373 };
31374
31375 enum feature_priority priority = P_ZERO;
31376
31377 /* These are the target attribute strings for which a dispatcher is
31378 available, from fold_builtin_cpu. */
31379
31380 static struct _feature_list
31381 {
31382 const char *const name;
31383 const enum feature_priority priority;
31384 }
31385 const feature_list[] =
31386 {
31387 {"mmx", P_MMX},
31388 {"sse", P_SSE},
31389 {"sse2", P_SSE2},
31390 {"sse3", P_SSE3},
31391 {"sse4a", P_SSE4_A},
31392 {"ssse3", P_SSSE3},
31393 {"sse4.1", P_SSE4_1},
31394 {"sse4.2", P_SSE4_2},
31395 {"popcnt", P_POPCNT},
31396 {"avx", P_AVX},
31397 {"fma4", P_FMA4},
31398 {"xop", P_XOP},
31399 {"fma", P_FMA},
31400 {"avx2", P_AVX2}
31401 };
31402
31403
31404 static unsigned int NUM_FEATURES
31405 = sizeof (feature_list) / sizeof (struct _feature_list);
31406
31407 unsigned int i;
31408
31409 tree predicate_chain = NULL_TREE;
31410 tree predicate_decl, predicate_arg;
31411
31412 attrs = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
31413 gcc_assert (attrs != NULL);
31414
31415 attrs = TREE_VALUE (TREE_VALUE (attrs));
31416
31417 gcc_assert (TREE_CODE (attrs) == STRING_CST);
31418 attrs_str = TREE_STRING_POINTER (attrs);
31419
31420 /* Return priority zero for default function. */
31421 if (strcmp (attrs_str, "default") == 0)
31422 return 0;
31423
31424 /* Handle arch= if specified. For priority, set it to be 1 more than
31425 the best instruction set the processor can handle. For instance, if
31426 there is a version for atom and a version for ssse3 (the highest ISA
31427 priority for atom), the atom version must be checked for dispatch
31428 before the ssse3 version. */
31429 if (strstr (attrs_str, "arch=") != NULL)
31430 {
31431 cl_target_option_save (&cur_target, &global_options);
31432 target_node = ix86_valid_target_attribute_tree (attrs, &global_options,
31433 &global_options_set);
31434
31435 gcc_assert (target_node);
31436 new_target = TREE_TARGET_OPTION (target_node);
31437 gcc_assert (new_target);
31438
31439 if (new_target->arch_specified && new_target->arch > 0)
31440 {
31441 switch (new_target->arch)
31442 {
31443 case PROCESSOR_CORE2:
31444 arg_str = "core2";
31445 priority = P_PROC_SSSE3;
31446 break;
31447 case PROCESSOR_NEHALEM:
31448 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_AES)
31449 arg_str = "westmere";
31450 else
31451 /* We translate "arch=corei7" and "arch=nehalem" to
31452 "corei7" so that it will be mapped to M_INTEL_COREI7
31453 as cpu type to cover all M_INTEL_COREI7_XXXs. */
31454 arg_str = "corei7";
31455 priority = P_PROC_SSE4_2;
31456 break;
31457 case PROCESSOR_SANDYBRIDGE:
31458 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_F16C)
31459 arg_str = "ivybridge";
31460 else
31461 arg_str = "sandybridge";
31462 priority = P_PROC_AVX;
31463 break;
31464 case PROCESSOR_HASWELL:
31465 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_ADX)
31466 arg_str = "broadwell";
31467 else
31468 arg_str = "haswell";
31469 priority = P_PROC_AVX2;
31470 break;
31471 case PROCESSOR_BONNELL:
31472 arg_str = "bonnell";
31473 priority = P_PROC_SSSE3;
31474 break;
31475 case PROCESSOR_SILVERMONT:
31476 arg_str = "silvermont";
31477 priority = P_PROC_SSE4_2;
31478 break;
31479 case PROCESSOR_AMDFAM10:
31480 arg_str = "amdfam10h";
31481 priority = P_PROC_SSE4_A;
31482 break;
31483 case PROCESSOR_BTVER1:
31484 arg_str = "btver1";
31485 priority = P_PROC_SSE4_A;
31486 break;
31487 case PROCESSOR_BTVER2:
31488 arg_str = "btver2";
31489 priority = P_PROC_AVX;
31490 break;
31491 case PROCESSOR_BDVER1:
31492 arg_str = "bdver1";
31493 priority = P_PROC_XOP;
31494 break;
31495 case PROCESSOR_BDVER2:
31496 arg_str = "bdver2";
31497 priority = P_PROC_FMA;
31498 break;
31499 case PROCESSOR_BDVER3:
31500 arg_str = "bdver3";
31501 priority = P_PROC_FMA;
31502 break;
31503 case PROCESSOR_BDVER4:
31504 arg_str = "bdver4";
31505 priority = P_PROC_AVX2;
31506 break;
31507 }
31508 }
31509
31510 cl_target_option_restore (&global_options, &cur_target);
31511
31512 if (predicate_list && arg_str == NULL)
31513 {
31514 error_at (DECL_SOURCE_LOCATION (decl),
31515 "No dispatcher found for the versioning attributes");
31516 return 0;
31517 }
31518
31519 if (predicate_list)
31520 {
31521 predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_IS];
31522 /* For a C string literal the length includes the trailing NULL. */
31523 predicate_arg = build_string_literal (strlen (arg_str) + 1, arg_str);
31524 predicate_chain = tree_cons (predicate_decl, predicate_arg,
31525 predicate_chain);
31526 }
31527 }
31528
31529 /* Process feature name. */
31530 tok_str = (char *) xmalloc (strlen (attrs_str) + 1);
31531 strcpy (tok_str, attrs_str);
31532 token = strtok (tok_str, ",");
31533 predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_SUPPORTS];
31534
31535 while (token != NULL)
31536 {
31537 /* Do not process "arch=" */
31538 if (strncmp (token, "arch=", 5) == 0)
31539 {
31540 token = strtok (NULL, ",");
31541 continue;
31542 }
31543 for (i = 0; i < NUM_FEATURES; ++i)
31544 {
31545 if (strcmp (token, feature_list[i].name) == 0)
31546 {
31547 if (predicate_list)
31548 {
31549 predicate_arg = build_string_literal (
31550 strlen (feature_list[i].name) + 1,
31551 feature_list[i].name);
31552 predicate_chain = tree_cons (predicate_decl, predicate_arg,
31553 predicate_chain);
31554 }
31555 /* Find the maximum priority feature. */
31556 if (feature_list[i].priority > priority)
31557 priority = feature_list[i].priority;
31558
31559 break;
31560 }
31561 }
31562 if (predicate_list && i == NUM_FEATURES)
31563 {
31564 error_at (DECL_SOURCE_LOCATION (decl),
31565 "No dispatcher found for %s", token);
31566 return 0;
31567 }
31568 token = strtok (NULL, ",");
31569 }
31570 free (tok_str);
31571
31572 if (predicate_list && predicate_chain == NULL_TREE)
31573 {
31574 error_at (DECL_SOURCE_LOCATION (decl),
31575 "No dispatcher found for the versioning attributes : %s",
31576 attrs_str);
31577 return 0;
31578 }
31579 else if (predicate_list)
31580 {
31581 predicate_chain = nreverse (predicate_chain);
31582 *predicate_list = predicate_chain;
31583 }
31584
31585 return priority;
31586 }
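
/* For example, a version declared with __attribute__ ((target ("arch=corei7")))
   comes back with priority P_PROC_SSE4_2 and a predicate chain containing
   __builtin_cpu_is ("corei7"), while one declared with
   __attribute__ ((target ("avx2"))) gets P_AVX2 and
   __builtin_cpu_supports ("avx2").  The P_PROC_* values sit just above the
   highest ISA feature the processor implies, which is what gives arch=
   versions precedence over the plain feature versions they subsume.  */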
31587
31588 /* This compares the priority of target features in function DECL1
31589 and DECL2. It returns positive value if DECL1 is higher priority,
31590 negative value if DECL2 is higher priority and 0 if they are the
31591 same. */
31592
31593 static int
31594 ix86_compare_version_priority (tree decl1, tree decl2)
31595 {
31596 unsigned int priority1 = get_builtin_code_for_version (decl1, NULL);
31597 unsigned int priority2 = get_builtin_code_for_version (decl2, NULL);
31598
31599 return (int)priority1 - (int)priority2;
31600 }
31601
31602 /* V1 and V2 point to function versions with different priorities
31603 based on the target ISA. This function compares their priorities. */
31604
31605 static int
31606 feature_compare (const void *v1, const void *v2)
31607 {
31608 typedef struct _function_version_info
31609 {
31610 tree version_decl;
31611 tree predicate_chain;
31612 unsigned int dispatch_priority;
31613 } function_version_info;
31614
31615 const function_version_info c1 = *(const function_version_info *)v1;
31616 const function_version_info c2 = *(const function_version_info *)v2;
31617 return (c2.dispatch_priority - c1.dispatch_priority);
31618 }
31619
31620 /* This function generates the dispatch function for
31621 multi-versioned functions. DISPATCH_DECL is the function which will
31622 contain the dispatch logic. FNDECLS are the function choices for
31623 dispatch, and is a tree chain. EMPTY_BB is the basic block pointer
31624 in DISPATCH_DECL in which the dispatch code is generated. */
31625
31626 static int
31627 dispatch_function_versions (tree dispatch_decl,
31628 void *fndecls_p,
31629 basic_block *empty_bb)
31630 {
31631 tree default_decl;
31632 gimple ifunc_cpu_init_stmt;
31633 gimple_seq gseq;
31634 int ix;
31635 tree ele;
31636 vec<tree> *fndecls;
31637 unsigned int num_versions = 0;
31638 unsigned int actual_versions = 0;
31639 unsigned int i;
31640
31641 struct _function_version_info
31642 {
31643 tree version_decl;
31644 tree predicate_chain;
31645 unsigned int dispatch_priority;
31646 }*function_version_info;
31647
31648 gcc_assert (dispatch_decl != NULL
31649 && fndecls_p != NULL
31650 && empty_bb != NULL);
31651
31652 /* fndecls_p is actually a vector. */
31653 fndecls = static_cast<vec<tree> *> (fndecls_p);
31654
31655 /* At least one more version other than the default. */
31656 num_versions = fndecls->length ();
31657 gcc_assert (num_versions >= 2);
31658
31659 function_version_info = (struct _function_version_info *)
31660 XNEWVEC (struct _function_version_info, (num_versions - 1));
31661
31662 /* The first version in the vector is the default decl. */
31663 default_decl = (*fndecls)[0];
31664
31665 push_cfun (DECL_STRUCT_FUNCTION (dispatch_decl));
31666
31667 gseq = bb_seq (*empty_bb);
31668 /* Function version dispatch is via IFUNC. IFUNC resolvers fire before
31669 constructors, so explicitly call __builtin_cpu_init here. */
31670 ifunc_cpu_init_stmt = gimple_build_call_vec (
31671 ix86_builtins [(int) IX86_BUILTIN_CPU_INIT], vNULL);
31672 gimple_seq_add_stmt (&gseq, ifunc_cpu_init_stmt);
31673 gimple_set_bb (ifunc_cpu_init_stmt, *empty_bb);
31674 set_bb_seq (*empty_bb, gseq);
31675
31676 pop_cfun ();
31677
31678
31679 for (ix = 1; fndecls->iterate (ix, &ele); ++ix)
31680 {
31681 tree version_decl = ele;
31682 tree predicate_chain = NULL_TREE;
31683 unsigned int priority;
31684 /* Get attribute string, parse it and find the right predicate decl.
31685 The predicate function could be a lengthy combination of many
31686 features, like arch-type and various isa-variants. */
31687 priority = get_builtin_code_for_version (version_decl,
31688 &predicate_chain);
31689
31690 if (predicate_chain == NULL_TREE)
31691 continue;
31692
31693 function_version_info [actual_versions].version_decl = version_decl;
31694 function_version_info [actual_versions].predicate_chain
31695 = predicate_chain;
31696 function_version_info [actual_versions].dispatch_priority = priority;
31697 actual_versions++;
31698 }
31699
31700 /* Sort the versions according to descending order of dispatch priority. The
31701 priority is based on the ISA. This is not a perfect solution. There
31702 could still be ambiguity. If more than one function version is suitable
31703 to execute, which one should be dispatched? In future, allow the user
31704 to specify a dispatch priority next to the version. */
31705 qsort (function_version_info, actual_versions,
31706 sizeof (struct _function_version_info), feature_compare);
31707
31708 for (i = 0; i < actual_versions; ++i)
31709 *empty_bb = add_condition_to_bb (dispatch_decl,
31710 function_version_info[i].version_decl,
31711 function_version_info[i].predicate_chain,
31712 *empty_bb);
31713
31714 /* dispatch default version at the end. */
31715 *empty_bb = add_condition_to_bb (dispatch_decl, default_decl,
31716 NULL, *empty_bb);
31717
31718 free (function_version_info);
31719 return 0;
31720 }
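
/* For example, versions tagged "avx2", "arch=corei7" and "default" end up
   tested in exactly that order in the resolver: the avx2 check first (highest
   dispatch priority), then the corei7 check, and finally the default version,
   which is appended unconditionally.  */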
31721
31722 /* Comparator function to be used in qsort routine to sort attribute
31723 specification strings to "target". */
31724
31725 static int
31726 attr_strcmp (const void *v1, const void *v2)
31727 {
31728 const char *c1 = *(char *const*)v1;
31729 const char *c2 = *(char *const*)v2;
31730 return strcmp (c1, c2);
31731 }
31732
31733 /* ARGLIST is the argument to target attribute. This function tokenizes
31734 the comma separated arguments, sorts them and returns a string which
31735 is a unique identifier for the comma separated arguments. It also
31736 replaces non-identifier characters "=,-" with "_". */
31737
31738 static char *
31739 sorted_attr_string (tree arglist)
31740 {
31741 tree arg;
31742 size_t str_len_sum = 0;
31743 char **args = NULL;
31744 char *attr_str, *ret_str;
31745 char *attr = NULL;
31746 unsigned int argnum = 1;
31747 unsigned int i;
31748
31749 for (arg = arglist; arg; arg = TREE_CHAIN (arg))
31750 {
31751 const char *str = TREE_STRING_POINTER (TREE_VALUE (arg));
31752 size_t len = strlen (str);
31753 str_len_sum += len + 1;
31754 if (arg != arglist)
31755 argnum++;
31756 for (i = 0; i < strlen (str); i++)
31757 if (str[i] == ',')
31758 argnum++;
31759 }
31760
31761 attr_str = XNEWVEC (char, str_len_sum);
31762 str_len_sum = 0;
31763 for (arg = arglist; arg; arg = TREE_CHAIN (arg))
31764 {
31765 const char *str = TREE_STRING_POINTER (TREE_VALUE (arg));
31766 size_t len = strlen (str);
31767 memcpy (attr_str + str_len_sum, str, len);
31768 attr_str[str_len_sum + len] = TREE_CHAIN (arg) ? ',' : '\0';
31769 str_len_sum += len + 1;
31770 }
31771
31772 /* Replace "=,-" with "_". */
31773 for (i = 0; i < strlen (attr_str); i++)
31774 if (attr_str[i] == '=' || attr_str[i]== '-')
31775 attr_str[i] = '_';
31776
31777 if (argnum == 1)
31778 return attr_str;
31779
31780 args = XNEWVEC (char *, argnum);
31781
31782 i = 0;
31783 attr = strtok (attr_str, ",");
31784 while (attr != NULL)
31785 {
31786 args[i] = attr;
31787 i++;
31788 attr = strtok (NULL, ",");
31789 }
31790
31791 qsort (args, argnum, sizeof (char *), attr_strcmp);
31792
31793 ret_str = XNEWVEC (char, str_len_sum);
31794 str_len_sum = 0;
31795 for (i = 0; i < argnum; i++)
31796 {
31797 size_t len = strlen (args[i]);
31798 memcpy (ret_str + str_len_sum, args[i], len);
31799 ret_str[str_len_sum + len] = i < argnum - 1 ? '_' : '\0';
31800 str_len_sum += len + 1;
31801 }
31802
31803 XDELETEVEC (args);
31804 XDELETEVEC (attr_str);
31805 return ret_str;
31806 }
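
/* For example, the attribute arguments "avx,arch=slm" become the tokens
   "avx" and "arch_slm", which sort to "arch_slm", "avx" and are glued back
   together as "arch_slm_avx"; any permutation of the same target string thus
   maps to the same identifier.  */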
31807
31808 /* This function changes the assembler name for functions that are
31809 versions. If DECL is a function version and has a "target"
31810 attribute, it appends the attribute string to its assembler name. */
31811
31812 static tree
31813 ix86_mangle_function_version_assembler_name (tree decl, tree id)
31814 {
31815 tree version_attr;
31816 const char *orig_name, *version_string;
31817 char *attr_str, *assembler_name;
31818
31819 if (DECL_DECLARED_INLINE_P (decl)
31820 && lookup_attribute ("gnu_inline",
31821 DECL_ATTRIBUTES (decl)))
31822 error_at (DECL_SOURCE_LOCATION (decl),
31823 "Function versions cannot be marked as gnu_inline,"
31824 " bodies have to be generated");
31825
31826 if (DECL_VIRTUAL_P (decl)
31827 || DECL_VINDEX (decl))
31828 sorry ("Virtual function multiversioning not supported");
31829
31830 version_attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
31831
31832 /* target attribute string cannot be NULL. */
31833 gcc_assert (version_attr != NULL_TREE);
31834
31835 orig_name = IDENTIFIER_POINTER (id);
31836 version_string
31837 = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (version_attr)));
31838
31839 if (strcmp (version_string, "default") == 0)
31840 return id;
31841
31842 attr_str = sorted_attr_string (TREE_VALUE (version_attr));
31843 assembler_name = XNEWVEC (char, strlen (orig_name) + strlen (attr_str) + 2);
31844
31845 sprintf (assembler_name, "%s.%s", orig_name, attr_str);
31846
31847 /* Allow assembler name to be modified if already set. */
31848 if (DECL_ASSEMBLER_NAME_SET_P (decl))
31849 SET_DECL_RTL (decl, NULL);
31850
31851 tree ret = get_identifier (assembler_name);
31852 XDELETEVEC (attr_str);
31853 XDELETEVEC (assembler_name);
31854 return ret;
31855 }
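
/* Thus a hypothetical version of foo carrying
   __attribute__ ((target ("sse4.2"))) gets the assembler name "foo.sse4.2"
   (applied on top of any language-level mangling), whereas the version marked
   "default" keeps its original assembler name unchanged.  */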
31856
31857 /* This function returns true if FN1 and FN2 are versions of the same function,
31858 that is, the target strings of the function decls are different. This assumes
31859 that FN1 and FN2 have the same signature. */
31860
31861 static bool
31862 ix86_function_versions (tree fn1, tree fn2)
31863 {
31864 tree attr1, attr2;
31865 char *target1, *target2;
31866 bool result;
31867
31868 if (TREE_CODE (fn1) != FUNCTION_DECL
31869 || TREE_CODE (fn2) != FUNCTION_DECL)
31870 return false;
31871
31872 attr1 = lookup_attribute ("target", DECL_ATTRIBUTES (fn1));
31873 attr2 = lookup_attribute ("target", DECL_ATTRIBUTES (fn2));
31874
31875 /* At least one function decl should have the target attribute specified. */
31876 if (attr1 == NULL_TREE && attr2 == NULL_TREE)
31877 return false;
31878
31879 /* Diagnose missing target attribute if one of the decls is already
31880 multi-versioned. */
31881 if (attr1 == NULL_TREE || attr2 == NULL_TREE)
31882 {
31883 if (DECL_FUNCTION_VERSIONED (fn1) || DECL_FUNCTION_VERSIONED (fn2))
31884 {
31885 if (attr2 != NULL_TREE)
31886 {
31887 tree tem = fn1;
31888 fn1 = fn2;
31889 fn2 = tem;
31890 attr1 = attr2;
31891 }
31892 error_at (DECL_SOURCE_LOCATION (fn2),
31893 "missing %<target%> attribute for multi-versioned %D",
31894 fn2);
31895 inform (DECL_SOURCE_LOCATION (fn1),
31896 "previous declaration of %D", fn1);
31897 /* Prevent diagnosing of the same error multiple times. */
31898 DECL_ATTRIBUTES (fn2)
31899 = tree_cons (get_identifier ("target"),
31900 copy_node (TREE_VALUE (attr1)),
31901 DECL_ATTRIBUTES (fn2));
31902 }
31903 return false;
31904 }
31905
31906 target1 = sorted_attr_string (TREE_VALUE (attr1));
31907 target2 = sorted_attr_string (TREE_VALUE (attr2));
31908
31909 /* The sorted target strings must be different for fn1 and fn2
31910 to be versions. */
31911 if (strcmp (target1, target2) == 0)
31912 result = false;
31913 else
31914 result = true;
31915
31916 XDELETEVEC (target1);
31917 XDELETEVEC (target2);
31918
31919 return result;
31920 }
31921
31922 static tree
31923 ix86_mangle_decl_assembler_name (tree decl, tree id)
31924 {
31925 /* For function version, add the target suffix to the assembler name. */
31926 if (TREE_CODE (decl) == FUNCTION_DECL
31927 && DECL_FUNCTION_VERSIONED (decl))
31928 id = ix86_mangle_function_version_assembler_name (decl, id);
31929 #ifdef SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME
31930 id = SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME (decl, id);
31931 #endif
31932
31933 return id;
31934 }
31935
31936 /* Return a new name by appending SUFFIX to the DECL name. If make_unique
31937 is true, append the full path name of the source file. */
31938
31939 static char *
31940 make_name (tree decl, const char *suffix, bool make_unique)
31941 {
31942 char *global_var_name;
31943 int name_len;
31944 const char *name;
31945 const char *unique_name = NULL;
31946
31947 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
31948
31949 /* Get a unique name that can be used globally without any chances
31950 of collision at link time. */
31951 if (make_unique)
31952 unique_name = IDENTIFIER_POINTER (get_file_function_name ("\0"));
31953
31954 name_len = strlen (name) + strlen (suffix) + 2;
31955
31956 if (make_unique)
31957 name_len += strlen (unique_name) + 1;
31958 global_var_name = XNEWVEC (char, name_len);
31959
31960 /* Use '.' to concatenate names as it is demangler friendly. */
31961 if (make_unique)
31962 snprintf (global_var_name, name_len, "%s.%s.%s", name, unique_name,
31963 suffix);
31964 else
31965 snprintf (global_var_name, name_len, "%s.%s", name, suffix);
31966
31967 return global_var_name;
31968 }
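
/* For a public decl whose assembler name is "foo", make_name (decl,
   "resolver", false) therefore yields "foo.resolver"; the make_unique form
   splices the identifier obtained from get_file_function_name in between,
   giving something like "foo.<file-id>.resolver", so that file-local versions
   cannot collide at link time.  */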
31969
31970 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
31971
31972 /* Make a dispatcher declaration for the multi-versioned function DECL.
31973 Calls to DECL function will be replaced with calls to the dispatcher
31974 by the front-end. Return the decl created. */
31975
31976 static tree
31977 make_dispatcher_decl (const tree decl)
31978 {
31979 tree func_decl;
31980 char *func_name;
31981 tree fn_type, func_type;
31982 bool is_uniq = false;
31983
31984 if (TREE_PUBLIC (decl) == 0)
31985 is_uniq = true;
31986
31987 func_name = make_name (decl, "ifunc", is_uniq);
31988
31989 fn_type = TREE_TYPE (decl);
31990 func_type = build_function_type (TREE_TYPE (fn_type),
31991 TYPE_ARG_TYPES (fn_type));
31992
31993 func_decl = build_fn_decl (func_name, func_type);
31994 XDELETEVEC (func_name);
31995 TREE_USED (func_decl) = 1;
31996 DECL_CONTEXT (func_decl) = NULL_TREE;
31997 DECL_INITIAL (func_decl) = error_mark_node;
31998 DECL_ARTIFICIAL (func_decl) = 1;
31999 /* Mark this func as external, the resolver will flip it again if
32000 it gets generated. */
32001 DECL_EXTERNAL (func_decl) = 1;
32002 /* IFUNCs have to be externally visible, so make the dispatcher public. */
32003 TREE_PUBLIC (func_decl) = 1;
32004
32005 return func_decl;
32006 }
32007
32008 #endif
32009
32010 /* Returns true if DECL is multi-versioned and is the default version,
32011 that is, its "target" attribute is simply "default". */
32012
32013 static bool
32014 is_function_default_version (const tree decl)
32015 {
32016 if (TREE_CODE (decl) != FUNCTION_DECL
32017 || !DECL_FUNCTION_VERSIONED (decl))
32018 return false;
32019 tree attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
32020 gcc_assert (attr);
32021 attr = TREE_VALUE (TREE_VALUE (attr));
32022 return (TREE_CODE (attr) == STRING_CST
32023 && strcmp (TREE_STRING_POINTER (attr), "default") == 0);
32024 }
32025
32026 /* Make a dispatcher declaration for the multi-versioned function DECL.
32027 Calls to DECL function will be replaced with calls to the dispatcher
32028 by the front-end. Returns the decl of the dispatcher function. */
32029
32030 static tree
32031 ix86_get_function_versions_dispatcher (void *decl)
32032 {
32033 tree fn = (tree) decl;
32034 struct cgraph_node *node = NULL;
32035 struct cgraph_node *default_node = NULL;
32036 struct cgraph_function_version_info *node_v = NULL;
32037 struct cgraph_function_version_info *first_v = NULL;
32038
32039 tree dispatch_decl = NULL;
32040
32041 struct cgraph_function_version_info *default_version_info = NULL;
32042
32043 gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn));
32044
32045 node = cgraph_get_node (fn);
32046 gcc_assert (node != NULL);
32047
32048 node_v = get_cgraph_node_version (node);
32049 gcc_assert (node_v != NULL);
32050
32051 if (node_v->dispatcher_resolver != NULL)
32052 return node_v->dispatcher_resolver;
32053
32054 /* Find the default version and make it the first node. */
32055 first_v = node_v;
32056 /* Go to the beginning of the chain. */
32057 while (first_v->prev != NULL)
32058 first_v = first_v->prev;
32059 default_version_info = first_v;
32060 while (default_version_info != NULL)
32061 {
32062 if (is_function_default_version
32063 (default_version_info->this_node->decl))
32064 break;
32065 default_version_info = default_version_info->next;
32066 }
32067
32068 /* If there is no default node, just return NULL. */
32069 if (default_version_info == NULL)
32070 return NULL;
32071
32072 /* Make default info the first node. */
32073 if (first_v != default_version_info)
32074 {
32075 default_version_info->prev->next = default_version_info->next;
32076 if (default_version_info->next)
32077 default_version_info->next->prev = default_version_info->prev;
32078 first_v->prev = default_version_info;
32079 default_version_info->next = first_v;
32080 default_version_info->prev = NULL;
32081 }
32082
32083 default_node = default_version_info->this_node;
32084
32085 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
32086 if (targetm.has_ifunc_p ())
32087 {
32088 struct cgraph_function_version_info *it_v = NULL;
32089 struct cgraph_node *dispatcher_node = NULL;
32090 struct cgraph_function_version_info *dispatcher_version_info = NULL;
32091
32092 /* Right now, the dispatching is done via ifunc. */
32093 dispatch_decl = make_dispatcher_decl (default_node->decl);
32094
32095 dispatcher_node = cgraph_get_create_node (dispatch_decl);
32096 gcc_assert (dispatcher_node != NULL);
32097 dispatcher_node->dispatcher_function = 1;
32098 dispatcher_version_info
32099 = insert_new_cgraph_node_version (dispatcher_node);
32100 dispatcher_version_info->next = default_version_info;
32101 dispatcher_node->definition = 1;
32102
32103 /* Set the dispatcher for all the versions. */
32104 it_v = default_version_info;
32105 while (it_v != NULL)
32106 {
32107 it_v->dispatcher_resolver = dispatch_decl;
32108 it_v = it_v->next;
32109 }
32110 }
32111 else
32112 #endif
32113 {
32114 error_at (DECL_SOURCE_LOCATION (default_node->decl),
32115 "multiversioning needs ifunc which is not supported "
32116 "on this target");
32117 }
32118
32119 return dispatch_decl;
32120 }
32121
32122 /* Makes a function attribute of the form NAME(ARG_NAME) and chains
32123 it to CHAIN. */
32124
32125 static tree
32126 make_attribute (const char *name, const char *arg_name, tree chain)
32127 {
32128 tree attr_name;
32129 tree attr_arg_name;
32130 tree attr_args;
32131 tree attr;
32132
32133 attr_name = get_identifier (name);
32134 attr_arg_name = build_string (strlen (arg_name), arg_name);
32135 attr_args = tree_cons (NULL_TREE, attr_arg_name, NULL_TREE);
32136 attr = tree_cons (attr_name, attr_args, chain);
32137 return attr;
32138 }
32139
32140 /* Make the resolver function decl to dispatch the versions of
32141 a multi-versioned function, DEFAULT_DECL. Create an
32142 empty basic block in the resolver and store the pointer in
32143 EMPTY_BB. Return the decl of the resolver function. */
32144
32145 static tree
32146 make_resolver_func (const tree default_decl,
32147 const tree dispatch_decl,
32148 basic_block *empty_bb)
32149 {
32150 char *resolver_name;
32151 tree decl, type, decl_name, t;
32152 bool is_uniq = false;
32153
32154 /* IFUNCs have to be globally visible. So, if the default_decl is
32155 not, then the name of the IFUNC should be made unique. */
32156 if (TREE_PUBLIC (default_decl) == 0)
32157 is_uniq = true;
32158
32159 /* Append the filename to the resolver function if the versions are
32160 not externally visible. This is because the resolver function has
32161 to be externally visible for the loader to find it. So, appending
32162 the filename will prevent conflicts with a resolver function from
32163 another module which is based on the same version name. */
32164 resolver_name = make_name (default_decl, "resolver", is_uniq);
32165
32166 /* The resolver function should return a (void *). */
32167 type = build_function_type_list (ptr_type_node, NULL_TREE);
32168
32169 decl = build_fn_decl (resolver_name, type);
32170 decl_name = get_identifier (resolver_name);
32171 SET_DECL_ASSEMBLER_NAME (decl, decl_name);
32172
32173 DECL_NAME (decl) = decl_name;
32174 TREE_USED (decl) = 1;
32175 DECL_ARTIFICIAL (decl) = 1;
32176 DECL_IGNORED_P (decl) = 0;
32177 /* IFUNC resolvers have to be externally visible. */
32178 TREE_PUBLIC (decl) = 1;
32179 DECL_UNINLINABLE (decl) = 1;
32180
32181 /* Resolver is not external, body is generated. */
32182 DECL_EXTERNAL (decl) = 0;
32183 DECL_EXTERNAL (dispatch_decl) = 0;
32184
32185 DECL_CONTEXT (decl) = NULL_TREE;
32186 DECL_INITIAL (decl) = make_node (BLOCK);
32187 DECL_STATIC_CONSTRUCTOR (decl) = 0;
32188
32189 if (DECL_COMDAT_GROUP (default_decl)
32190 || TREE_PUBLIC (default_decl))
32191 {
32192 /* In this case, each translation unit with a call to this
32193 versioned function will put out a resolver. Ensure it
32194 is comdat to keep just one copy. */
32195 DECL_COMDAT (decl) = 1;
32196 make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
32197 }
32198 /* Build result decl and add to function_decl. */
32199 t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, ptr_type_node);
32200 DECL_ARTIFICIAL (t) = 1;
32201 DECL_IGNORED_P (t) = 1;
32202 DECL_RESULT (decl) = t;
32203
32204 gimplify_function_tree (decl);
32205 push_cfun (DECL_STRUCT_FUNCTION (decl));
32206 *empty_bb = init_lowered_empty_function (decl, false);
32207
32208 cgraph_add_new_function (decl, true);
32209 cgraph_call_function_insertion_hooks (cgraph_get_create_node (decl));
32210
32211 pop_cfun ();
32212
32213 gcc_assert (dispatch_decl != NULL);
32214 /* Mark dispatch_decl as "ifunc" with resolver as resolver_name. */
32215 DECL_ATTRIBUTES (dispatch_decl)
32216 = make_attribute ("ifunc", resolver_name, DECL_ATTRIBUTES (dispatch_decl));
32217
32218 /* Create the alias for dispatch to resolver here. */
32219 /*cgraph_create_function_alias (dispatch_decl, decl);*/
32220 cgraph_same_body_alias (NULL, dispatch_decl, decl);
32221 XDELETEVEC (resolver_name);
32222 return decl;
32223 }
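
/* Pulling the pieces together for a hypothetical multi-versioned function
   foo: make_dispatcher_decl creates a dispatcher decl named with the "ifunc"
   suffix, make_resolver_func above tags that dispatcher with
   __attribute__ ((ifunc ("foo.resolver"))) and builds foo.resolver itself,
   and dispatch_function_versions then fills the resolver with
   __builtin_cpu_is / __builtin_cpu_supports tests that return the address of
   the chosen version (foo.avx2, foo.sse4.2, ..., or the default one).  */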
32224
32225 /* Generate the dispatching code body to dispatch multi-versioned function
32226 DECL. The target hook is called to process the "target" attributes and
32227 provide the code to dispatch the right function at run-time. NODE points
32228 to the dispatcher decl whose body will be created. */
32229
32230 static tree
32231 ix86_generate_version_dispatcher_body (void *node_p)
32232 {
32233 tree resolver_decl;
32234 basic_block empty_bb;
32235 tree default_ver_decl;
32236 struct cgraph_node *versn;
32237 struct cgraph_node *node;
32238
32239 struct cgraph_function_version_info *node_version_info = NULL;
32240 struct cgraph_function_version_info *versn_info = NULL;
32241
32242 node = (cgraph_node *)node_p;
32243
32244 node_version_info = get_cgraph_node_version (node);
32245 gcc_assert (node->dispatcher_function
32246 && node_version_info != NULL);
32247
32248 if (node_version_info->dispatcher_resolver)
32249 return node_version_info->dispatcher_resolver;
32250
32251 /* The first version in the chain corresponds to the default version. */
32252 default_ver_decl = node_version_info->next->this_node->decl;
32253
32254 /* node is going to be an alias, so remove the finalized bit. */
32255 node->definition = false;
32256
32257 resolver_decl = make_resolver_func (default_ver_decl,
32258 node->decl, &empty_bb);
32259
32260 node_version_info->dispatcher_resolver = resolver_decl;
32261
32262 push_cfun (DECL_STRUCT_FUNCTION (resolver_decl));
32263
32264 auto_vec<tree, 2> fn_ver_vec;
32265
32266 for (versn_info = node_version_info->next; versn_info;
32267 versn_info = versn_info->next)
32268 {
32269 versn = versn_info->this_node;
32270 /* Check for virtual functions here again, as by this time it should
32271 have been determined if this function needs a vtable index or
32272 not. This happens for methods in derived classes that override
32273 virtual methods in base classes but are not explicitly marked as
32274 virtual. */
32275 if (DECL_VINDEX (versn->decl))
32276 sorry ("Virtual function multiversioning not supported");
32277
32278 fn_ver_vec.safe_push (versn->decl);
32279 }
32280
32281 dispatch_function_versions (resolver_decl, &fn_ver_vec, &empty_bb);
32282 rebuild_cgraph_edges ();
32283 pop_cfun ();
32284 return resolver_decl;
32285 }
32286 /* This builds the processor_model struct type defined in
32287 libgcc/config/i386/cpuinfo.c */
32288
32289 static tree
32290 build_processor_model_struct (void)
32291 {
32292 const char *field_name[] = {"__cpu_vendor", "__cpu_type", "__cpu_subtype",
32293 "__cpu_features"};
32294 tree field = NULL_TREE, field_chain = NULL_TREE;
32295 int i;
32296 tree type = make_node (RECORD_TYPE);
32297
32298 /* The first 3 fields are unsigned int. */
32299 for (i = 0; i < 3; ++i)
32300 {
32301 field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
32302 get_identifier (field_name[i]), unsigned_type_node);
32303 if (field_chain != NULL_TREE)
32304 DECL_CHAIN (field) = field_chain;
32305 field_chain = field;
32306 }
32307
32308 /* The last field is an array of unsigned integers of size one. */
32309 field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
32310 get_identifier (field_name[3]),
32311 build_array_type (unsigned_type_node,
32312 build_index_type (size_one_node)));
32313 if (field_chain != NULL_TREE)
32314 DECL_CHAIN (field) = field_chain;
32315 field_chain = field;
32316
32317 finish_builtin_struct (type, "__processor_model", field_chain, NULL_TREE);
32318 return type;
32319 }
32320
32321 /* Returns an extern, comdat VAR_DECL of type TYPE and name NAME.  */
32322
32323 static tree
32324 make_var_decl (tree type, const char *name)
32325 {
32326 tree new_decl;
32327
32328 new_decl = build_decl (UNKNOWN_LOCATION,
32329 VAR_DECL,
32330 get_identifier (name),
32331 type);
32332
32333 DECL_EXTERNAL (new_decl) = 1;
32334 TREE_STATIC (new_decl) = 1;
32335 TREE_PUBLIC (new_decl) = 1;
32336 DECL_INITIAL (new_decl) = 0;
32337 DECL_ARTIFICIAL (new_decl) = 0;
32338 DECL_PRESERVE_P (new_decl) = 1;
32339
32340 make_decl_one_only (new_decl, DECL_ASSEMBLER_NAME (new_decl));
32341 assemble_variable (new_decl, 0, 0, 0);
32342
32343 return new_decl;
32344 }
32345
32346 /* FNDECL is a __builtin_cpu_is or a __builtin_cpu_supports call that is
32347 folded into a check against data defined in libgcc/config/i386/cpuinfo.c.  */
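/* For example, __builtin_cpu_is ("amd") folds to a test of
   __cpu_model.__cpu_vendor against M_AMD, and __builtin_cpu_supports ("avx")
   folds to __cpu_model.__cpu_features[0] & (1 << F_AVX).  */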
32348
32349 static tree
32350 fold_builtin_cpu (tree fndecl, tree *args)
32351 {
32352 unsigned int i;
32353 enum ix86_builtins fn_code = (enum ix86_builtins)
32354 DECL_FUNCTION_CODE (fndecl);
32355 tree param_string_cst = NULL;
32356
32357 /* This is the order of bit-fields in __processor_features in cpuinfo.c */
32358 enum processor_features
32359 {
32360 F_CMOV = 0,
32361 F_MMX,
32362 F_POPCNT,
32363 F_SSE,
32364 F_SSE2,
32365 F_SSE3,
32366 F_SSSE3,
32367 F_SSE4_1,
32368 F_SSE4_2,
32369 F_AVX,
32370 F_AVX2,
32371 F_SSE4_A,
32372 F_FMA4,
32373 F_XOP,
32374 F_FMA,
32375 F_MAX
32376 };
32377
32378 /* These are the values for vendor types and cpu types and subtypes
32379 in cpuinfo.c. The corresponding start value has to be subtracted from
32380 cpu type and subtype values before comparing them. */
32381 enum processor_model
32382 {
32383 M_INTEL = 1,
32384 M_AMD,
32385 M_CPU_TYPE_START,
32386 M_INTEL_BONNELL,
32387 M_INTEL_CORE2,
32388 M_INTEL_COREI7,
32389 M_AMDFAM10H,
32390 M_AMDFAM15H,
32391 M_INTEL_SILVERMONT,
32392 M_AMD_BTVER1,
32393 M_AMD_BTVER2,
32394 M_CPU_SUBTYPE_START,
32395 M_INTEL_COREI7_NEHALEM,
32396 M_INTEL_COREI7_WESTMERE,
32397 M_INTEL_COREI7_SANDYBRIDGE,
32398 M_AMDFAM10H_BARCELONA,
32399 M_AMDFAM10H_SHANGHAI,
32400 M_AMDFAM10H_ISTANBUL,
32401 M_AMDFAM15H_BDVER1,
32402 M_AMDFAM15H_BDVER2,
32403 M_AMDFAM15H_BDVER3,
32404 M_AMDFAM15H_BDVER4,
32405 M_INTEL_COREI7_IVYBRIDGE,
32406 M_INTEL_COREI7_HASWELL
32407 };
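  /* For instance, "nehalem" maps to M_INTEL_COREI7_NEHALEM above; since that
     value lies past M_CPU_SUBTYPE_START, it is matched against __cpu_subtype
     below after M_CPU_SUBTYPE_START has been subtracted.  */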
32408
32409 static struct _arch_names_table
32410 {
32411 const char *const name;
32412 const enum processor_model model;
32413 }
32414 const arch_names_table[] =
32415 {
32416 {"amd", M_AMD},
32417 {"intel", M_INTEL},
32418 {"atom", M_INTEL_BONNELL},
32419 {"slm", M_INTEL_SILVERMONT},
32420 {"core2", M_INTEL_CORE2},
32421 {"corei7", M_INTEL_COREI7},
32422 {"nehalem", M_INTEL_COREI7_NEHALEM},
32423 {"westmere", M_INTEL_COREI7_WESTMERE},
32424 {"sandybridge", M_INTEL_COREI7_SANDYBRIDGE},
32425 {"ivybridge", M_INTEL_COREI7_IVYBRIDGE},
32426 {"haswell", M_INTEL_COREI7_HASWELL},
32427 {"bonnell", M_INTEL_BONNELL},
32428 {"silvermont", M_INTEL_SILVERMONT},
32429 {"amdfam10h", M_AMDFAM10H},
32430 {"barcelona", M_AMDFAM10H_BARCELONA},
32431 {"shanghai", M_AMDFAM10H_SHANGHAI},
32432 {"istanbul", M_AMDFAM10H_ISTANBUL},
32433 {"btver1", M_AMD_BTVER1},
32434 {"amdfam15h", M_AMDFAM15H},
32435 {"bdver1", M_AMDFAM15H_BDVER1},
32436 {"bdver2", M_AMDFAM15H_BDVER2},
32437 {"bdver3", M_AMDFAM15H_BDVER3},
32438 {"bdver4", M_AMDFAM15H_BDVER4},
32439 {"btver2", M_AMD_BTVER2},
32440 };
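  /* Note that "atom" and "slm" are retained as aliases of "bonnell" and
     "silvermont" respectively.  */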
32441
32442 static struct _isa_names_table
32443 {
32444 const char *const name;
32445 const enum processor_features feature;
32446 }
32447 const isa_names_table[] =
32448 {
32449 {"cmov", F_CMOV},
32450 {"mmx", F_MMX},
32451 {"popcnt", F_POPCNT},
32452 {"sse", F_SSE},
32453 {"sse2", F_SSE2},
32454 {"sse3", F_SSE3},
32455 {"ssse3", F_SSSE3},
32456 {"sse4a", F_SSE4_A},
32457 {"sse4.1", F_SSE4_1},
32458 {"sse4.2", F_SSE4_2},
32459 {"avx", F_AVX},
32460 {"fma4", F_FMA4},
32461 {"xop", F_XOP},
32462 {"fma", F_FMA},
32463 {"avx2", F_AVX2}
32464 };
32465
32466 tree __processor_model_type = build_processor_model_struct ();
32467 tree __cpu_model_var = make_var_decl (__processor_model_type,
32468 "__cpu_model");
32469
32470
32471 varpool_add_new_variable (__cpu_model_var);
32472
32473 gcc_assert ((args != NULL) && (*args != NULL));
32474
32475 param_string_cst = *args;
32476 while (param_string_cst
32477 && TREE_CODE (param_string_cst) != STRING_CST)
32478 {
32479 /* *args must be an expr that can contain other EXPRs leading to a
32480 STRING_CST. */
32481 if (!EXPR_P (param_string_cst))
32482 {
32483 error ("Parameter to builtin must be a string constant or literal");
32484 return integer_zero_node;
32485 }
32486 param_string_cst = TREE_OPERAND (EXPR_CHECK (param_string_cst), 0);
32487 }
32488
32489 gcc_assert (param_string_cst);
32490
32491 if (fn_code == IX86_BUILTIN_CPU_IS)
32492 {
32493 tree ref;
32494 tree field;
32495 tree final;
32496
32497 unsigned int field_val = 0;
32498 unsigned int NUM_ARCH_NAMES
32499 = sizeof (arch_names_table) / sizeof (struct _arch_names_table);
32500
32501 for (i = 0; i < NUM_ARCH_NAMES; i++)
32502 if (strcmp (arch_names_table[i].name,
32503 TREE_STRING_POINTER (param_string_cst)) == 0)
32504 break;
32505
32506 if (i == NUM_ARCH_NAMES)
32507 {
32508 error ("Parameter to builtin not valid: %s",
32509 TREE_STRING_POINTER (param_string_cst));
32510 return integer_zero_node;
32511 }
32512
32513 field = TYPE_FIELDS (__processor_model_type);
32514 field_val = arch_names_table[i].model;
32515
32516 /* CPU types are stored in the next field. */
32517 if (field_val > M_CPU_TYPE_START
32518 && field_val < M_CPU_SUBTYPE_START)
32519 {
32520 field = DECL_CHAIN (field);
32521 field_val -= M_CPU_TYPE_START;
32522 }
32523
32524 /* CPU subtypes are stored in the next field. */
32525 if (field_val > M_CPU_SUBTYPE_START)
32526 {
32527 field = DECL_CHAIN (DECL_CHAIN (field));
32528 field_val -= M_CPU_SUBTYPE_START;
32529 }
32530
32531 /* Get the appropriate field in __cpu_model. */
32532 ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
32533 field, NULL_TREE);
32534
32535 /* Check the value. */
32536 final = build2 (EQ_EXPR, unsigned_type_node, ref,
32537 build_int_cstu (unsigned_type_node, field_val));
32538 return build1 (CONVERT_EXPR, integer_type_node, final);
32539 }
32540 else if (fn_code == IX86_BUILTIN_CPU_SUPPORTS)
32541 {
32542 tree ref;
32543 tree array_elt;
32544 tree field;
32545 tree final;
32546
32547 unsigned int field_val = 0;
32548 unsigned int NUM_ISA_NAMES
32549 = sizeof (isa_names_table) / sizeof (struct _isa_names_table);
32550
32551 for (i = 0; i < NUM_ISA_NAMES; i++)
32552 if (strcmp (isa_names_table[i].name,
32553 TREE_STRING_POINTER (param_string_cst)) == 0)
32554 break;
32555
32556 if (i == NUM_ISA_NAMES)
32557 {
32558 error ("Parameter to builtin not valid: %s",
32559 TREE_STRING_POINTER (param_string_cst));
32560 return integer_zero_node;
32561 }
32562
32563 field = TYPE_FIELDS (__processor_model_type);
32564 /* Get the last field, which is __cpu_features. */
32565 while (DECL_CHAIN (field))
32566 field = DECL_CHAIN (field);
32567
32568 /* Get the appropriate field: __cpu_model.__cpu_features */
32569 ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
32570 field, NULL_TREE);
32571
32572 /* Access the 0th element of __cpu_features array. */
32573 array_elt = build4 (ARRAY_REF, unsigned_type_node, ref,
32574 integer_zero_node, NULL_TREE, NULL_TREE);
32575
32576 field_val = (1 << isa_names_table[i].feature);
32577 /* Return __cpu_model.__cpu_features[0] & field_val */
32578 final = build2 (BIT_AND_EXPR, unsigned_type_node, array_elt,
32579 build_int_cstu (unsigned_type_node, field_val));
32580 return build1 (CONVERT_EXPR, integer_type_node, final);
32581 }
32582 gcc_unreachable ();
32583 }
32584
32585 static tree
32586 ix86_fold_builtin (tree fndecl, int n_args,
32587 tree *args, bool ignore ATTRIBUTE_UNUSED)
32588 {
32589 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
32590 {
32591 enum ix86_builtins fn_code = (enum ix86_builtins)
32592 DECL_FUNCTION_CODE (fndecl);
32593 if (fn_code == IX86_BUILTIN_CPU_IS
32594 || fn_code == IX86_BUILTIN_CPU_SUPPORTS)
32595 {
32596 gcc_assert (n_args == 1);
32597 return fold_builtin_cpu (fndecl, args);
32598 }
32599 }
32600
32601 #ifdef SUBTARGET_FOLD_BUILTIN
32602 return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore);
32603 #endif
32604
32605 return NULL_TREE;
32606 }
32607
32608 /* Make builtins to detect cpu type and features supported. NAME is
32609 the builtin name, CODE is the builtin code, and FTYPE is the function
32610 type of the builtin. */
32611
32612 static void
32613 make_cpu_type_builtin (const char* name, int code,
32614 enum ix86_builtin_func_type ftype, bool is_const)
32615 {
32616 tree decl;
32617 tree type;
32618
32619 type = ix86_get_builtin_func_type (ftype);
32620 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
32621 NULL, NULL_TREE);
32622 gcc_assert (decl != NULL_TREE);
32623 ix86_builtins[(int) code] = decl;
32624 TREE_READONLY (decl) = is_const;
32625 }
32626
32627 /* Make builtins to get CPU type and features supported. The created
32628 builtins are:
32629
32630 __builtin_cpu_init (), to detect cpu type and features,
32631 __builtin_cpu_is ("<CPUNAME>"), to check if cpu is of type <CPUNAME>,
32632 __builtin_cpu_supports ("<FEATURE>"), to check if cpu supports <FEATURE>
32633 */
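/* A typical use of these builtins (illustrative only; the callees are
   hypothetical user functions):

     __builtin_cpu_init ();
     if (__builtin_cpu_supports ("sse4.2"))
       do_sse42_version ();
     else
       do_generic_version ();  */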
32634
32635 static void
32636 ix86_init_platform_type_builtins (void)
32637 {
32638 make_cpu_type_builtin ("__builtin_cpu_init", IX86_BUILTIN_CPU_INIT,
32639 INT_FTYPE_VOID, false);
32640 make_cpu_type_builtin ("__builtin_cpu_is", IX86_BUILTIN_CPU_IS,
32641 INT_FTYPE_PCCHAR, true);
32642 make_cpu_type_builtin ("__builtin_cpu_supports", IX86_BUILTIN_CPU_SUPPORTS,
32643 INT_FTYPE_PCCHAR, true);
32644 }
32645
32646 /* Internal method for ix86_init_builtins. */
32647
32648 static void
32649 ix86_init_builtins_va_builtins_abi (void)
32650 {
32651 tree ms_va_ref, sysv_va_ref;
32652 tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
32653 tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
32654 tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
32655 tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;
32656
32657 if (!TARGET_64BIT)
32658 return;
32659 fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
32660 fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
32661 ms_va_ref = build_reference_type (ms_va_list_type_node);
32662 sysv_va_ref =
32663 build_pointer_type (TREE_TYPE (sysv_va_list_type_node));
32664
32665 fnvoid_va_end_ms =
32666 build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
32667 fnvoid_va_start_ms =
32668 build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
32669 fnvoid_va_end_sysv =
32670 build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
32671 fnvoid_va_start_sysv =
32672 build_varargs_function_type_list (void_type_node, sysv_va_ref,
32673 NULL_TREE);
32674 fnvoid_va_copy_ms =
32675 build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
32676 NULL_TREE);
32677 fnvoid_va_copy_sysv =
32678 build_function_type_list (void_type_node, sysv_va_ref,
32679 sysv_va_ref, NULL_TREE);
32680
32681 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
32682 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
32683 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
32684 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
32685 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
32686 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
32687 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
32688 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
32689 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
32690 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
32691 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
32692 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
32693 }
32694
32695 static void
32696 ix86_init_builtin_types (void)
32697 {
32698 tree float128_type_node, float80_type_node;
32699
32700 /* The __float80 type. */
32701 float80_type_node = long_double_type_node;
32702 if (TYPE_MODE (float80_type_node) != XFmode)
32703 {
32704 /* The __float80 type. */
32705 float80_type_node = make_node (REAL_TYPE);
32706
32707 TYPE_PRECISION (float80_type_node) = 80;
32708 layout_type (float80_type_node);
32709 }
32710 lang_hooks.types.register_builtin_type (float80_type_node, "__float80");
32711
32712 /* The __float128 type. */
32713 float128_type_node = make_node (REAL_TYPE);
32714 TYPE_PRECISION (float128_type_node) = 128;
32715 layout_type (float128_type_node);
32716 lang_hooks.types.register_builtin_type (float128_type_node, "__float128");
32717
32718 /* This macro is built by i386-builtin-types.awk. */
32719 DEFINE_BUILTIN_PRIMITIVE_TYPES;
32720 }
32721
32722 static void
32723 ix86_init_builtins (void)
32724 {
32725 tree t;
32726
32727 ix86_init_builtin_types ();
32728
32729 /* Builtins to get CPU type and features. */
32730 ix86_init_platform_type_builtins ();
32731
32732 /* TFmode support builtins. */
32733 def_builtin_const (0, "__builtin_infq",
32734 FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ);
32735 def_builtin_const (0, "__builtin_huge_valq",
32736 FLOAT128_FTYPE_VOID, IX86_BUILTIN_HUGE_VALQ);
32737
32738 /* We will expand them to a normal call if SSE isn't available since
32739 they are used by libgcc. */
32740 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128);
32741 t = add_builtin_function ("__builtin_fabsq", t, IX86_BUILTIN_FABSQ,
32742 BUILT_IN_MD, "__fabstf2", NULL_TREE);
32743 TREE_READONLY (t) = 1;
32744 ix86_builtins[(int) IX86_BUILTIN_FABSQ] = t;
32745
32746 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128);
32747 t = add_builtin_function ("__builtin_copysignq", t, IX86_BUILTIN_COPYSIGNQ,
32748 BUILT_IN_MD, "__copysigntf3", NULL_TREE);
32749 TREE_READONLY (t) = 1;
32750 ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = t;
32751
32752 ix86_init_tm_builtins ();
32753 ix86_init_mmx_sse_builtins ();
32754
32755 if (TARGET_LP64)
32756 ix86_init_builtins_va_builtins_abi ();
32757
32758 #ifdef SUBTARGET_INIT_BUILTINS
32759 SUBTARGET_INIT_BUILTINS;
32760 #endif
32761 }
32762
32763 /* Return the ix86 builtin for CODE. */
32764
32765 static tree
32766 ix86_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
32767 {
32768 if (code >= IX86_BUILTIN_MAX)
32769 return error_mark_node;
32770
32771 return ix86_builtins[code];
32772 }
32773
32774 /* Errors in the source file can cause expand_expr to return const0_rtx
32775 where we expect a vector. To avoid crashing, use one of the vector
32776 clear instructions. */
32777 static rtx
32778 safe_vector_operand (rtx x, enum machine_mode mode)
32779 {
32780 if (x == const0_rtx)
32781 x = CONST0_RTX (mode);
32782 return x;
32783 }
32784
32785 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
32786
32787 static rtx
32788 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
32789 {
32790 rtx pat;
32791 tree arg0 = CALL_EXPR_ARG (exp, 0);
32792 tree arg1 = CALL_EXPR_ARG (exp, 1);
32793 rtx op0 = expand_normal (arg0);
32794 rtx op1 = expand_normal (arg1);
32795 enum machine_mode tmode = insn_data[icode].operand[0].mode;
32796 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
32797 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
32798
32799 if (VECTOR_MODE_P (mode0))
32800 op0 = safe_vector_operand (op0, mode0);
32801 if (VECTOR_MODE_P (mode1))
32802 op1 = safe_vector_operand (op1, mode1);
32803
32804 if (optimize || !target
32805 || GET_MODE (target) != tmode
32806 || !insn_data[icode].operand[0].predicate (target, tmode))
32807 target = gen_reg_rtx (tmode);
32808
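  /* If the insn pattern expects a TImode operand but the builtin passed a
     32-bit value, load that value into the low element of an XMM register,
     zeroing the rest, and use the TImode view of that register.  */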
32809 if (GET_MODE (op1) == SImode && mode1 == TImode)
32810 {
32811 rtx x = gen_reg_rtx (V4SImode);
32812 emit_insn (gen_sse2_loadd (x, op1));
32813 op1 = gen_lowpart (TImode, x);
32814 }
32815
32816 if (!insn_data[icode].operand[1].predicate (op0, mode0))
32817 op0 = copy_to_mode_reg (mode0, op0);
32818 if (!insn_data[icode].operand[2].predicate (op1, mode1))
32819 op1 = copy_to_mode_reg (mode1, op1);
32820
32821 pat = GEN_FCN (icode) (target, op0, op1);
32822 if (! pat)
32823 return 0;
32824
32825 emit_insn (pat);
32826
32827 return target;
32828 }
32829
32830 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
32831
32832 static rtx
32833 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
32834 enum ix86_builtin_func_type m_type,
32835 enum rtx_code sub_code)
32836 {
32837 rtx pat;
32838 int i;
32839 int nargs;
32840 bool comparison_p = false;
32841 bool tf_p = false;
32842 bool last_arg_constant = false;
32843 int num_memory = 0;
32844 struct {
32845 rtx op;
32846 enum machine_mode mode;
32847 } args[4];
32848
32849 enum machine_mode tmode = insn_data[icode].operand[0].mode;
32850
32851 switch (m_type)
32852 {
32853 case MULTI_ARG_4_DF2_DI_I:
32854 case MULTI_ARG_4_DF2_DI_I1:
32855 case MULTI_ARG_4_SF2_SI_I:
32856 case MULTI_ARG_4_SF2_SI_I1:
32857 nargs = 4;
32858 last_arg_constant = true;
32859 break;
32860
32861 case MULTI_ARG_3_SF:
32862 case MULTI_ARG_3_DF:
32863 case MULTI_ARG_3_SF2:
32864 case MULTI_ARG_3_DF2:
32865 case MULTI_ARG_3_DI:
32866 case MULTI_ARG_3_SI:
32867 case MULTI_ARG_3_SI_DI:
32868 case MULTI_ARG_3_HI:
32869 case MULTI_ARG_3_HI_SI:
32870 case MULTI_ARG_3_QI:
32871 case MULTI_ARG_3_DI2:
32872 case MULTI_ARG_3_SI2:
32873 case MULTI_ARG_3_HI2:
32874 case MULTI_ARG_3_QI2:
32875 nargs = 3;
32876 break;
32877
32878 case MULTI_ARG_2_SF:
32879 case MULTI_ARG_2_DF:
32880 case MULTI_ARG_2_DI:
32881 case MULTI_ARG_2_SI:
32882 case MULTI_ARG_2_HI:
32883 case MULTI_ARG_2_QI:
32884 nargs = 2;
32885 break;
32886
32887 case MULTI_ARG_2_DI_IMM:
32888 case MULTI_ARG_2_SI_IMM:
32889 case MULTI_ARG_2_HI_IMM:
32890 case MULTI_ARG_2_QI_IMM:
32891 nargs = 2;
32892 last_arg_constant = true;
32893 break;
32894
32895 case MULTI_ARG_1_SF:
32896 case MULTI_ARG_1_DF:
32897 case MULTI_ARG_1_SF2:
32898 case MULTI_ARG_1_DF2:
32899 case MULTI_ARG_1_DI:
32900 case MULTI_ARG_1_SI:
32901 case MULTI_ARG_1_HI:
32902 case MULTI_ARG_1_QI:
32903 case MULTI_ARG_1_SI_DI:
32904 case MULTI_ARG_1_HI_DI:
32905 case MULTI_ARG_1_HI_SI:
32906 case MULTI_ARG_1_QI_DI:
32907 case MULTI_ARG_1_QI_SI:
32908 case MULTI_ARG_1_QI_HI:
32909 nargs = 1;
32910 break;
32911
32912 case MULTI_ARG_2_DI_CMP:
32913 case MULTI_ARG_2_SI_CMP:
32914 case MULTI_ARG_2_HI_CMP:
32915 case MULTI_ARG_2_QI_CMP:
32916 nargs = 2;
32917 comparison_p = true;
32918 break;
32919
32920 case MULTI_ARG_2_SF_TF:
32921 case MULTI_ARG_2_DF_TF:
32922 case MULTI_ARG_2_DI_TF:
32923 case MULTI_ARG_2_SI_TF:
32924 case MULTI_ARG_2_HI_TF:
32925 case MULTI_ARG_2_QI_TF:
32926 nargs = 2;
32927 tf_p = true;
32928 break;
32929
32930 default:
32931 gcc_unreachable ();
32932 }
32933
32934 if (optimize || !target
32935 || GET_MODE (target) != tmode
32936 || !insn_data[icode].operand[0].predicate (target, tmode))
32937 target = gen_reg_rtx (tmode);
32938
32939 gcc_assert (nargs <= 4);
32940
32941 for (i = 0; i < nargs; i++)
32942 {
32943 tree arg = CALL_EXPR_ARG (exp, i);
32944 rtx op = expand_normal (arg);
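      /* For the comparison forms, operand 1 of the insn is the comparison
	 rtx itself, so the argument operands start one slot later.  */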
32945 int adjust = (comparison_p) ? 1 : 0;
32946 enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
32947
32948 if (last_arg_constant && i == nargs - 1)
32949 {
32950 if (!insn_data[icode].operand[i + 1].predicate (op, mode))
32951 {
32952 enum insn_code new_icode = icode;
32953 switch (icode)
32954 {
32955 case CODE_FOR_xop_vpermil2v2df3:
32956 case CODE_FOR_xop_vpermil2v4sf3:
32957 case CODE_FOR_xop_vpermil2v4df3:
32958 case CODE_FOR_xop_vpermil2v8sf3:
32959 error ("the last argument must be a 2-bit immediate");
32960 return gen_reg_rtx (tmode);
32961 case CODE_FOR_xop_rotlv2di3:
32962 new_icode = CODE_FOR_rotlv2di3;
32963 goto xop_rotl;
32964 case CODE_FOR_xop_rotlv4si3:
32965 new_icode = CODE_FOR_rotlv4si3;
32966 goto xop_rotl;
32967 case CODE_FOR_xop_rotlv8hi3:
32968 new_icode = CODE_FOR_rotlv8hi3;
32969 goto xop_rotl;
32970 case CODE_FOR_xop_rotlv16qi3:
32971 new_icode = CODE_FOR_rotlv16qi3;
32972 xop_rotl:
32973 if (CONST_INT_P (op))
32974 {
32975 int mask = GET_MODE_BITSIZE (GET_MODE_INNER (tmode)) - 1;
32976 op = GEN_INT (INTVAL (op) & mask);
32977 gcc_checking_assert
32978 (insn_data[icode].operand[i + 1].predicate (op, mode));
32979 }
32980 else
32981 {
32982 gcc_checking_assert
32983 (nargs == 2
32984 && insn_data[new_icode].operand[0].mode == tmode
32985 && insn_data[new_icode].operand[1].mode == tmode
32986 && insn_data[new_icode].operand[2].mode == mode
32987 && insn_data[new_icode].operand[0].predicate
32988 == insn_data[icode].operand[0].predicate
32989 && insn_data[new_icode].operand[1].predicate
32990 == insn_data[icode].operand[1].predicate);
32991 icode = new_icode;
32992 goto non_constant;
32993 }
32994 break;
32995 default:
32996 gcc_unreachable ();
32997 }
32998 }
32999 }
33000 else
33001 {
33002 non_constant:
33003 if (VECTOR_MODE_P (mode))
33004 op = safe_vector_operand (op, mode);
33005
33006 /* If we aren't optimizing, only allow one memory operand to be
33007 generated. */
33008 if (memory_operand (op, mode))
33009 num_memory++;
33010
33011 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
33012
33013 if (optimize
33014 || !insn_data[icode].operand[i+adjust+1].predicate (op, mode)
33015 || num_memory > 1)
33016 op = force_reg (mode, op);
33017 }
33018
33019 args[i].op = op;
33020 args[i].mode = mode;
33021 }
33022
33023 switch (nargs)
33024 {
33025 case 1:
33026 pat = GEN_FCN (icode) (target, args[0].op);
33027 break;
33028
33029 case 2:
33030 if (tf_p)
33031 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
33032 GEN_INT ((int)sub_code));
33033 else if (! comparison_p)
33034 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
33035 else
33036 {
33037 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
33038 args[0].op,
33039 args[1].op);
33040
33041 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
33042 }
33043 break;
33044
33045 case 3:
33046 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
33047 break;
33048
33049 case 4:
33050 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op, args[3].op);
33051 break;
33052
33053 default:
33054 gcc_unreachable ();
33055 }
33056
33057 if (! pat)
33058 return 0;
33059
33060 emit_insn (pat);
33061 return target;
33062 }
33063
33064 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
33065 insns with vec_merge. */
33066
33067 static rtx
33068 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
33069 rtx target)
33070 {
33071 rtx pat;
33072 tree arg0 = CALL_EXPR_ARG (exp, 0);
33073 rtx op1, op0 = expand_normal (arg0);
33074 enum machine_mode tmode = insn_data[icode].operand[0].mode;
33075 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
33076
33077 if (optimize || !target
33078 || GET_MODE (target) != tmode
33079 || !insn_data[icode].operand[0].predicate (target, tmode))
33080 target = gen_reg_rtx (tmode);
33081
33082 if (VECTOR_MODE_P (mode0))
33083 op0 = safe_vector_operand (op0, mode0);
33084
33085 if ((optimize && !register_operand (op0, mode0))
33086 || !insn_data[icode].operand[1].predicate (op0, mode0))
33087 op0 = copy_to_mode_reg (mode0, op0);
33088
33089 op1 = op0;
33090 if (!insn_data[icode].operand[2].predicate (op1, mode0))
33091 op1 = copy_to_mode_reg (mode0, op1);
33092
33093 pat = GEN_FCN (icode) (target, op0, op1);
33094 if (! pat)
33095 return 0;
33096 emit_insn (pat);
33097 return target;
33098 }
33099
33100 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
33101
33102 static rtx
33103 ix86_expand_sse_compare (const struct builtin_description *d,
33104 tree exp, rtx target, bool swap)
33105 {
33106 rtx pat;
33107 tree arg0 = CALL_EXPR_ARG (exp, 0);
33108 tree arg1 = CALL_EXPR_ARG (exp, 1);
33109 rtx op0 = expand_normal (arg0);
33110 rtx op1 = expand_normal (arg1);
33111 rtx op2;
33112 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
33113 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
33114 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
33115 enum rtx_code comparison = d->comparison;
33116
33117 if (VECTOR_MODE_P (mode0))
33118 op0 = safe_vector_operand (op0, mode0);
33119 if (VECTOR_MODE_P (mode1))
33120 op1 = safe_vector_operand (op1, mode1);
33121
33122 /* Swap operands if we have a comparison that isn't available in
33123 hardware. */
33124 if (swap)
33125 {
33126 rtx tmp = gen_reg_rtx (mode1);
33127 emit_move_insn (tmp, op1);
33128 op1 = op0;
33129 op0 = tmp;
33130 }
33131
33132 if (optimize || !target
33133 || GET_MODE (target) != tmode
33134 || !insn_data[d->icode].operand[0].predicate (target, tmode))
33135 target = gen_reg_rtx (tmode);
33136
33137 if ((optimize && !register_operand (op0, mode0))
33138 || !insn_data[d->icode].operand[1].predicate (op0, mode0))
33139 op0 = copy_to_mode_reg (mode0, op0);
33140 if ((optimize && !register_operand (op1, mode1))
33141 || !insn_data[d->icode].operand[2].predicate (op1, mode1))
33142 op1 = copy_to_mode_reg (mode1, op1);
33143
33144 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
33145 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
33146 if (! pat)
33147 return 0;
33148 emit_insn (pat);
33149 return target;
33150 }
33151
33152 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
33153
33154 static rtx
33155 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
33156 rtx target)
33157 {
33158 rtx pat;
33159 tree arg0 = CALL_EXPR_ARG (exp, 0);
33160 tree arg1 = CALL_EXPR_ARG (exp, 1);
33161 rtx op0 = expand_normal (arg0);
33162 rtx op1 = expand_normal (arg1);
33163 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
33164 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
33165 enum rtx_code comparison = d->comparison;
33166
33167 if (VECTOR_MODE_P (mode0))
33168 op0 = safe_vector_operand (op0, mode0);
33169 if (VECTOR_MODE_P (mode1))
33170 op1 = safe_vector_operand (op1, mode1);
33171
33172 /* Swap operands if we have a comparison that isn't available in
33173 hardware. */
33174 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
33175 {
33176 rtx tmp = op1;
33177 op1 = op0;
33178 op0 = tmp;
33179 }
33180
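  /* The comi pattern only sets the flags; materialize the boolean result by
     zeroing an SImode register and then setting its low byte from the
     requested comparison of the flags output.  */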
33181 target = gen_reg_rtx (SImode);
33182 emit_move_insn (target, const0_rtx);
33183 target = gen_rtx_SUBREG (QImode, target, 0);
33184
33185 if ((optimize && !register_operand (op0, mode0))
33186 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
33187 op0 = copy_to_mode_reg (mode0, op0);
33188 if ((optimize && !register_operand (op1, mode1))
33189 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
33190 op1 = copy_to_mode_reg (mode1, op1);
33191
33192 pat = GEN_FCN (d->icode) (op0, op1);
33193 if (! pat)
33194 return 0;
33195 emit_insn (pat);
33196 emit_insn (gen_rtx_SET (VOIDmode,
33197 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
33198 gen_rtx_fmt_ee (comparison, QImode,
33199 SET_DEST (pat),
33200 const0_rtx)));
33201
33202 return SUBREG_REG (target);
33203 }
33204
33205 /* Subroutines of ix86_expand_args_builtin to take care of round insns. */
33206
33207 static rtx
33208 ix86_expand_sse_round (const struct builtin_description *d, tree exp,
33209 rtx target)
33210 {
33211 rtx pat;
33212 tree arg0 = CALL_EXPR_ARG (exp, 0);
33213 rtx op1, op0 = expand_normal (arg0);
33214 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
33215 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
33216
33217 if (optimize || target == 0
33218 || GET_MODE (target) != tmode
33219 || !insn_data[d->icode].operand[0].predicate (target, tmode))
33220 target = gen_reg_rtx (tmode);
33221
33222 if (VECTOR_MODE_P (mode0))
33223 op0 = safe_vector_operand (op0, mode0);
33224
33225 if ((optimize && !register_operand (op0, mode0))
33226 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
33227 op0 = copy_to_mode_reg (mode0, op0);
33228
33229 op1 = GEN_INT (d->comparison);
33230
33231 pat = GEN_FCN (d->icode) (target, op0, op1);
33232 if (! pat)
33233 return 0;
33234 emit_insn (pat);
33235 return target;
33236 }
33237
33238 static rtx
33239 ix86_expand_sse_round_vec_pack_sfix (const struct builtin_description *d,
33240 tree exp, rtx target)
33241 {
33242 rtx pat;
33243 tree arg0 = CALL_EXPR_ARG (exp, 0);
33244 tree arg1 = CALL_EXPR_ARG (exp, 1);
33245 rtx op0 = expand_normal (arg0);
33246 rtx op1 = expand_normal (arg1);
33247 rtx op2;
33248 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
33249 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
33250 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
33251
33252 if (optimize || target == 0
33253 || GET_MODE (target) != tmode
33254 || !insn_data[d->icode].operand[0].predicate (target, tmode))
33255 target = gen_reg_rtx (tmode);
33256
33257 op0 = safe_vector_operand (op0, mode0);
33258 op1 = safe_vector_operand (op1, mode1);
33259
33260 if ((optimize && !register_operand (op0, mode0))
33261 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
33262 op0 = copy_to_mode_reg (mode0, op0);
33263 if ((optimize && !register_operand (op1, mode1))
33264 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
33265 op1 = copy_to_mode_reg (mode1, op1);
33266
33267 op2 = GEN_INT (d->comparison);
33268
33269 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
33270 if (! pat)
33271 return 0;
33272 emit_insn (pat);
33273 return target;
33274 }
33275
33276 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
33277
33278 static rtx
33279 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
33280 rtx target)
33281 {
33282 rtx pat;
33283 tree arg0 = CALL_EXPR_ARG (exp, 0);
33284 tree arg1 = CALL_EXPR_ARG (exp, 1);
33285 rtx op0 = expand_normal (arg0);
33286 rtx op1 = expand_normal (arg1);
33287 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
33288 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
33289 enum rtx_code comparison = d->comparison;
33290
33291 if (VECTOR_MODE_P (mode0))
33292 op0 = safe_vector_operand (op0, mode0);
33293 if (VECTOR_MODE_P (mode1))
33294 op1 = safe_vector_operand (op1, mode1);
33295
33296 target = gen_reg_rtx (SImode);
33297 emit_move_insn (target, const0_rtx);
33298 target = gen_rtx_SUBREG (QImode, target, 0);
33299
33300 if ((optimize && !register_operand (op0, mode0))
33301 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
33302 op0 = copy_to_mode_reg (mode0, op0);
33303 if ((optimize && !register_operand (op1, mode1))
33304 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
33305 op1 = copy_to_mode_reg (mode1, op1);
33306
33307 pat = GEN_FCN (d->icode) (op0, op1);
33308 if (! pat)
33309 return 0;
33310 emit_insn (pat);
33311 emit_insn (gen_rtx_SET (VOIDmode,
33312 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
33313 gen_rtx_fmt_ee (comparison, QImode,
33314 SET_DEST (pat),
33315 const0_rtx)));
33316
33317 return SUBREG_REG (target);
33318 }
33319
33320 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
33321
33322 static rtx
33323 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
33324 tree exp, rtx target)
33325 {
33326 rtx pat;
33327 tree arg0 = CALL_EXPR_ARG (exp, 0);
33328 tree arg1 = CALL_EXPR_ARG (exp, 1);
33329 tree arg2 = CALL_EXPR_ARG (exp, 2);
33330 tree arg3 = CALL_EXPR_ARG (exp, 3);
33331 tree arg4 = CALL_EXPR_ARG (exp, 4);
33332 rtx scratch0, scratch1;
33333 rtx op0 = expand_normal (arg0);
33334 rtx op1 = expand_normal (arg1);
33335 rtx op2 = expand_normal (arg2);
33336 rtx op3 = expand_normal (arg3);
33337 rtx op4 = expand_normal (arg4);
33338 enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
33339
33340 tmode0 = insn_data[d->icode].operand[0].mode;
33341 tmode1 = insn_data[d->icode].operand[1].mode;
33342 modev2 = insn_data[d->icode].operand[2].mode;
33343 modei3 = insn_data[d->icode].operand[3].mode;
33344 modev4 = insn_data[d->icode].operand[4].mode;
33345 modei5 = insn_data[d->icode].operand[5].mode;
33346 modeimm = insn_data[d->icode].operand[6].mode;
33347
33348 if (VECTOR_MODE_P (modev2))
33349 op0 = safe_vector_operand (op0, modev2);
33350 if (VECTOR_MODE_P (modev4))
33351 op2 = safe_vector_operand (op2, modev4);
33352
33353 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
33354 op0 = copy_to_mode_reg (modev2, op0);
33355 if (!insn_data[d->icode].operand[3].predicate (op1, modei3))
33356 op1 = copy_to_mode_reg (modei3, op1);
33357 if ((optimize && !register_operand (op2, modev4))
33358 || !insn_data[d->icode].operand[4].predicate (op2, modev4))
33359 op2 = copy_to_mode_reg (modev4, op2);
33360 if (!insn_data[d->icode].operand[5].predicate (op3, modei5))
33361 op3 = copy_to_mode_reg (modei5, op3);
33362
33363 if (!insn_data[d->icode].operand[6].predicate (op4, modeimm))
33364 {
33365 error ("the fifth argument must be an 8-bit immediate");
33366 return const0_rtx;
33367 }
33368
33369 if (d->code == IX86_BUILTIN_PCMPESTRI128)
33370 {
33371 if (optimize || !target
33372 || GET_MODE (target) != tmode0
33373 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
33374 target = gen_reg_rtx (tmode0);
33375
33376 scratch1 = gen_reg_rtx (tmode1);
33377
33378 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
33379 }
33380 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
33381 {
33382 if (optimize || !target
33383 || GET_MODE (target) != tmode1
33384 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
33385 target = gen_reg_rtx (tmode1);
33386
33387 scratch0 = gen_reg_rtx (tmode0);
33388
33389 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
33390 }
33391 else
33392 {
33393 gcc_assert (d->flag);
33394
33395 scratch0 = gen_reg_rtx (tmode0);
33396 scratch1 = gen_reg_rtx (tmode1);
33397
33398 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
33399 }
33400
33401 if (! pat)
33402 return 0;
33403
33404 emit_insn (pat);
33405
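  /* For the flag-returning variants, D->FLAG holds the machine mode of the
     FLAGS_REG comparison whose outcome the builtin returns; extract that
     condition into the low byte of a fresh register.  */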
33406 if (d->flag)
33407 {
33408 target = gen_reg_rtx (SImode);
33409 emit_move_insn (target, const0_rtx);
33410 target = gen_rtx_SUBREG (QImode, target, 0);
33411
33412 emit_insn
33413 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
33414 gen_rtx_fmt_ee (EQ, QImode,
33415 gen_rtx_REG ((enum machine_mode) d->flag,
33416 FLAGS_REG),
33417 const0_rtx)));
33418 return SUBREG_REG (target);
33419 }
33420 else
33421 return target;
33422 }
33423
33424
33425 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
33426
33427 static rtx
33428 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
33429 tree exp, rtx target)
33430 {
33431 rtx pat;
33432 tree arg0 = CALL_EXPR_ARG (exp, 0);
33433 tree arg1 = CALL_EXPR_ARG (exp, 1);
33434 tree arg2 = CALL_EXPR_ARG (exp, 2);
33435 rtx scratch0, scratch1;
33436 rtx op0 = expand_normal (arg0);
33437 rtx op1 = expand_normal (arg1);
33438 rtx op2 = expand_normal (arg2);
33439 enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;
33440
33441 tmode0 = insn_data[d->icode].operand[0].mode;
33442 tmode1 = insn_data[d->icode].operand[1].mode;
33443 modev2 = insn_data[d->icode].operand[2].mode;
33444 modev3 = insn_data[d->icode].operand[3].mode;
33445 modeimm = insn_data[d->icode].operand[4].mode;
33446
33447 if (VECTOR_MODE_P (modev2))
33448 op0 = safe_vector_operand (op0, modev2);
33449 if (VECTOR_MODE_P (modev3))
33450 op1 = safe_vector_operand (op1, modev3);
33451
33452 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
33453 op0 = copy_to_mode_reg (modev2, op0);
33454 if ((optimize && !register_operand (op1, modev3))
33455 || !insn_data[d->icode].operand[3].predicate (op1, modev3))
33456 op1 = copy_to_mode_reg (modev3, op1);
33457
33458 if (!insn_data[d->icode].operand[4].predicate (op2, modeimm))
33459 {
33460 error ("the third argument must be an 8-bit immediate");
33461 return const0_rtx;
33462 }
33463
33464 if (d->code == IX86_BUILTIN_PCMPISTRI128)
33465 {
33466 if (optimize || !target
33467 || GET_MODE (target) != tmode0
33468 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
33469 target = gen_reg_rtx (tmode0);
33470
33471 scratch1 = gen_reg_rtx (tmode1);
33472
33473 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
33474 }
33475 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
33476 {
33477 if (optimize || !target
33478 || GET_MODE (target) != tmode1
33479 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
33480 target = gen_reg_rtx (tmode1);
33481
33482 scratch0 = gen_reg_rtx (tmode0);
33483
33484 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
33485 }
33486 else
33487 {
33488 gcc_assert (d->flag);
33489
33490 scratch0 = gen_reg_rtx (tmode0);
33491 scratch1 = gen_reg_rtx (tmode1);
33492
33493 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
33494 }
33495
33496 if (! pat)
33497 return 0;
33498
33499 emit_insn (pat);
33500
33501 if (d->flag)
33502 {
33503 target = gen_reg_rtx (SImode);
33504 emit_move_insn (target, const0_rtx);
33505 target = gen_rtx_SUBREG (QImode, target, 0);
33506
33507 emit_insn
33508 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
33509 gen_rtx_fmt_ee (EQ, QImode,
33510 gen_rtx_REG ((enum machine_mode) d->flag,
33511 FLAGS_REG),
33512 const0_rtx)));
33513 return SUBREG_REG (target);
33514 }
33515 else
33516 return target;
33517 }
33518
33519 /* Subroutine of ix86_expand_builtin to take care of insns with
33520 variable number of operands. */
33521
33522 static rtx
33523 ix86_expand_args_builtin (const struct builtin_description *d,
33524 tree exp, rtx target)
33525 {
33526 rtx pat, real_target;
33527 unsigned int i, nargs;
33528 unsigned int nargs_constant = 0;
33529 unsigned int mask_pos = 0;
33530 int num_memory = 0;
33531 struct
33532 {
33533 rtx op;
33534 enum machine_mode mode;
33535 } args[6];
33536 bool last_arg_count = false;
33537 enum insn_code icode = d->icode;
33538 const struct insn_data_d *insn_p = &insn_data[icode];
33539 enum machine_mode tmode = insn_p->operand[0].mode;
33540 enum machine_mode rmode = VOIDmode;
33541 bool swap = false;
33542 enum rtx_code comparison = d->comparison;
33543
33544 switch ((enum ix86_builtin_func_type) d->flag)
33545 {
33546 case V2DF_FTYPE_V2DF_ROUND:
33547 case V4DF_FTYPE_V4DF_ROUND:
33548 case V4SF_FTYPE_V4SF_ROUND:
33549 case V8SF_FTYPE_V8SF_ROUND:
33550 case V4SI_FTYPE_V4SF_ROUND:
33551 case V8SI_FTYPE_V8SF_ROUND:
33552 return ix86_expand_sse_round (d, exp, target);
33553 case V4SI_FTYPE_V2DF_V2DF_ROUND:
33554 case V8SI_FTYPE_V4DF_V4DF_ROUND:
33555 case V16SI_FTYPE_V8DF_V8DF_ROUND:
33556 return ix86_expand_sse_round_vec_pack_sfix (d, exp, target);
33557 case INT_FTYPE_V8SF_V8SF_PTEST:
33558 case INT_FTYPE_V4DI_V4DI_PTEST:
33559 case INT_FTYPE_V4DF_V4DF_PTEST:
33560 case INT_FTYPE_V4SF_V4SF_PTEST:
33561 case INT_FTYPE_V2DI_V2DI_PTEST:
33562 case INT_FTYPE_V2DF_V2DF_PTEST:
33563 return ix86_expand_sse_ptest (d, exp, target);
33564 case FLOAT128_FTYPE_FLOAT128:
33565 case FLOAT_FTYPE_FLOAT:
33566 case INT_FTYPE_INT:
33567 case UINT64_FTYPE_INT:
33568 case UINT16_FTYPE_UINT16:
33569 case INT64_FTYPE_INT64:
33570 case INT64_FTYPE_V4SF:
33571 case INT64_FTYPE_V2DF:
33572 case INT_FTYPE_V16QI:
33573 case INT_FTYPE_V8QI:
33574 case INT_FTYPE_V8SF:
33575 case INT_FTYPE_V4DF:
33576 case INT_FTYPE_V4SF:
33577 case INT_FTYPE_V2DF:
33578 case INT_FTYPE_V32QI:
33579 case V16QI_FTYPE_V16QI:
33580 case V8SI_FTYPE_V8SF:
33581 case V8SI_FTYPE_V4SI:
33582 case V8HI_FTYPE_V8HI:
33583 case V8HI_FTYPE_V16QI:
33584 case V8QI_FTYPE_V8QI:
33585 case V8SF_FTYPE_V8SF:
33586 case V8SF_FTYPE_V8SI:
33587 case V8SF_FTYPE_V4SF:
33588 case V8SF_FTYPE_V8HI:
33589 case V4SI_FTYPE_V4SI:
33590 case V4SI_FTYPE_V16QI:
33591 case V4SI_FTYPE_V4SF:
33592 case V4SI_FTYPE_V8SI:
33593 case V4SI_FTYPE_V8HI:
33594 case V4SI_FTYPE_V4DF:
33595 case V4SI_FTYPE_V2DF:
33596 case V4HI_FTYPE_V4HI:
33597 case V4DF_FTYPE_V4DF:
33598 case V4DF_FTYPE_V4SI:
33599 case V4DF_FTYPE_V4SF:
33600 case V4DF_FTYPE_V2DF:
33601 case V4SF_FTYPE_V4SF:
33602 case V4SF_FTYPE_V4SI:
33603 case V4SF_FTYPE_V8SF:
33604 case V4SF_FTYPE_V4DF:
33605 case V4SF_FTYPE_V8HI:
33606 case V4SF_FTYPE_V2DF:
33607 case V2DI_FTYPE_V2DI:
33608 case V2DI_FTYPE_V16QI:
33609 case V2DI_FTYPE_V8HI:
33610 case V2DI_FTYPE_V4SI:
33611 case V2DF_FTYPE_V2DF:
33612 case V2DF_FTYPE_V4SI:
33613 case V2DF_FTYPE_V4DF:
33614 case V2DF_FTYPE_V4SF:
33615 case V2DF_FTYPE_V2SI:
33616 case V2SI_FTYPE_V2SI:
33617 case V2SI_FTYPE_V4SF:
33618 case V2SI_FTYPE_V2SF:
33619 case V2SI_FTYPE_V2DF:
33620 case V2SF_FTYPE_V2SF:
33621 case V2SF_FTYPE_V2SI:
33622 case V32QI_FTYPE_V32QI:
33623 case V32QI_FTYPE_V16QI:
33624 case V16HI_FTYPE_V16HI:
33625 case V16HI_FTYPE_V8HI:
33626 case V8SI_FTYPE_V8SI:
33627 case V16HI_FTYPE_V16QI:
33628 case V8SI_FTYPE_V16QI:
33629 case V4DI_FTYPE_V16QI:
33630 case V8SI_FTYPE_V8HI:
33631 case V4DI_FTYPE_V8HI:
33632 case V4DI_FTYPE_V4SI:
33633 case V4DI_FTYPE_V2DI:
33634 case HI_FTYPE_HI:
33635 case UINT_FTYPE_V2DF:
33636 case UINT_FTYPE_V4SF:
33637 case UINT64_FTYPE_V2DF:
33638 case UINT64_FTYPE_V4SF:
33639 case V16QI_FTYPE_V8DI:
33640 case V16HI_FTYPE_V16SI:
33641 case V16SI_FTYPE_HI:
33642 case V16SI_FTYPE_V16SI:
33643 case V16SI_FTYPE_INT:
33644 case V16SF_FTYPE_FLOAT:
33645 case V16SF_FTYPE_V4SF:
33646 case V16SF_FTYPE_V16SF:
33647 case V8HI_FTYPE_V8DI:
33648 case V8UHI_FTYPE_V8UHI:
33649 case V8SI_FTYPE_V8DI:
33650 case V8USI_FTYPE_V8USI:
33651 case V8SF_FTYPE_V8DF:
33652 case V8DI_FTYPE_QI:
33653 case V8DI_FTYPE_INT64:
33654 case V8DI_FTYPE_V4DI:
33655 case V8DI_FTYPE_V8DI:
33656 case V8DF_FTYPE_DOUBLE:
33657 case V8DF_FTYPE_V4DF:
33658 case V8DF_FTYPE_V8DF:
33659 case V8DF_FTYPE_V8SI:
33660 nargs = 1;
33661 break;
33662 case V4SF_FTYPE_V4SF_VEC_MERGE:
33663 case V2DF_FTYPE_V2DF_VEC_MERGE:
33664 return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
33665 case FLOAT128_FTYPE_FLOAT128_FLOAT128:
33666 case V16QI_FTYPE_V16QI_V16QI:
33667 case V16QI_FTYPE_V8HI_V8HI:
33668 case V16SI_FTYPE_V16SI_V16SI:
33669 case V16SF_FTYPE_V16SF_V16SF:
33670 case V16SF_FTYPE_V16SF_V16SI:
33671 case V8QI_FTYPE_V8QI_V8QI:
33672 case V8QI_FTYPE_V4HI_V4HI:
33673 case V8HI_FTYPE_V8HI_V8HI:
33674 case V8HI_FTYPE_V16QI_V16QI:
33675 case V8HI_FTYPE_V4SI_V4SI:
33676 case V8SF_FTYPE_V8SF_V8SF:
33677 case V8SF_FTYPE_V8SF_V8SI:
33678 case V8DI_FTYPE_V8DI_V8DI:
33679 case V8DF_FTYPE_V8DF_V8DF:
33680 case V8DF_FTYPE_V8DF_V8DI:
33681 case V4SI_FTYPE_V4SI_V4SI:
33682 case V4SI_FTYPE_V8HI_V8HI:
33683 case V4SI_FTYPE_V4SF_V4SF:
33684 case V4SI_FTYPE_V2DF_V2DF:
33685 case V4HI_FTYPE_V4HI_V4HI:
33686 case V4HI_FTYPE_V8QI_V8QI:
33687 case V4HI_FTYPE_V2SI_V2SI:
33688 case V4DF_FTYPE_V4DF_V4DF:
33689 case V4DF_FTYPE_V4DF_V4DI:
33690 case V4SF_FTYPE_V4SF_V4SF:
33691 case V4SF_FTYPE_V4SF_V4SI:
33692 case V4SF_FTYPE_V4SF_V2SI:
33693 case V4SF_FTYPE_V4SF_V2DF:
33694 case V4SF_FTYPE_V4SF_UINT:
33695 case V4SF_FTYPE_V4SF_UINT64:
33696 case V4SF_FTYPE_V4SF_DI:
33697 case V4SF_FTYPE_V4SF_SI:
33698 case V2DI_FTYPE_V2DI_V2DI:
33699 case V2DI_FTYPE_V16QI_V16QI:
33700 case V2DI_FTYPE_V4SI_V4SI:
33701 case V2UDI_FTYPE_V4USI_V4USI:
33702 case V2DI_FTYPE_V2DI_V16QI:
33703 case V2DI_FTYPE_V2DF_V2DF:
33704 case V2SI_FTYPE_V2SI_V2SI:
33705 case V2SI_FTYPE_V4HI_V4HI:
33706 case V2SI_FTYPE_V2SF_V2SF:
33707 case V2DF_FTYPE_V2DF_V2DF:
33708 case V2DF_FTYPE_V2DF_V4SF:
33709 case V2DF_FTYPE_V2DF_V2DI:
33710 case V2DF_FTYPE_V2DF_DI:
33711 case V2DF_FTYPE_V2DF_SI:
33712 case V2DF_FTYPE_V2DF_UINT:
33713 case V2DF_FTYPE_V2DF_UINT64:
33714 case V2SF_FTYPE_V2SF_V2SF:
33715 case V1DI_FTYPE_V1DI_V1DI:
33716 case V1DI_FTYPE_V8QI_V8QI:
33717 case V1DI_FTYPE_V2SI_V2SI:
33718 case V32QI_FTYPE_V16HI_V16HI:
33719 case V16HI_FTYPE_V8SI_V8SI:
33720 case V32QI_FTYPE_V32QI_V32QI:
33721 case V16HI_FTYPE_V32QI_V32QI:
33722 case V16HI_FTYPE_V16HI_V16HI:
33723 case V8SI_FTYPE_V4DF_V4DF:
33724 case V8SI_FTYPE_V8SI_V8SI:
33725 case V8SI_FTYPE_V16HI_V16HI:
33726 case V4DI_FTYPE_V4DI_V4DI:
33727 case V4DI_FTYPE_V8SI_V8SI:
33728 case V4UDI_FTYPE_V8USI_V8USI:
33729 case QI_FTYPE_V8DI_V8DI:
33730 case HI_FTYPE_V16SI_V16SI:
33731 if (comparison == UNKNOWN)
33732 return ix86_expand_binop_builtin (icode, exp, target);
33733 nargs = 2;
33734 break;
33735 case V4SF_FTYPE_V4SF_V4SF_SWAP:
33736 case V2DF_FTYPE_V2DF_V2DF_SWAP:
33737 gcc_assert (comparison != UNKNOWN);
33738 nargs = 2;
33739 swap = true;
33740 break;
33741 case V16HI_FTYPE_V16HI_V8HI_COUNT:
33742 case V16HI_FTYPE_V16HI_SI_COUNT:
33743 case V8SI_FTYPE_V8SI_V4SI_COUNT:
33744 case V8SI_FTYPE_V8SI_SI_COUNT:
33745 case V4DI_FTYPE_V4DI_V2DI_COUNT:
33746 case V4DI_FTYPE_V4DI_INT_COUNT:
33747 case V8HI_FTYPE_V8HI_V8HI_COUNT:
33748 case V8HI_FTYPE_V8HI_SI_COUNT:
33749 case V4SI_FTYPE_V4SI_V4SI_COUNT:
33750 case V4SI_FTYPE_V4SI_SI_COUNT:
33751 case V4HI_FTYPE_V4HI_V4HI_COUNT:
33752 case V4HI_FTYPE_V4HI_SI_COUNT:
33753 case V2DI_FTYPE_V2DI_V2DI_COUNT:
33754 case V2DI_FTYPE_V2DI_SI_COUNT:
33755 case V2SI_FTYPE_V2SI_V2SI_COUNT:
33756 case V2SI_FTYPE_V2SI_SI_COUNT:
33757 case V1DI_FTYPE_V1DI_V1DI_COUNT:
33758 case V1DI_FTYPE_V1DI_SI_COUNT:
33759 nargs = 2;
33760 last_arg_count = true;
33761 break;
33762 case UINT64_FTYPE_UINT64_UINT64:
33763 case UINT_FTYPE_UINT_UINT:
33764 case UINT_FTYPE_UINT_USHORT:
33765 case UINT_FTYPE_UINT_UCHAR:
33766 case UINT16_FTYPE_UINT16_INT:
33767 case UINT8_FTYPE_UINT8_INT:
33768 case HI_FTYPE_HI_HI:
33769 case V16SI_FTYPE_V8DF_V8DF:
33770 nargs = 2;
33771 break;
33772 case V2DI_FTYPE_V2DI_INT_CONVERT:
33773 nargs = 2;
33774 rmode = V1TImode;
33775 nargs_constant = 1;
33776 break;
33777 case V4DI_FTYPE_V4DI_INT_CONVERT:
33778 nargs = 2;
33779 rmode = V2TImode;
33780 nargs_constant = 1;
33781 break;
33782 case V8HI_FTYPE_V8HI_INT:
33783 case V8HI_FTYPE_V8SF_INT:
33784 case V16HI_FTYPE_V16SF_INT:
33785 case V8HI_FTYPE_V4SF_INT:
33786 case V8SF_FTYPE_V8SF_INT:
33787 case V4SF_FTYPE_V16SF_INT:
33788 case V16SF_FTYPE_V16SF_INT:
33789 case V4SI_FTYPE_V4SI_INT:
33790 case V4SI_FTYPE_V8SI_INT:
33791 case V4HI_FTYPE_V4HI_INT:
33792 case V4DF_FTYPE_V4DF_INT:
33793 case V4DF_FTYPE_V8DF_INT:
33794 case V4SF_FTYPE_V4SF_INT:
33795 case V4SF_FTYPE_V8SF_INT:
33796 case V2DI_FTYPE_V2DI_INT:
33797 case V2DF_FTYPE_V2DF_INT:
33798 case V2DF_FTYPE_V4DF_INT:
33799 case V16HI_FTYPE_V16HI_INT:
33800 case V8SI_FTYPE_V8SI_INT:
33801 case V16SI_FTYPE_V16SI_INT:
33802 case V4SI_FTYPE_V16SI_INT:
33803 case V4DI_FTYPE_V4DI_INT:
33804 case V2DI_FTYPE_V4DI_INT:
33805 case V4DI_FTYPE_V8DI_INT:
33806 case HI_FTYPE_HI_INT:
33807 nargs = 2;
33808 nargs_constant = 1;
33809 break;
33810 case V16QI_FTYPE_V16QI_V16QI_V16QI:
33811 case V8SF_FTYPE_V8SF_V8SF_V8SF:
33812 case V4DF_FTYPE_V4DF_V4DF_V4DF:
33813 case V4SF_FTYPE_V4SF_V4SF_V4SF:
33814 case V2DF_FTYPE_V2DF_V2DF_V2DF:
33815 case V32QI_FTYPE_V32QI_V32QI_V32QI:
33816 case HI_FTYPE_V16SI_V16SI_HI:
33817 case QI_FTYPE_V8DI_V8DI_QI:
33818 case V16HI_FTYPE_V16SI_V16HI_HI:
33819 case V16QI_FTYPE_V16SI_V16QI_HI:
33820 case V16QI_FTYPE_V8DI_V16QI_QI:
33821 case V16SF_FTYPE_V16SF_V16SF_HI:
33822 case V16SF_FTYPE_V16SF_V16SF_V16SF:
33823 case V16SF_FTYPE_V16SF_V16SI_V16SF:
33824 case V16SF_FTYPE_V16SI_V16SF_HI:
33825 case V16SF_FTYPE_V16SI_V16SF_V16SF:
33826 case V16SF_FTYPE_V4SF_V16SF_HI:
33827 case V16SI_FTYPE_SI_V16SI_HI:
33828 case V16SI_FTYPE_V16HI_V16SI_HI:
33829 case V16SI_FTYPE_V16QI_V16SI_HI:
33830 case V16SI_FTYPE_V16SF_V16SI_HI:
33831 case V16SI_FTYPE_V16SI_V16SI_HI:
33832 case V16SI_FTYPE_V16SI_V16SI_V16SI:
33833 case V16SI_FTYPE_V4SI_V16SI_HI:
33834 case V2DI_FTYPE_V2DI_V2DI_V2DI:
33835 case V4DI_FTYPE_V4DI_V4DI_V4DI:
33836 case V8DF_FTYPE_V2DF_V8DF_QI:
33837 case V8DF_FTYPE_V4DF_V8DF_QI:
33838 case V8DF_FTYPE_V8DF_V8DF_QI:
33839 case V8DF_FTYPE_V8DF_V8DF_V8DF:
33840 case V8DF_FTYPE_V8DF_V8DI_V8DF:
33841 case V8DF_FTYPE_V8DI_V8DF_V8DF:
33842 case V8DF_FTYPE_V8SF_V8DF_QI:
33843 case V8DF_FTYPE_V8SI_V8DF_QI:
33844 case V8DI_FTYPE_DI_V8DI_QI:
33845 case V8DI_FTYPE_V16QI_V8DI_QI:
33846 case V8DI_FTYPE_V2DI_V8DI_QI:
33847 case V8DI_FTYPE_V4DI_V8DI_QI:
33848 case V8DI_FTYPE_V8DI_V8DI_QI:
33849 case V8DI_FTYPE_V8DI_V8DI_V8DI:
33850 case V8DI_FTYPE_V8HI_V8DI_QI:
33851 case V8DI_FTYPE_V8SI_V8DI_QI:
33852 case V8HI_FTYPE_V8DI_V8HI_QI:
33853 case V8SF_FTYPE_V8DF_V8SF_QI:
33854 case V8SI_FTYPE_V8DF_V8SI_QI:
33855 case V8SI_FTYPE_V8DI_V8SI_QI:
33856 case V4SI_FTYPE_V4SI_V4SI_V4SI:
33857 nargs = 3;
33858 break;
33859 case V32QI_FTYPE_V32QI_V32QI_INT:
33860 case V16HI_FTYPE_V16HI_V16HI_INT:
33861 case V16QI_FTYPE_V16QI_V16QI_INT:
33862 case V4DI_FTYPE_V4DI_V4DI_INT:
33863 case V8HI_FTYPE_V8HI_V8HI_INT:
33864 case V8SI_FTYPE_V8SI_V8SI_INT:
33865 case V8SI_FTYPE_V8SI_V4SI_INT:
33866 case V8SF_FTYPE_V8SF_V8SF_INT:
33867 case V8SF_FTYPE_V8SF_V4SF_INT:
33868 case V4SI_FTYPE_V4SI_V4SI_INT:
33869 case V4DF_FTYPE_V4DF_V4DF_INT:
33870 case V16SF_FTYPE_V16SF_V16SF_INT:
33871 case V16SF_FTYPE_V16SF_V4SF_INT:
33872 case V16SI_FTYPE_V16SI_V4SI_INT:
33873 case V4DF_FTYPE_V4DF_V2DF_INT:
33874 case V4SF_FTYPE_V4SF_V4SF_INT:
33875 case V2DI_FTYPE_V2DI_V2DI_INT:
33876 case V4DI_FTYPE_V4DI_V2DI_INT:
33877 case V2DF_FTYPE_V2DF_V2DF_INT:
33878 case QI_FTYPE_V8DI_V8DI_INT:
33879 case QI_FTYPE_V8DF_V8DF_INT:
33880 case QI_FTYPE_V2DF_V2DF_INT:
33881 case QI_FTYPE_V4SF_V4SF_INT:
33882 case HI_FTYPE_V16SI_V16SI_INT:
33883 case HI_FTYPE_V16SF_V16SF_INT:
33884 nargs = 3;
33885 nargs_constant = 1;
33886 break;
33887 case V4DI_FTYPE_V4DI_V4DI_INT_CONVERT:
33888 nargs = 3;
33889 rmode = V4DImode;
33890 nargs_constant = 1;
33891 break;
33892 case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT:
33893 nargs = 3;
33894 rmode = V2DImode;
33895 nargs_constant = 1;
33896 break;
33897 case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT:
33898 nargs = 3;
33899 rmode = DImode;
33900 nargs_constant = 1;
33901 break;
33902 case V2DI_FTYPE_V2DI_UINT_UINT:
33903 nargs = 3;
33904 nargs_constant = 2;
33905 break;
33906 case V16SF_FTYPE_V16SF_V16SF_V16SF_HI:
33907 case V16SF_FTYPE_V16SF_V16SI_V16SF_HI:
33908 case V16SF_FTYPE_V16SI_V16SF_V16SF_HI:
33909 case V16SI_FTYPE_V16SI_V16SI_V16SI_HI:
33910 case V16SI_FTYPE_V16SI_V4SI_V16SI_HI:
33911 case V2DF_FTYPE_V2DF_V2DF_V2DF_QI:
33912 case V2DF_FTYPE_V2DF_V4SF_V2DF_QI:
33913 case V4SF_FTYPE_V4SF_V2DF_V4SF_QI:
33914 case V4SF_FTYPE_V4SF_V4SF_V4SF_QI:
33915 case V8DF_FTYPE_V8DF_V8DF_V8DF_QI:
33916 case V8DF_FTYPE_V8DF_V8DI_V8DF_QI:
33917 case V8DF_FTYPE_V8DI_V8DF_V8DF_QI:
33918 case V8DI_FTYPE_V16SI_V16SI_V8DI_QI:
33919 case V8DI_FTYPE_V8DI_SI_V8DI_V8DI:
33920 case V8DI_FTYPE_V8DI_V2DI_V8DI_QI:
33921 case V8DI_FTYPE_V8DI_V8DI_V8DI_QI:
33922 nargs = 4;
33923 break;
33924 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
33925 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
33926 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
33927 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
33928 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT:
33929 nargs = 4;
33930 nargs_constant = 1;
33931 break;
33932 case QI_FTYPE_V2DF_V2DF_INT_QI:
33933 case QI_FTYPE_V4SF_V4SF_INT_QI:
33934 nargs = 4;
33935 mask_pos = 1;
33936 nargs_constant = 1;
33937 break;
33938 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
33939 nargs = 4;
33940 nargs_constant = 2;
33941 break;
33942 case UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED:
33943 case UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG:
33944 nargs = 4;
33945 break;
33946 case QI_FTYPE_V8DI_V8DI_INT_QI:
33947 case HI_FTYPE_V16SI_V16SI_INT_HI:
33948 case QI_FTYPE_V8DF_V8DF_INT_QI:
33949 case HI_FTYPE_V16SF_V16SF_INT_HI:
33950 mask_pos = 1;
33951 nargs = 4;
33952 nargs_constant = 1;
33953 break;
33954 case V8DF_FTYPE_V8DF_INT_V8DF_QI:
33955 case V16SF_FTYPE_V16SF_INT_V16SF_HI:
33956 case V16HI_FTYPE_V16SF_INT_V16HI_HI:
33957 case V16SI_FTYPE_V16SI_INT_V16SI_HI:
33958 case V4SI_FTYPE_V16SI_INT_V4SI_QI:
33959 case V4DI_FTYPE_V8DI_INT_V4DI_QI:
33960 case V4DF_FTYPE_V8DF_INT_V4DF_QI:
33961 case V4SF_FTYPE_V16SF_INT_V4SF_QI:
33962 case V8DI_FTYPE_V8DI_INT_V8DI_QI:
33963 nargs = 4;
33964 mask_pos = 2;
33965 nargs_constant = 1;
33966 break;
33967 case V16SF_FTYPE_V16SF_V4SF_INT_V16SF_HI:
33968 case V16SI_FTYPE_V16SI_V4SI_INT_V16SI_HI:
33969 case V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI:
33970 case V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI:
33971 case V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI:
33972 case V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI:
33973 case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI:
33974 case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI:
33975 case V8DF_FTYPE_V8DF_V4DF_INT_V8DF_QI:
33976 case V8DI_FTYPE_V8DI_V4DI_INT_V8DI_QI:
33977 nargs = 5;
33978 mask_pos = 2;
33979 nargs_constant = 1;
33980 break;
33981 case V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI:
33982 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI:
33983 case V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI:
33984 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI:
33985 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI:
33986 nargs = 5;
33987 mask_pos = 1;
33988 nargs_constant = 1;
33989 break;
33990
33991 default:
33992 gcc_unreachable ();
33993 }
33994
33995 gcc_assert (nargs <= ARRAY_SIZE (args));
33996
33997 if (comparison != UNKNOWN)
33998 {
33999 gcc_assert (nargs == 2);
34000 return ix86_expand_sse_compare (d, exp, target, swap);
34001 }
34002
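  /* For the *_CONVERT cases above, the insn computes its result in TMODE
     while the builtin is declared to return RMODE.  When the two differ,
     let the insn write a fresh TMODE register and hand back an RMODE subreg
     of it.  */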
34003 if (rmode == VOIDmode || rmode == tmode)
34004 {
34005 if (optimize
34006 || target == 0
34007 || GET_MODE (target) != tmode
34008 || !insn_p->operand[0].predicate (target, tmode))
34009 target = gen_reg_rtx (tmode);
34010 real_target = target;
34011 }
34012 else
34013 {
34014 real_target = gen_reg_rtx (tmode);
34015 target = simplify_gen_subreg (rmode, real_target, tmode, 0);
34016 }
34017
34018 for (i = 0; i < nargs; i++)
34019 {
34020 tree arg = CALL_EXPR_ARG (exp, i);
34021 rtx op = expand_normal (arg);
34022 enum machine_mode mode = insn_p->operand[i + 1].mode;
34023 bool match = insn_p->operand[i + 1].predicate (op, mode);
34024
34025 if (last_arg_count && (i + 1) == nargs)
34026 {
34027 /* SIMD shift insns take either an 8-bit immediate or
34028 register as count. But builtin functions take int as
34029 count. If count doesn't match, we put it in register. */
34030 if (!match)
34031 {
34032 op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
34033 if (!insn_p->operand[i + 1].predicate (op, mode))
34034 op = copy_to_reg (op);
34035 }
34036 }
34037 else if ((mask_pos && (nargs - i - mask_pos) == nargs_constant) ||
34038 (!mask_pos && (nargs - i) <= nargs_constant))
34039 {
34040 if (!match)
34041 switch (icode)
34042 {
34043 case CODE_FOR_avx2_inserti128:
34044 case CODE_FOR_avx2_extracti128:
34045 error ("the last argument must be an 1-bit immediate");
34046 return const0_rtx;
34047
34048 case CODE_FOR_avx512f_cmpv8di3_mask:
34049 case CODE_FOR_avx512f_cmpv16si3_mask:
34050 case CODE_FOR_avx512f_ucmpv8di3_mask:
34051 case CODE_FOR_avx512f_ucmpv16si3_mask:
34052 error ("the last argument must be a 3-bit immediate");
34053 return const0_rtx;
34054
34055 case CODE_FOR_sse4_1_roundsd:
34056 case CODE_FOR_sse4_1_roundss:
34057
34058 case CODE_FOR_sse4_1_roundpd:
34059 case CODE_FOR_sse4_1_roundps:
34060 case CODE_FOR_avx_roundpd256:
34061 case CODE_FOR_avx_roundps256:
34062
34063 case CODE_FOR_sse4_1_roundpd_vec_pack_sfix:
34064 case CODE_FOR_sse4_1_roundps_sfix:
34065 case CODE_FOR_avx_roundpd_vec_pack_sfix256:
34066 case CODE_FOR_avx_roundps_sfix256:
34067
34068 case CODE_FOR_sse4_1_blendps:
34069 case CODE_FOR_avx_blendpd256:
34070 case CODE_FOR_avx_vpermilv4df:
34071 case CODE_FOR_avx512f_getmantv8df_mask:
34072 case CODE_FOR_avx512f_getmantv16sf_mask:
34073 error ("the last argument must be a 4-bit immediate");
34074 return const0_rtx;
34075
34076 case CODE_FOR_sha1rnds4:
34077 case CODE_FOR_sse4_1_blendpd:
34078 case CODE_FOR_avx_vpermilv2df:
34079 case CODE_FOR_xop_vpermil2v2df3:
34080 case CODE_FOR_xop_vpermil2v4sf3:
34081 case CODE_FOR_xop_vpermil2v4df3:
34082 case CODE_FOR_xop_vpermil2v8sf3:
34083 case CODE_FOR_avx512f_vinsertf32x4_mask:
34084 case CODE_FOR_avx512f_vinserti32x4_mask:
34085 case CODE_FOR_avx512f_vextractf32x4_mask:
34086 case CODE_FOR_avx512f_vextracti32x4_mask:
34087 error ("the last argument must be a 2-bit immediate");
34088 return const0_rtx;
34089
34090 case CODE_FOR_avx_vextractf128v4df:
34091 case CODE_FOR_avx_vextractf128v8sf:
34092 case CODE_FOR_avx_vextractf128v8si:
34093 case CODE_FOR_avx_vinsertf128v4df:
34094 case CODE_FOR_avx_vinsertf128v8sf:
34095 case CODE_FOR_avx_vinsertf128v8si:
34096 case CODE_FOR_avx512f_vinsertf64x4_mask:
34097 case CODE_FOR_avx512f_vinserti64x4_mask:
34098 case CODE_FOR_avx512f_vextractf64x4_mask:
34099 case CODE_FOR_avx512f_vextracti64x4_mask:
34100 error ("the last argument must be a 1-bit immediate");
34101 return const0_rtx;
34102
34103 case CODE_FOR_avx_vmcmpv2df3:
34104 case CODE_FOR_avx_vmcmpv4sf3:
34105 case CODE_FOR_avx_cmpv2df3:
34106 case CODE_FOR_avx_cmpv4sf3:
34107 case CODE_FOR_avx_cmpv4df3:
34108 case CODE_FOR_avx_cmpv8sf3:
34109 case CODE_FOR_avx512f_cmpv8df3_mask:
34110 case CODE_FOR_avx512f_cmpv16sf3_mask:
34111 case CODE_FOR_avx512f_vmcmpv2df3_mask:
34112 case CODE_FOR_avx512f_vmcmpv4sf3_mask:
34113 error ("the last argument must be a 5-bit immediate");
34114 return const0_rtx;
34115
34116 default:
34117 switch (nargs_constant)
34118 {
34119 case 2:
34120 		  if ((mask_pos && (nargs - i - mask_pos) == nargs_constant)
34121 		      || (!mask_pos && (nargs - i) == nargs_constant))
34122 		    {
34123 		      error ("the next to last argument must be an 8-bit immediate");
34124 		      break;
34125 		    }
		  /* FALLTHRU */
34126 		case 1:
34127 error ("the last argument must be an 8-bit immediate");
34128 break;
34129 default:
34130 gcc_unreachable ();
34131 }
34132 return const0_rtx;
34133 }
34134 }
34135 else
34136 {
34137 if (VECTOR_MODE_P (mode))
34138 op = safe_vector_operand (op, mode);
34139
34140 /* If we aren't optimizing, only allow one memory operand to
34141 be generated. */
34142 if (memory_operand (op, mode))
34143 num_memory++;
34144
34145 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
34146 {
34147 if (optimize || !match || num_memory > 1)
34148 op = copy_to_mode_reg (mode, op);
34149 }
34150 else
34151 {
34152 op = copy_to_reg (op);
34153 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
34154 }
34155 }
34156
34157 args[i].op = op;
34158 args[i].mode = mode;
34159 }
34160
34161 switch (nargs)
34162 {
34163 case 1:
34164 pat = GEN_FCN (icode) (real_target, args[0].op);
34165 break;
34166 case 2:
34167 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
34168 break;
34169 case 3:
34170 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
34171 args[2].op);
34172 break;
34173 case 4:
34174 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
34175 args[2].op, args[3].op);
34176 break;
34177     case 5:
34178       pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
34179 			     args[2].op, args[3].op, args[4].op);
      break;
34180     case 6:
34181 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
34182 args[2].op, args[3].op, args[4].op,
34183 args[5].op);
34184 break;
34185 default:
34186 gcc_unreachable ();
34187 }
34188
34189 if (! pat)
34190 return 0;
34191
34192 emit_insn (pat);
34193 return target;
34194 }
34195
34196 /* Transform a pattern of the following layout:
34197    (parallel [
34198 	(set (A B))
34199 	(unspec [C] UNSPEC_EMBEDDED_ROUNDING)])
34200    into:
34201    (set (A B))
34203
34204 Or:
34205 (parallel [ A B
34206 ...
34207 (unspec [C] UNSPEC_EMBEDDED_ROUNDING)
34208 ...
34209 ])
34210 into:
34211 (parallel [ A B ... ]) */
34212
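/* A usage note: both callers in this file (ix86_expand_sse_comi_round and
   ix86_expand_round_builtin) invoke this routine only when the rounding
   operand turned out to be NO_ROUND, i.e. when the embedded-rounding
   UNSPEC is redundant and the plain, non-rounding form of the insn
   pattern should be matched instead.  */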
34213 static rtx
34214 ix86_erase_embedded_rounding (rtx pat)
34215 {
34216 if (GET_CODE (pat) == INSN)
34217 pat = PATTERN (pat);
34218
34219 gcc_assert (GET_CODE (pat) == PARALLEL);
34220
34221 if (XVECLEN (pat, 0) == 2)
34222 {
34223 rtx p0 = XVECEXP (pat, 0, 0);
34224 rtx p1 = XVECEXP (pat, 0, 1);
34225
34226 gcc_assert (GET_CODE (p0) == SET
34227 && GET_CODE (p1) == UNSPEC
34228 && XINT (p1, 1) == UNSPEC_EMBEDDED_ROUNDING);
34229
34230 return p0;
34231 }
34232 else
34233 {
34234 rtx *res = XALLOCAVEC (rtx, XVECLEN (pat, 0));
34235 int i = 0;
34236 int j = 0;
34237
34238 for (; i < XVECLEN (pat, 0); ++i)
34239 {
34240 rtx elem = XVECEXP (pat, 0, i);
34241 if (GET_CODE (elem) != UNSPEC
34242 || XINT (elem, 1) != UNSPEC_EMBEDDED_ROUNDING)
34243 res [j++] = elem;
34244 }
34245
34246     /* No more than one occurrence was removed.  */
34247 gcc_assert (j >= XVECLEN (pat, 0) - 1);
34248
34249 return gen_rtx_PARALLEL (GET_MODE (pat), gen_rtvec_v (j, res));
34250 }
34251 }
34252
34253 /* Subroutine of ix86_expand_round_builtin to take care of comi insns
34254 with rounding. */
34255 static rtx
34256 ix86_expand_sse_comi_round (const struct builtin_description *d,
34257 tree exp, rtx target)
34258 {
34259 rtx pat, set_dst;
34260 tree arg0 = CALL_EXPR_ARG (exp, 0);
34261 tree arg1 = CALL_EXPR_ARG (exp, 1);
34262 tree arg2 = CALL_EXPR_ARG (exp, 2);
34263 tree arg3 = CALL_EXPR_ARG (exp, 3);
34264 rtx op0 = expand_normal (arg0);
34265 rtx op1 = expand_normal (arg1);
34266 rtx op2 = expand_normal (arg2);
34267 rtx op3 = expand_normal (arg3);
34268 enum insn_code icode = d->icode;
34269 const struct insn_data_d *insn_p = &insn_data[icode];
34270 enum machine_mode mode0 = insn_p->operand[0].mode;
34271 enum machine_mode mode1 = insn_p->operand[1].mode;
34272 enum rtx_code comparison = UNEQ;
34273 bool need_ucomi = false;
34274
34275 /* See avxintrin.h for values. */
34276 enum rtx_code comi_comparisons[32] =
34277 {
34278 UNEQ, GT, GE, UNORDERED, LTGT, UNLE, UNLT, ORDERED, UNEQ, UNLT,
34279 UNLE, LT, LTGT, GE, GT, LT, UNEQ, GT, GE, UNORDERED, LTGT, UNLE,
34280 UNLT, ORDERED, UNEQ, UNLT, UNLE, LT, LTGT, GE, GT, LT
34281 };
34282 bool need_ucomi_values[32] =
34283 {
34284 true, false, false, true, true, false, false, true,
34285 true, false, false, true, true, false, false, true,
34286 false, true, true, false, false, true, true, false,
34287 false, true, true, false, false, true, true, false
34288 };
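  /* Example (assuming the avxintrin.h encoding where _CMP_EQ_OQ is
     predicate 0): index 0 selects UNEQ together with need_ucomi == true,
     so the ordered-quiet equality test is emitted with the quiet
     (non-signalling) UCOMI variant.  */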
34289
34290 if (!CONST_INT_P (op2))
34291 {
34292       error ("the third argument must be a comparison constant");
34293 return const0_rtx;
34294 }
34295 if (INTVAL (op2) < 0 || INTVAL (op2) >= 32)
34296 {
34297       error ("incorrect comparison mode");
34298 return const0_rtx;
34299 }
34300
34301 if (!insn_p->operand[2].predicate (op3, SImode))
34302 {
34303 error ("incorrect rounding operand");
34304 return const0_rtx;
34305 }
34306
34307 comparison = comi_comparisons[INTVAL (op2)];
34308 need_ucomi = need_ucomi_values[INTVAL (op2)];
34309
34310 if (VECTOR_MODE_P (mode0))
34311 op0 = safe_vector_operand (op0, mode0);
34312 if (VECTOR_MODE_P (mode1))
34313 op1 = safe_vector_operand (op1, mode1);
34314
34315 target = gen_reg_rtx (SImode);
34316 emit_move_insn (target, const0_rtx);
34317 target = gen_rtx_SUBREG (QImode, target, 0);
34318
34319 if ((optimize && !register_operand (op0, mode0))
34320 || !insn_p->operand[0].predicate (op0, mode0))
34321 op0 = copy_to_mode_reg (mode0, op0);
34322 if ((optimize && !register_operand (op1, mode1))
34323 || !insn_p->operand[1].predicate (op1, mode1))
34324 op1 = copy_to_mode_reg (mode1, op1);
34325
34326 if (need_ucomi)
34327 icode = icode == CODE_FOR_sse_comi_round
34328 ? CODE_FOR_sse_ucomi_round
34329 : CODE_FOR_sse2_ucomi_round;
34330
34331 pat = GEN_FCN (icode) (op0, op1, op3);
34332 if (! pat)
34333 return 0;
34334
34335 /* Rounding operand can be either NO_ROUND or ROUND_SAE at this point. */
34336 if (INTVAL (op3) == NO_ROUND)
34337 {
34338 pat = ix86_erase_embedded_rounding (pat);
34339 if (! pat)
34340 return 0;
34341
34342 set_dst = SET_DEST (pat);
34343 }
34344 else
34345 {
34346 gcc_assert (GET_CODE (XVECEXP (pat, 0, 0)) == SET);
34347 set_dst = SET_DEST (XVECEXP (pat, 0, 0));
34348 }
34349
34350 emit_insn (pat);
34351 emit_insn (gen_rtx_SET (VOIDmode,
34352 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
34353 gen_rtx_fmt_ee (comparison, QImode,
34354 set_dst,
34355 const0_rtx)));
34356
34357 return SUBREG_REG (target);
34358 }
34359
34360 static rtx
34361 ix86_expand_round_builtin (const struct builtin_description *d,
34362 tree exp, rtx target)
34363 {
34364 rtx pat;
34365 unsigned int i, nargs;
34366 struct
34367 {
34368 rtx op;
34369 enum machine_mode mode;
34370 } args[6];
34371 enum insn_code icode = d->icode;
34372 const struct insn_data_d *insn_p = &insn_data[icode];
34373 enum machine_mode tmode = insn_p->operand[0].mode;
34374 unsigned int nargs_constant = 0;
34375 unsigned int redundant_embed_rnd = 0;
34376
34377 switch ((enum ix86_builtin_func_type) d->flag)
34378 {
34379 case UINT64_FTYPE_V2DF_INT:
34380 case UINT64_FTYPE_V4SF_INT:
34381 case UINT_FTYPE_V2DF_INT:
34382 case UINT_FTYPE_V4SF_INT:
34383 case INT64_FTYPE_V2DF_INT:
34384 case INT64_FTYPE_V4SF_INT:
34385 case INT_FTYPE_V2DF_INT:
34386 case INT_FTYPE_V4SF_INT:
34387 nargs = 2;
34388 break;
34389 case V4SF_FTYPE_V4SF_UINT_INT:
34390 case V4SF_FTYPE_V4SF_UINT64_INT:
34391 case V2DF_FTYPE_V2DF_UINT64_INT:
34392 case V4SF_FTYPE_V4SF_INT_INT:
34393 case V4SF_FTYPE_V4SF_INT64_INT:
34394 case V2DF_FTYPE_V2DF_INT64_INT:
34395 case V4SF_FTYPE_V4SF_V4SF_INT:
34396 case V2DF_FTYPE_V2DF_V2DF_INT:
34397 case V4SF_FTYPE_V4SF_V2DF_INT:
34398 case V2DF_FTYPE_V2DF_V4SF_INT:
34399 nargs = 3;
34400 break;
34401 case V8SF_FTYPE_V8DF_V8SF_QI_INT:
34402 case V8DF_FTYPE_V8DF_V8DF_QI_INT:
34403 case V8SI_FTYPE_V8DF_V8SI_QI_INT:
34404 case V16SF_FTYPE_V16SF_V16SF_HI_INT:
34405 case V16SF_FTYPE_V16SI_V16SF_HI_INT:
34406 case V16SI_FTYPE_V16SF_V16SI_HI_INT:
34407 case V8DF_FTYPE_V8SF_V8DF_QI_INT:
34408 case V16SF_FTYPE_V16HI_V16SF_HI_INT:
34409 case V2DF_FTYPE_V2DF_V2DF_V2DF_INT:
34410 case V4SF_FTYPE_V4SF_V4SF_V4SF_INT:
34411 nargs = 4;
34412 break;
34413 case V4SF_FTYPE_V4SF_V4SF_INT_INT:
34414 case V2DF_FTYPE_V2DF_V2DF_INT_INT:
34415 nargs_constant = 2;
34416 nargs = 4;
34417 break;
34418 case INT_FTYPE_V4SF_V4SF_INT_INT:
34419 case INT_FTYPE_V2DF_V2DF_INT_INT:
34420 return ix86_expand_sse_comi_round (d, exp, target);
34421 case V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT:
34422 case V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT:
34423 case V2DF_FTYPE_V2DF_V2DF_V2DF_QI_INT:
34424 case V2DF_FTYPE_V2DF_V4SF_V2DF_QI_INT:
34425 case V4SF_FTYPE_V4SF_V4SF_V4SF_QI_INT:
34426 case V4SF_FTYPE_V4SF_V2DF_V4SF_QI_INT:
34427 nargs = 5;
34428 break;
34429 case V16SF_FTYPE_V16SF_INT_V16SF_HI_INT:
34430 case V8DF_FTYPE_V8DF_INT_V8DF_QI_INT:
34431 nargs_constant = 4;
34432 nargs = 5;
34433 break;
34434 case QI_FTYPE_V8DF_V8DF_INT_QI_INT:
34435 case QI_FTYPE_V2DF_V2DF_INT_QI_INT:
34436 case HI_FTYPE_V16SF_V16SF_INT_HI_INT:
34437 case QI_FTYPE_V4SF_V4SF_INT_QI_INT:
34438 nargs_constant = 3;
34439 nargs = 5;
34440 break;
34441 case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI_INT:
34442 case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI_INT:
34443 nargs = 6;
34444 nargs_constant = 4;
34445 break;
34446 case V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT:
34447 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT:
34448 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT:
34449 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT:
34450 nargs = 6;
34451 nargs_constant = 3;
34452 break;
34453 default:
34454 gcc_unreachable ();
34455 }
34456 gcc_assert (nargs <= ARRAY_SIZE (args));
34457
34458 if (optimize
34459 || target == 0
34460 || GET_MODE (target) != tmode
34461 || !insn_p->operand[0].predicate (target, tmode))
34462 target = gen_reg_rtx (tmode);
34463
34464 for (i = 0; i < nargs; i++)
34465 {
34466 tree arg = CALL_EXPR_ARG (exp, i);
34467 rtx op = expand_normal (arg);
34468 enum machine_mode mode = insn_p->operand[i + 1].mode;
34469 bool match = insn_p->operand[i + 1].predicate (op, mode);
34470
34471 if (i == nargs - nargs_constant)
34472 {
34473 if (!match)
34474 {
34475 switch (icode)
34476 {
34477 case CODE_FOR_avx512f_getmantv8df_mask_round:
34478 case CODE_FOR_avx512f_getmantv16sf_mask_round:
34479 case CODE_FOR_avx512f_getmantv2df_round:
34480 case CODE_FOR_avx512f_getmantv4sf_round:
34481 error ("the immediate argument must be a 4-bit immediate");
34482 return const0_rtx;
34483 case CODE_FOR_avx512f_cmpv8df3_mask_round:
34484 case CODE_FOR_avx512f_cmpv16sf3_mask_round:
34485 case CODE_FOR_avx512f_vmcmpv2df3_mask_round:
34486 case CODE_FOR_avx512f_vmcmpv4sf3_mask_round:
34487 error ("the immediate argument must be a 5-bit immediate");
34488 return const0_rtx;
34489 default:
34490 error ("the immediate argument must be an 8-bit immediate");
34491 return const0_rtx;
34492 }
34493 }
34494 }
34495       else if (i == nargs - 1)
34496 {
34497 if (!insn_p->operand[nargs].predicate (op, SImode))
34498 {
34499 error ("incorrect rounding operand");
34500 return const0_rtx;
34501 }
34502
34503 	  /* If there is no rounding, use the normal version of the pattern.  */
34504 if (INTVAL (op) == NO_ROUND)
34505 redundant_embed_rnd = 1;
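	  /* For example (assuming _MM_FROUND_CUR_DIRECTION is the
	     NO_ROUND encoding), _mm512_add_round_pd (a, b,
	     _MM_FROUND_CUR_DIRECTION) takes this path, and the
	     embedded-rounding UNSPEC is stripped from the generated
	     pattern below.  */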
34506 }
34507 else
34508 {
34509 if (VECTOR_MODE_P (mode))
34510 op = safe_vector_operand (op, mode);
34511
34512 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
34513 {
34514 if (optimize || !match)
34515 op = copy_to_mode_reg (mode, op);
34516 }
34517 else
34518 {
34519 op = copy_to_reg (op);
34520 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
34521 }
34522 }
34523
34524 args[i].op = op;
34525 args[i].mode = mode;
34526 }
34527
34528 switch (nargs)
34529 {
34530 case 1:
34531 pat = GEN_FCN (icode) (target, args[0].op);
34532 break;
34533 case 2:
34534 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
34535 break;
34536 case 3:
34537 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
34538 args[2].op);
34539 break;
34540 case 4:
34541 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
34542 args[2].op, args[3].op);
34543 break;
34544     case 5:
34545       pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
34546 			     args[2].op, args[3].op, args[4].op);
      break;
34547     case 6:
34548 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
34549 args[2].op, args[3].op, args[4].op,
34550 args[5].op);
34551 break;
34552 default:
34553 gcc_unreachable ();
34554 }
34555
34556 if (!pat)
34557 return 0;
34558
34559 if (redundant_embed_rnd)
34560 pat = ix86_erase_embedded_rounding (pat);
34561
34562 emit_insn (pat);
34563 return target;
34564 }
34565
34566 /* Subroutine of ix86_expand_builtin to take care of special insns
34567 with variable number of operands. */
34568
34569 static rtx
34570 ix86_expand_special_args_builtin (const struct builtin_description *d,
34571 tree exp, rtx target)
34572 {
34573 tree arg;
34574 rtx pat, op;
34575 unsigned int i, nargs, arg_adjust, memory;
34576 bool aligned_mem = false;
34577 struct
34578 {
34579 rtx op;
34580 enum machine_mode mode;
34581 } args[3];
34582 enum insn_code icode = d->icode;
34583 bool last_arg_constant = false;
34584 const struct insn_data_d *insn_p = &insn_data[icode];
34585 enum machine_mode tmode = insn_p->operand[0].mode;
34586 enum { load, store } klass;
34587
34588 switch ((enum ix86_builtin_func_type) d->flag)
34589 {
34590 case VOID_FTYPE_VOID:
34591 emit_insn (GEN_FCN (icode) (target));
34592 return 0;
34593 case VOID_FTYPE_UINT64:
34594 case VOID_FTYPE_UNSIGNED:
34595 nargs = 0;
34596 klass = store;
34597 memory = 0;
34598 break;
34599
34600 case INT_FTYPE_VOID:
34601 case UINT64_FTYPE_VOID:
34602 case UNSIGNED_FTYPE_VOID:
34603 nargs = 0;
34604 klass = load;
34605 memory = 0;
34606 break;
34607 case UINT64_FTYPE_PUNSIGNED:
34608 case V2DI_FTYPE_PV2DI:
34609 case V4DI_FTYPE_PV4DI:
34610 case V32QI_FTYPE_PCCHAR:
34611 case V16QI_FTYPE_PCCHAR:
34612 case V8SF_FTYPE_PCV4SF:
34613 case V8SF_FTYPE_PCFLOAT:
34614 case V4SF_FTYPE_PCFLOAT:
34615 case V4DF_FTYPE_PCV2DF:
34616 case V4DF_FTYPE_PCDOUBLE:
34617 case V2DF_FTYPE_PCDOUBLE:
34618 case VOID_FTYPE_PVOID:
34619 case V16SI_FTYPE_PV4SI:
34620 case V16SF_FTYPE_PV4SF:
34621 case V8DI_FTYPE_PV4DI:
34622 case V8DI_FTYPE_PV8DI:
34623 case V8DF_FTYPE_PV4DF:
34624 nargs = 1;
34625 klass = load;
34626 memory = 0;
34627 switch (icode)
34628 {
34629 case CODE_FOR_sse4_1_movntdqa:
34630 case CODE_FOR_avx2_movntdqa:
34631 case CODE_FOR_avx512f_movntdqa:
34632 aligned_mem = true;
34633 break;
34634 default:
34635 break;
34636 }
34637 break;
34638 case VOID_FTYPE_PV2SF_V4SF:
34639 case VOID_FTYPE_PV8DI_V8DI:
34640 case VOID_FTYPE_PV4DI_V4DI:
34641 case VOID_FTYPE_PV2DI_V2DI:
34642 case VOID_FTYPE_PCHAR_V32QI:
34643 case VOID_FTYPE_PCHAR_V16QI:
34644 case VOID_FTYPE_PFLOAT_V16SF:
34645 case VOID_FTYPE_PFLOAT_V8SF:
34646 case VOID_FTYPE_PFLOAT_V4SF:
34647 case VOID_FTYPE_PDOUBLE_V8DF:
34648 case VOID_FTYPE_PDOUBLE_V4DF:
34649 case VOID_FTYPE_PDOUBLE_V2DF:
34650 case VOID_FTYPE_PLONGLONG_LONGLONG:
34651 case VOID_FTYPE_PULONGLONG_ULONGLONG:
34652 case VOID_FTYPE_PINT_INT:
34653 nargs = 1;
34654 klass = store;
34655 /* Reserve memory operand for target. */
34656 memory = ARRAY_SIZE (args);
34657 switch (icode)
34658 {
34659 /* These builtins and instructions require the memory
34660 to be properly aligned. */
34661 case CODE_FOR_avx_movntv4di:
34662 case CODE_FOR_sse2_movntv2di:
34663 case CODE_FOR_avx_movntv8sf:
34664 case CODE_FOR_sse_movntv4sf:
34665 case CODE_FOR_sse4a_vmmovntv4sf:
34666 case CODE_FOR_avx_movntv4df:
34667 case CODE_FOR_sse2_movntv2df:
34668 case CODE_FOR_sse4a_vmmovntv2df:
34669 case CODE_FOR_sse2_movntidi:
34670 case CODE_FOR_sse_movntq:
34671 case CODE_FOR_sse2_movntisi:
34672 case CODE_FOR_avx512f_movntv16sf:
34673 case CODE_FOR_avx512f_movntv8df:
34674 case CODE_FOR_avx512f_movntv8di:
34675 aligned_mem = true;
34676 break;
34677 default:
34678 break;
34679 }
34680 break;
34681 case V4SF_FTYPE_V4SF_PCV2SF:
34682 case V2DF_FTYPE_V2DF_PCDOUBLE:
34683 nargs = 2;
34684 klass = load;
34685 memory = 1;
34686 break;
34687 case V8SF_FTYPE_PCV8SF_V8SI:
34688 case V4DF_FTYPE_PCV4DF_V4DI:
34689 case V4SF_FTYPE_PCV4SF_V4SI:
34690 case V2DF_FTYPE_PCV2DF_V2DI:
34691 case V8SI_FTYPE_PCV8SI_V8SI:
34692 case V4DI_FTYPE_PCV4DI_V4DI:
34693 case V4SI_FTYPE_PCV4SI_V4SI:
34694 case V2DI_FTYPE_PCV2DI_V2DI:
34695 nargs = 2;
34696 klass = load;
34697 memory = 0;
34698 break;
34699 case VOID_FTYPE_PV8DF_V8DF_QI:
34700 case VOID_FTYPE_PV16SF_V16SF_HI:
34701 case VOID_FTYPE_PV8DI_V8DI_QI:
34702 case VOID_FTYPE_PV16SI_V16SI_HI:
34703 switch (icode)
34704 {
34705 /* These builtins and instructions require the memory
34706 to be properly aligned. */
34707 case CODE_FOR_avx512f_storev16sf_mask:
34708 case CODE_FOR_avx512f_storev16si_mask:
34709 case CODE_FOR_avx512f_storev8df_mask:
34710 case CODE_FOR_avx512f_storev8di_mask:
34711 aligned_mem = true;
34712 break;
34713 default:
34714 break;
34715 }
34716 /* FALLTHRU */
34717 case VOID_FTYPE_PV8SF_V8SI_V8SF:
34718 case VOID_FTYPE_PV4DF_V4DI_V4DF:
34719 case VOID_FTYPE_PV4SF_V4SI_V4SF:
34720 case VOID_FTYPE_PV2DF_V2DI_V2DF:
34721 case VOID_FTYPE_PV8SI_V8SI_V8SI:
34722 case VOID_FTYPE_PV4DI_V4DI_V4DI:
34723 case VOID_FTYPE_PV4SI_V4SI_V4SI:
34724 case VOID_FTYPE_PV2DI_V2DI_V2DI:
34725 case VOID_FTYPE_PDOUBLE_V2DF_QI:
34726 case VOID_FTYPE_PFLOAT_V4SF_QI:
34727 case VOID_FTYPE_PV8SI_V8DI_QI:
34728 case VOID_FTYPE_PV8HI_V8DI_QI:
34729 case VOID_FTYPE_PV16HI_V16SI_HI:
34730 case VOID_FTYPE_PV16QI_V8DI_QI:
34731 case VOID_FTYPE_PV16QI_V16SI_HI:
34732 nargs = 2;
34733 klass = store;
34734 /* Reserve memory operand for target. */
34735 memory = ARRAY_SIZE (args);
34736 break;
34737 case V16SF_FTYPE_PCV16SF_V16SF_HI:
34738 case V16SI_FTYPE_PCV16SI_V16SI_HI:
34739 case V8DF_FTYPE_PCV8DF_V8DF_QI:
34740 case V8DI_FTYPE_PCV8DI_V8DI_QI:
34741 case V2DF_FTYPE_PCDOUBLE_V2DF_QI:
34742 case V4SF_FTYPE_PCFLOAT_V4SF_QI:
34743 nargs = 3;
34744 klass = load;
34745 memory = 0;
34746 switch (icode)
34747 {
34748 /* These builtins and instructions require the memory
34749 to be properly aligned. */
34750 case CODE_FOR_avx512f_loadv16sf_mask:
34751 case CODE_FOR_avx512f_loadv16si_mask:
34752 case CODE_FOR_avx512f_loadv8df_mask:
34753 case CODE_FOR_avx512f_loadv8di_mask:
34754 aligned_mem = true;
34755 break;
34756 default:
34757 break;
34758 }
34759 break;
34760 case VOID_FTYPE_UINT_UINT_UINT:
34761 case VOID_FTYPE_UINT64_UINT_UINT:
34762 case UCHAR_FTYPE_UINT_UINT_UINT:
34763 case UCHAR_FTYPE_UINT64_UINT_UINT:
34764 nargs = 3;
34765 klass = load;
34766 memory = ARRAY_SIZE (args);
34767 last_arg_constant = true;
34768 break;
34769 default:
34770 gcc_unreachable ();
34771 }
34772
34773 gcc_assert (nargs <= ARRAY_SIZE (args));
34774
34775 if (klass == store)
34776 {
34777 arg = CALL_EXPR_ARG (exp, 0);
34778 op = expand_normal (arg);
34779 gcc_assert (target == 0);
34780 if (memory)
34781 {
34782 op = ix86_zero_extend_to_Pmode (op);
34783 target = gen_rtx_MEM (tmode, op);
34784 /* target at this point has just BITS_PER_UNIT MEM_ALIGN
34785 on it. Try to improve it using get_pointer_alignment,
34786 and if the special builtin is one that requires strict
34787 	     mode alignment, also from its GET_MODE_ALIGNMENT.
34788 Failure to do so could lead to ix86_legitimate_combined_insn
34789 rejecting all changes to such insns. */
34790 unsigned int align = get_pointer_alignment (arg);
34791 if (aligned_mem && align < GET_MODE_ALIGNMENT (tmode))
34792 align = GET_MODE_ALIGNMENT (tmode);
34793 if (MEM_ALIGN (target) < align)
34794 set_mem_align (target, align);
34795 }
34796 else
34797 target = force_reg (tmode, op);
34798 arg_adjust = 1;
34799 }
34800 else
34801 {
34802 arg_adjust = 0;
34803 if (optimize
34804 || target == 0
34805 || !register_operand (target, tmode)
34806 || GET_MODE (target) != tmode)
34807 target = gen_reg_rtx (tmode);
34808 }
34809
34810 for (i = 0; i < nargs; i++)
34811 {
34812 enum machine_mode mode = insn_p->operand[i + 1].mode;
34813 bool match;
34814
34815 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
34816 op = expand_normal (arg);
34817 match = insn_p->operand[i + 1].predicate (op, mode);
34818
34819 if (last_arg_constant && (i + 1) == nargs)
34820 {
34821 if (!match)
34822 {
34823 if (icode == CODE_FOR_lwp_lwpvalsi3
34824 || icode == CODE_FOR_lwp_lwpinssi3
34825 || icode == CODE_FOR_lwp_lwpvaldi3
34826 || icode == CODE_FOR_lwp_lwpinsdi3)
34827 error ("the last argument must be a 32-bit immediate");
34828 else
34829 error ("the last argument must be an 8-bit immediate");
34830 return const0_rtx;
34831 }
34832 }
34833 else
34834 {
34835 if (i == memory)
34836 {
34837 /* This must be the memory operand. */
34838 op = ix86_zero_extend_to_Pmode (op);
34839 op = gen_rtx_MEM (mode, op);
34840 /* op at this point has just BITS_PER_UNIT MEM_ALIGN
34841 on it. Try to improve it using get_pointer_alignment,
34842 and if the special builtin is one that requires strict
34843 		 mode alignment, also from its GET_MODE_ALIGNMENT.
34844 Failure to do so could lead to ix86_legitimate_combined_insn
34845 rejecting all changes to such insns. */
34846 unsigned int align = get_pointer_alignment (arg);
34847 if (aligned_mem && align < GET_MODE_ALIGNMENT (mode))
34848 align = GET_MODE_ALIGNMENT (mode);
34849 if (MEM_ALIGN (op) < align)
34850 set_mem_align (op, align);
34851 }
34852 else
34853 {
34854 	      /* This must be a register.  */
34855 if (VECTOR_MODE_P (mode))
34856 op = safe_vector_operand (op, mode);
34857
34858 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
34859 op = copy_to_mode_reg (mode, op);
34860 else
34861 {
34862 op = copy_to_reg (op);
34863 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
34864 }
34865 }
34866 }
34867
34868 args[i].op = op;
34869 args[i].mode = mode;
34870 }
34871
34872 switch (nargs)
34873 {
34874 case 0:
34875 pat = GEN_FCN (icode) (target);
34876 break;
34877 case 1:
34878 pat = GEN_FCN (icode) (target, args[0].op);
34879 break;
34880 case 2:
34881 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
34882 break;
34883 case 3:
34884 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
34885 break;
34886 default:
34887 gcc_unreachable ();
34888 }
34889
34890 if (! pat)
34891 return 0;
34892 emit_insn (pat);
34893 return klass == store ? 0 : target;
34894 }
34895
34896 /* Return the integer constant in ARG. Constrain it to be in the range
34897 of the subparts of VEC_TYPE; issue an error if not. */
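/* For instance, for a V4SF vector type max is 3, so any selector of 4 or
   more is rejected with the error below.  */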
34898
34899 static int
34900 get_element_number (tree vec_type, tree arg)
34901 {
34902 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
34903
34904 if (!tree_fits_uhwi_p (arg)
34905 || (elt = tree_to_uhwi (arg), elt > max))
34906 {
34907 error ("selector must be an integer constant in the range 0..%wi", max);
34908 return 0;
34909 }
34910
34911 return elt;
34912 }
34913
34914 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
34915 ix86_expand_vector_init. We DO have language-level syntax for this, in
34916 the form of (type){ init-list }. Except that since we can't place emms
34917 instructions from inside the compiler, we can't allow the use of MMX
34918 registers unless the user explicitly asks for it. So we do *not* define
34919 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
34920    we have builtins invoked by mmintrin.h that give us license to emit
34921 these sorts of instructions. */
34922
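/* For example, mmintrin.h implements _mm_set_pi16 in terms of
   __builtin_ia32_vec_init_v4hi, which reaches this function through the
   IX86_BUILTIN_VEC_INIT_V4HI case of ix86_expand_builtin below.  */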
34923 static rtx
34924 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
34925 {
34926 enum machine_mode tmode = TYPE_MODE (type);
34927 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
34928 int i, n_elt = GET_MODE_NUNITS (tmode);
34929 rtvec v = rtvec_alloc (n_elt);
34930
34931 gcc_assert (VECTOR_MODE_P (tmode));
34932 gcc_assert (call_expr_nargs (exp) == n_elt);
34933
34934 for (i = 0; i < n_elt; ++i)
34935 {
34936 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
34937 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
34938 }
34939
34940 if (!target || !register_operand (target, tmode))
34941 target = gen_reg_rtx (tmode);
34942
34943 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
34944 return target;
34945 }
34946
34947 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
34948 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
34949 had a language-level syntax for referencing vector elements. */
34950
34951 static rtx
34952 ix86_expand_vec_ext_builtin (tree exp, rtx target)
34953 {
34954 enum machine_mode tmode, mode0;
34955 tree arg0, arg1;
34956 int elt;
34957 rtx op0;
34958
34959 arg0 = CALL_EXPR_ARG (exp, 0);
34960 arg1 = CALL_EXPR_ARG (exp, 1);
34961
34962 op0 = expand_normal (arg0);
34963 elt = get_element_number (TREE_TYPE (arg0), arg1);
34964
34965 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
34966 mode0 = TYPE_MODE (TREE_TYPE (arg0));
34967 gcc_assert (VECTOR_MODE_P (mode0));
34968
34969 op0 = force_reg (mode0, op0);
34970
34971 if (optimize || !target || !register_operand (target, tmode))
34972 target = gen_reg_rtx (tmode);
34973
34974 ix86_expand_vector_extract (true, target, op0, elt);
34975
34976 return target;
34977 }
34978
34979 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
34980 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
34981 a language-level syntax for referencing vector elements. */
34982
34983 static rtx
34984 ix86_expand_vec_set_builtin (tree exp)
34985 {
34986 enum machine_mode tmode, mode1;
34987 tree arg0, arg1, arg2;
34988 int elt;
34989 rtx op0, op1, target;
34990
34991 arg0 = CALL_EXPR_ARG (exp, 0);
34992 arg1 = CALL_EXPR_ARG (exp, 1);
34993 arg2 = CALL_EXPR_ARG (exp, 2);
34994
34995 tmode = TYPE_MODE (TREE_TYPE (arg0));
34996 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
34997 gcc_assert (VECTOR_MODE_P (tmode));
34998
34999 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
35000 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
35001 elt = get_element_number (TREE_TYPE (arg0), arg2);
35002
35003 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
35004 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
35005
35006 op0 = force_reg (tmode, op0);
35007 op1 = force_reg (mode1, op1);
35008
35009 /* OP0 is the source of these builtin functions and shouldn't be
35010 modified. Create a copy, use it and return it as target. */
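  /* For instance (assuming the emmintrin.h mapping), _mm_insert_epi16
     (v, x, 3) ends up here: V itself must stay unmodified, so the
     insertion is done on a fresh copy that becomes the return value.  */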
35011 target = gen_reg_rtx (tmode);
35012 emit_move_insn (target, op0);
35013 ix86_expand_vector_set (true, target, op1, elt);
35014
35015 return target;
35016 }
35017
35018 /* Expand an expression EXP that calls a built-in function,
35019 with result going to TARGET if that's convenient
35020 (and in mode MODE if that's convenient).
35021 SUBTARGET may be used as the target for computing one of EXP's operands.
35022 IGNORE is nonzero if the value is to be ignored. */
35023
35024 static rtx
35025 ix86_expand_builtin (tree exp, rtx target, rtx subtarget,
35026 enum machine_mode mode, int ignore)
35027 {
35028 const struct builtin_description *d;
35029 size_t i;
35030 enum insn_code icode;
35031 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
35032 tree arg0, arg1, arg2, arg3, arg4;
35033 rtx op0, op1, op2, op3, op4, pat, insn;
35034 enum machine_mode mode0, mode1, mode2, mode3, mode4;
35035 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
35036
35037 /* For CPU builtins that can be folded, fold first and expand the fold. */
35038 switch (fcode)
35039 {
35040 case IX86_BUILTIN_CPU_INIT:
35041 {
35042 /* Make it call __cpu_indicator_init in libgcc. */
35043 tree call_expr, fndecl, type;
35044 type = build_function_type_list (integer_type_node, NULL_TREE);
35045 fndecl = build_fn_decl ("__cpu_indicator_init", type);
35046 call_expr = build_call_expr (fndecl, 0);
35047 return expand_expr (call_expr, target, mode, EXPAND_NORMAL);
35048 }
35049 case IX86_BUILTIN_CPU_IS:
35050 case IX86_BUILTIN_CPU_SUPPORTS:
35051 {
35052 tree arg0 = CALL_EXPR_ARG (exp, 0);
35053 tree fold_expr = fold_builtin_cpu (fndecl, &arg0);
35054 gcc_assert (fold_expr != NULL_TREE);
35055 return expand_expr (fold_expr, target, mode, EXPAND_NORMAL);
35056 }
35057 }
35058
35059 /* Determine whether the builtin function is available under the current ISA.
35060 Originally the builtin was not created if it wasn't applicable to the
35061 current ISA based on the command line switches. With function specific
35062 options, we need to check in the context of the function making the call
35063 whether it is supported. */
35064 if (ix86_builtins_isa[fcode].isa
35065 && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
35066 {
35067 char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
35068 NULL, (enum fpmath_unit) 0, false);
35069
35070 if (!opts)
35071 error ("%qE needs unknown isa option", fndecl);
35072 else
35073 {
35074 gcc_assert (opts != NULL);
35075 error ("%qE needs isa option %s", fndecl, opts);
35076 free (opts);
35077 }
35078 return const0_rtx;
35079 }
35080
35081 switch (fcode)
35082 {
35083 case IX86_BUILTIN_MASKMOVQ:
35084 case IX86_BUILTIN_MASKMOVDQU:
35085 icode = (fcode == IX86_BUILTIN_MASKMOVQ
35086 ? CODE_FOR_mmx_maskmovq
35087 : CODE_FOR_sse2_maskmovdqu);
35088 /* Note the arg order is different from the operand order. */
35089 arg1 = CALL_EXPR_ARG (exp, 0);
35090 arg2 = CALL_EXPR_ARG (exp, 1);
35091 arg0 = CALL_EXPR_ARG (exp, 2);
35092 op0 = expand_normal (arg0);
35093 op1 = expand_normal (arg1);
35094 op2 = expand_normal (arg2);
35095 mode0 = insn_data[icode].operand[0].mode;
35096 mode1 = insn_data[icode].operand[1].mode;
35097 mode2 = insn_data[icode].operand[2].mode;
35098
35099 op0 = ix86_zero_extend_to_Pmode (op0);
35100 op0 = gen_rtx_MEM (mode1, op0);
35101
35102 if (!insn_data[icode].operand[0].predicate (op0, mode0))
35103 op0 = copy_to_mode_reg (mode0, op0);
35104 if (!insn_data[icode].operand[1].predicate (op1, mode1))
35105 op1 = copy_to_mode_reg (mode1, op1);
35106 if (!insn_data[icode].operand[2].predicate (op2, mode2))
35107 op2 = copy_to_mode_reg (mode2, op2);
35108 pat = GEN_FCN (icode) (op0, op1, op2);
35109 if (! pat)
35110 return 0;
35111 emit_insn (pat);
35112 return 0;
35113
35114 case IX86_BUILTIN_LDMXCSR:
35115 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
35116 target = assign_386_stack_local (SImode, SLOT_TEMP);
35117 emit_move_insn (target, op0);
35118 emit_insn (gen_sse_ldmxcsr (target));
35119 return 0;
35120
35121 case IX86_BUILTIN_STMXCSR:
35122 target = assign_386_stack_local (SImode, SLOT_TEMP);
35123 emit_insn (gen_sse_stmxcsr (target));
35124 return copy_to_mode_reg (SImode, target);
35125
35126 case IX86_BUILTIN_CLFLUSH:
35127 arg0 = CALL_EXPR_ARG (exp, 0);
35128 op0 = expand_normal (arg0);
35129 icode = CODE_FOR_sse2_clflush;
35130 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
35131 op0 = ix86_zero_extend_to_Pmode (op0);
35132
35133 emit_insn (gen_sse2_clflush (op0));
35134 return 0;
35135
35136 case IX86_BUILTIN_CLFLUSHOPT:
35137 arg0 = CALL_EXPR_ARG (exp, 0);
35138 op0 = expand_normal (arg0);
35139 icode = CODE_FOR_clflushopt;
35140 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
35141 op0 = ix86_zero_extend_to_Pmode (op0);
35142
35143 emit_insn (gen_clflushopt (op0));
35144 return 0;
35145
35146 case IX86_BUILTIN_MONITOR:
35147 arg0 = CALL_EXPR_ARG (exp, 0);
35148 arg1 = CALL_EXPR_ARG (exp, 1);
35149 arg2 = CALL_EXPR_ARG (exp, 2);
35150 op0 = expand_normal (arg0);
35151 op1 = expand_normal (arg1);
35152 op2 = expand_normal (arg2);
35153 if (!REG_P (op0))
35154 op0 = ix86_zero_extend_to_Pmode (op0);
35155 if (!REG_P (op1))
35156 op1 = copy_to_mode_reg (SImode, op1);
35157 if (!REG_P (op2))
35158 op2 = copy_to_mode_reg (SImode, op2);
35159 emit_insn (ix86_gen_monitor (op0, op1, op2));
35160 return 0;
35161
35162 case IX86_BUILTIN_MWAIT:
35163 arg0 = CALL_EXPR_ARG (exp, 0);
35164 arg1 = CALL_EXPR_ARG (exp, 1);
35165 op0 = expand_normal (arg0);
35166 op1 = expand_normal (arg1);
35167 if (!REG_P (op0))
35168 op0 = copy_to_mode_reg (SImode, op0);
35169 if (!REG_P (op1))
35170 op1 = copy_to_mode_reg (SImode, op1);
35171 emit_insn (gen_sse3_mwait (op0, op1));
35172 return 0;
35173
35174 case IX86_BUILTIN_VEC_INIT_V2SI:
35175 case IX86_BUILTIN_VEC_INIT_V4HI:
35176 case IX86_BUILTIN_VEC_INIT_V8QI:
35177 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
35178
35179 case IX86_BUILTIN_VEC_EXT_V2DF:
35180 case IX86_BUILTIN_VEC_EXT_V2DI:
35181 case IX86_BUILTIN_VEC_EXT_V4SF:
35182 case IX86_BUILTIN_VEC_EXT_V4SI:
35183 case IX86_BUILTIN_VEC_EXT_V8HI:
35184 case IX86_BUILTIN_VEC_EXT_V2SI:
35185 case IX86_BUILTIN_VEC_EXT_V4HI:
35186 case IX86_BUILTIN_VEC_EXT_V16QI:
35187 return ix86_expand_vec_ext_builtin (exp, target);
35188
35189 case IX86_BUILTIN_VEC_SET_V2DI:
35190 case IX86_BUILTIN_VEC_SET_V4SF:
35191 case IX86_BUILTIN_VEC_SET_V4SI:
35192 case IX86_BUILTIN_VEC_SET_V8HI:
35193 case IX86_BUILTIN_VEC_SET_V4HI:
35194 case IX86_BUILTIN_VEC_SET_V16QI:
35195 return ix86_expand_vec_set_builtin (exp);
35196
35197 case IX86_BUILTIN_INFQ:
35198 case IX86_BUILTIN_HUGE_VALQ:
35199 {
35200 REAL_VALUE_TYPE inf;
35201 rtx tmp;
35202
35203 real_inf (&inf);
35204 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
35205
35206 tmp = validize_mem (force_const_mem (mode, tmp));
35207
35208 if (target == 0)
35209 target = gen_reg_rtx (mode);
35210
35211 emit_move_insn (target, tmp);
35212 return target;
35213 }
35214
35215 case IX86_BUILTIN_RDPMC:
35216 case IX86_BUILTIN_RDTSC:
35217 case IX86_BUILTIN_RDTSCP:
35218
35219 op0 = gen_reg_rtx (DImode);
35220 op1 = gen_reg_rtx (DImode);
35221
35222 if (fcode == IX86_BUILTIN_RDPMC)
35223 {
35224 arg0 = CALL_EXPR_ARG (exp, 0);
35225 op2 = expand_normal (arg0);
35226 if (!register_operand (op2, SImode))
35227 op2 = copy_to_mode_reg (SImode, op2);
35228
35229 insn = (TARGET_64BIT
35230 ? gen_rdpmc_rex64 (op0, op1, op2)
35231 : gen_rdpmc (op0, op2));
35232 emit_insn (insn);
35233 }
35234 else if (fcode == IX86_BUILTIN_RDTSC)
35235 {
35236 insn = (TARGET_64BIT
35237 ? gen_rdtsc_rex64 (op0, op1)
35238 : gen_rdtsc (op0));
35239 emit_insn (insn);
35240 }
35241 else
35242 {
35243 op2 = gen_reg_rtx (SImode);
35244
35245 insn = (TARGET_64BIT
35246 ? gen_rdtscp_rex64 (op0, op1, op2)
35247 : gen_rdtscp (op0, op2));
35248 emit_insn (insn);
35249
35250 arg0 = CALL_EXPR_ARG (exp, 0);
35251 op4 = expand_normal (arg0);
35252 if (!address_operand (op4, VOIDmode))
35253 {
35254 op4 = convert_memory_address (Pmode, op4);
35255 op4 = copy_addr_to_reg (op4);
35256 }
35257 emit_move_insn (gen_rtx_MEM (SImode, op4), op2);
35258 }
35259
35260 if (target == 0)
35261 {
35262 /* mode is VOIDmode if __builtin_rd* has been called
35263 without lhs. */
35264 if (mode == VOIDmode)
35265 return target;
35266 target = gen_reg_rtx (mode);
35267 }
35268
35269 if (TARGET_64BIT)
35270 {
35271 op1 = expand_simple_binop (DImode, ASHIFT, op1, GEN_INT (32),
35272 op1, 1, OPTAB_DIRECT);
35273 op0 = expand_simple_binop (DImode, IOR, op0, op1,
35274 op0, 1, OPTAB_DIRECT);
35275 }
35276
35277 emit_move_insn (target, op0);
35278 return target;
35279
35280 case IX86_BUILTIN_FXSAVE:
35281 case IX86_BUILTIN_FXRSTOR:
35282 case IX86_BUILTIN_FXSAVE64:
35283 case IX86_BUILTIN_FXRSTOR64:
35284 case IX86_BUILTIN_FNSTENV:
35285 case IX86_BUILTIN_FLDENV:
35286 case IX86_BUILTIN_FNSTSW:
35287 mode0 = BLKmode;
35288 switch (fcode)
35289 {
35290 case IX86_BUILTIN_FXSAVE:
35291 icode = CODE_FOR_fxsave;
35292 break;
35293 case IX86_BUILTIN_FXRSTOR:
35294 icode = CODE_FOR_fxrstor;
35295 break;
35296 case IX86_BUILTIN_FXSAVE64:
35297 icode = CODE_FOR_fxsave64;
35298 break;
35299 case IX86_BUILTIN_FXRSTOR64:
35300 icode = CODE_FOR_fxrstor64;
35301 break;
35302 case IX86_BUILTIN_FNSTENV:
35303 icode = CODE_FOR_fnstenv;
35304 break;
35305 case IX86_BUILTIN_FLDENV:
35306 icode = CODE_FOR_fldenv;
35307 break;
35308 case IX86_BUILTIN_FNSTSW:
35309 icode = CODE_FOR_fnstsw;
35310 mode0 = HImode;
35311 break;
35312 default:
35313 gcc_unreachable ();
35314 }
35315
35316 arg0 = CALL_EXPR_ARG (exp, 0);
35317 op0 = expand_normal (arg0);
35318
35319 if (!address_operand (op0, VOIDmode))
35320 {
35321 op0 = convert_memory_address (Pmode, op0);
35322 op0 = copy_addr_to_reg (op0);
35323 }
35324 op0 = gen_rtx_MEM (mode0, op0);
35325
35326 pat = GEN_FCN (icode) (op0);
35327 if (pat)
35328 emit_insn (pat);
35329 return 0;
35330
35331 case IX86_BUILTIN_XSAVE:
35332 case IX86_BUILTIN_XRSTOR:
35333 case IX86_BUILTIN_XSAVE64:
35334 case IX86_BUILTIN_XRSTOR64:
35335 case IX86_BUILTIN_XSAVEOPT:
35336 case IX86_BUILTIN_XSAVEOPT64:
35337 case IX86_BUILTIN_XSAVES:
35338 case IX86_BUILTIN_XRSTORS:
35339 case IX86_BUILTIN_XSAVES64:
35340 case IX86_BUILTIN_XRSTORS64:
35341 case IX86_BUILTIN_XSAVEC:
35342 case IX86_BUILTIN_XSAVEC64:
35343 arg0 = CALL_EXPR_ARG (exp, 0);
35344 arg1 = CALL_EXPR_ARG (exp, 1);
35345 op0 = expand_normal (arg0);
35346 op1 = expand_normal (arg1);
35347
35348 if (!address_operand (op0, VOIDmode))
35349 {
35350 op0 = convert_memory_address (Pmode, op0);
35351 op0 = copy_addr_to_reg (op0);
35352 }
35353 op0 = gen_rtx_MEM (BLKmode, op0);
35354
35355 op1 = force_reg (DImode, op1);
35356
35357 if (TARGET_64BIT)
35358 {
35359 op2 = expand_simple_binop (DImode, LSHIFTRT, op1, GEN_INT (32),
35360 NULL, 1, OPTAB_DIRECT);
35361 switch (fcode)
35362 {
35363 case IX86_BUILTIN_XSAVE:
35364 icode = CODE_FOR_xsave_rex64;
35365 break;
35366 case IX86_BUILTIN_XRSTOR:
35367 icode = CODE_FOR_xrstor_rex64;
35368 break;
35369 case IX86_BUILTIN_XSAVE64:
35370 icode = CODE_FOR_xsave64;
35371 break;
35372 case IX86_BUILTIN_XRSTOR64:
35373 icode = CODE_FOR_xrstor64;
35374 break;
35375 case IX86_BUILTIN_XSAVEOPT:
35376 icode = CODE_FOR_xsaveopt_rex64;
35377 break;
35378 case IX86_BUILTIN_XSAVEOPT64:
35379 icode = CODE_FOR_xsaveopt64;
35380 break;
35381 case IX86_BUILTIN_XSAVES:
35382 icode = CODE_FOR_xsaves_rex64;
35383 break;
35384 case IX86_BUILTIN_XRSTORS:
35385 icode = CODE_FOR_xrstors_rex64;
35386 break;
35387 case IX86_BUILTIN_XSAVES64:
35388 icode = CODE_FOR_xsaves64;
35389 break;
35390 case IX86_BUILTIN_XRSTORS64:
35391 icode = CODE_FOR_xrstors64;
35392 break;
35393 case IX86_BUILTIN_XSAVEC:
35394 icode = CODE_FOR_xsavec_rex64;
35395 break;
35396 case IX86_BUILTIN_XSAVEC64:
35397 icode = CODE_FOR_xsavec64;
35398 break;
35399 default:
35400 gcc_unreachable ();
35401 }
35402
35403 op2 = gen_lowpart (SImode, op2);
35404 op1 = gen_lowpart (SImode, op1);
35405 pat = GEN_FCN (icode) (op0, op1, op2);
35406 }
35407 else
35408 {
35409 switch (fcode)
35410 {
35411 case IX86_BUILTIN_XSAVE:
35412 icode = CODE_FOR_xsave;
35413 break;
35414 case IX86_BUILTIN_XRSTOR:
35415 icode = CODE_FOR_xrstor;
35416 break;
35417 case IX86_BUILTIN_XSAVEOPT:
35418 icode = CODE_FOR_xsaveopt;
35419 break;
35420 case IX86_BUILTIN_XSAVES:
35421 icode = CODE_FOR_xsaves;
35422 break;
35423 case IX86_BUILTIN_XRSTORS:
35424 icode = CODE_FOR_xrstors;
35425 break;
35426 case IX86_BUILTIN_XSAVEC:
35427 icode = CODE_FOR_xsavec;
35428 break;
35429 default:
35430 gcc_unreachable ();
35431 }
35432 pat = GEN_FCN (icode) (op0, op1);
35433 }
35434
35435 if (pat)
35436 emit_insn (pat);
35437 return 0;
35438
35439 case IX86_BUILTIN_LLWPCB:
35440 arg0 = CALL_EXPR_ARG (exp, 0);
35441 op0 = expand_normal (arg0);
35442 icode = CODE_FOR_lwp_llwpcb;
35443 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
35444 op0 = ix86_zero_extend_to_Pmode (op0);
35445 emit_insn (gen_lwp_llwpcb (op0));
35446 return 0;
35447
35448 case IX86_BUILTIN_SLWPCB:
35449 icode = CODE_FOR_lwp_slwpcb;
35450 if (!target
35451 || !insn_data[icode].operand[0].predicate (target, Pmode))
35452 target = gen_reg_rtx (Pmode);
35453 emit_insn (gen_lwp_slwpcb (target));
35454 return target;
35455
35456 case IX86_BUILTIN_BEXTRI32:
35457 case IX86_BUILTIN_BEXTRI64:
35458 arg0 = CALL_EXPR_ARG (exp, 0);
35459 arg1 = CALL_EXPR_ARG (exp, 1);
35460 op0 = expand_normal (arg0);
35461 op1 = expand_normal (arg1);
35462 icode = (fcode == IX86_BUILTIN_BEXTRI32
35463 ? CODE_FOR_tbm_bextri_si
35464 : CODE_FOR_tbm_bextri_di);
35465 if (!CONST_INT_P (op1))
35466 {
35467 error ("last argument must be an immediate");
35468 return const0_rtx;
35469 }
35470 else
35471 {
35472 unsigned char length = (INTVAL (op1) >> 8) & 0xFF;
35473 unsigned char lsb_index = INTVAL (op1) & 0xFF;
35474 op1 = GEN_INT (length);
35475 op2 = GEN_INT (lsb_index);
35476 pat = GEN_FCN (icode) (target, op0, op1, op2);
35477 if (pat)
35478 emit_insn (pat);
35479 return target;
35480 }
35481
35482 case IX86_BUILTIN_RDRAND16_STEP:
35483 icode = CODE_FOR_rdrandhi_1;
35484 mode0 = HImode;
35485 goto rdrand_step;
35486
35487 case IX86_BUILTIN_RDRAND32_STEP:
35488 icode = CODE_FOR_rdrandsi_1;
35489 mode0 = SImode;
35490 goto rdrand_step;
35491
35492 case IX86_BUILTIN_RDRAND64_STEP:
35493 icode = CODE_FOR_rdranddi_1;
35494 mode0 = DImode;
35495
35496 rdrand_step:
35497 op0 = gen_reg_rtx (mode0);
35498 emit_insn (GEN_FCN (icode) (op0));
35499
35500 arg0 = CALL_EXPR_ARG (exp, 0);
35501 op1 = expand_normal (arg0);
35502 if (!address_operand (op1, VOIDmode))
35503 {
35504 op1 = convert_memory_address (Pmode, op1);
35505 op1 = copy_addr_to_reg (op1);
35506 }
35507 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
35508
35509 op1 = gen_reg_rtx (SImode);
35510 emit_move_insn (op1, CONST1_RTX (SImode));
35511
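      /* An informal reading of the sequence below: rdrand sets CF on
	 success, and (geu (reg:CCC FLAGS) 0) is the i386 idiom for
	 "CF clear".  The IF_THEN_ELSE therefore yields op1 (the constant
	 1) when the instruction succeeded and op2 otherwise; since the
	 destination is zero on failure, op2 is 0 in that case, giving
	 the 1-on-success / 0-on-failure result that _rdrand32_step and
	 friends document.  */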
35512 /* Emit SImode conditional move. */
35513 if (mode0 == HImode)
35514 {
35515 op2 = gen_reg_rtx (SImode);
35516 emit_insn (gen_zero_extendhisi2 (op2, op0));
35517 }
35518 else if (mode0 == SImode)
35519 op2 = op0;
35520 else
35521 op2 = gen_rtx_SUBREG (SImode, op0, 0);
35522
35523 if (target == 0
35524 || !register_operand (target, SImode))
35525 target = gen_reg_rtx (SImode);
35526
35527 pat = gen_rtx_GEU (VOIDmode, gen_rtx_REG (CCCmode, FLAGS_REG),
35528 const0_rtx);
35529 emit_insn (gen_rtx_SET (VOIDmode, target,
35530 gen_rtx_IF_THEN_ELSE (SImode, pat, op2, op1)));
35531 return target;
35532
35533 case IX86_BUILTIN_RDSEED16_STEP:
35534 icode = CODE_FOR_rdseedhi_1;
35535 mode0 = HImode;
35536 goto rdseed_step;
35537
35538 case IX86_BUILTIN_RDSEED32_STEP:
35539 icode = CODE_FOR_rdseedsi_1;
35540 mode0 = SImode;
35541 goto rdseed_step;
35542
35543 case IX86_BUILTIN_RDSEED64_STEP:
35544 icode = CODE_FOR_rdseeddi_1;
35545 mode0 = DImode;
35546
35547 rdseed_step:
35548 op0 = gen_reg_rtx (mode0);
35549 emit_insn (GEN_FCN (icode) (op0));
35550
35551 arg0 = CALL_EXPR_ARG (exp, 0);
35552 op1 = expand_normal (arg0);
35553 if (!address_operand (op1, VOIDmode))
35554 {
35555 op1 = convert_memory_address (Pmode, op1);
35556 op1 = copy_addr_to_reg (op1);
35557 }
35558 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
35559
35560 op2 = gen_reg_rtx (QImode);
35561
35562 pat = gen_rtx_LTU (QImode, gen_rtx_REG (CCCmode, FLAGS_REG),
35563 const0_rtx);
35564 emit_insn (gen_rtx_SET (VOIDmode, op2, pat));
35565
35566 if (target == 0
35567 || !register_operand (target, SImode))
35568 target = gen_reg_rtx (SImode);
35569
35570 emit_insn (gen_zero_extendqisi2 (target, op2));
35571 return target;
35572
35573 case IX86_BUILTIN_ADDCARRYX32:
35574 icode = TARGET_ADX ? CODE_FOR_adcxsi3 : CODE_FOR_addsi3_carry;
35575 mode0 = SImode;
35576 goto addcarryx;
35577
35578 case IX86_BUILTIN_ADDCARRYX64:
35579 icode = TARGET_ADX ? CODE_FOR_adcxdi3 : CODE_FOR_adddi3_carry;
35580 mode0 = DImode;
35581
35582 addcarryx:
35583 arg0 = CALL_EXPR_ARG (exp, 0); /* unsigned char c_in. */
35584 arg1 = CALL_EXPR_ARG (exp, 1); /* unsigned int src1. */
35585 arg2 = CALL_EXPR_ARG (exp, 2); /* unsigned int src2. */
35586 arg3 = CALL_EXPR_ARG (exp, 3); /* unsigned int *sum_out. */
35587
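      /* Overall shape of the expansion (a sketch of the intent): this
	 implements c_out = _addcarryx_u32/64 (c_in, src1, src2, &sum_out):
	 first materialize CF from the incoming C_IN, then emit adcx (or
	 plain adc) to form SRC1 + SRC2 + CF, store the sum through the
	 pointer argument, and finally return the resulting carry flag.  */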
35588 op0 = gen_reg_rtx (QImode);
35589
35590 /* Generate CF from input operand. */
35591 op1 = expand_normal (arg0);
35592 op1 = copy_to_mode_reg (QImode, convert_to_mode (QImode, op1, 1));
35593 emit_insn (gen_addqi3_cc (op0, op1, constm1_rtx));
35594
35595       /* Generate the ADCX instruction to compute X + Y + CF.  */
35596 op2 = expand_normal (arg1);
35597 op3 = expand_normal (arg2);
35598
35599 if (!REG_P (op2))
35600 op2 = copy_to_mode_reg (mode0, op2);
35601 if (!REG_P (op3))
35602 op3 = copy_to_mode_reg (mode0, op3);
35603
35604 op0 = gen_reg_rtx (mode0);
35605
35606 op4 = gen_rtx_REG (CCCmode, FLAGS_REG);
35607 pat = gen_rtx_LTU (VOIDmode, op4, const0_rtx);
35608 emit_insn (GEN_FCN (icode) (op0, op2, op3, op4, pat));
35609
35610 /* Store the result. */
35611 op4 = expand_normal (arg3);
35612 if (!address_operand (op4, VOIDmode))
35613 {
35614 op4 = convert_memory_address (Pmode, op4);
35615 op4 = copy_addr_to_reg (op4);
35616 }
35617 emit_move_insn (gen_rtx_MEM (mode0, op4), op0);
35618
35619 /* Return current CF value. */
35620 if (target == 0)
35621 target = gen_reg_rtx (QImode);
35622
35623 PUT_MODE (pat, QImode);
35624 emit_insn (gen_rtx_SET (VOIDmode, target, pat));
35625 return target;
35626
35627 case IX86_BUILTIN_READ_FLAGS:
35628 emit_insn (gen_push (gen_rtx_REG (word_mode, FLAGS_REG)));
35629
35630 if (optimize
35631 || target == NULL_RTX
35632 || !nonimmediate_operand (target, word_mode)
35633 || GET_MODE (target) != word_mode)
35634 target = gen_reg_rtx (word_mode);
35635
35636 emit_insn (gen_pop (target));
35637 return target;
35638
35639 case IX86_BUILTIN_WRITE_FLAGS:
35640
35641 arg0 = CALL_EXPR_ARG (exp, 0);
35642 op0 = expand_normal (arg0);
35643 if (!general_no_elim_operand (op0, word_mode))
35644 op0 = copy_to_mode_reg (word_mode, op0);
35645
35646 emit_insn (gen_push (op0));
35647 emit_insn (gen_pop (gen_rtx_REG (word_mode, FLAGS_REG)));
35648 return 0;
35649
35650 case IX86_BUILTIN_KORTESTC16:
35651 icode = CODE_FOR_kortestchi;
35652 mode0 = HImode;
35653 mode1 = CCCmode;
35654 goto kortest;
35655
35656 case IX86_BUILTIN_KORTESTZ16:
35657 icode = CODE_FOR_kortestzhi;
35658 mode0 = HImode;
35659 mode1 = CCZmode;
35660
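      /* kortestw ORs the two mask operands without storing the result;
	 it sets ZF when that OR is all zeros and CF when it is all ones.
	 The two builtins above differ only in which flag the setcc below
	 materializes (CF via CCCmode for kortestc, ZF via CCZmode for
	 kortestz).  */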
35661 kortest:
35662 arg0 = CALL_EXPR_ARG (exp, 0); /* Mask reg src1. */
35663 arg1 = CALL_EXPR_ARG (exp, 1); /* Mask reg src2. */
35664 op0 = expand_normal (arg0);
35665 op1 = expand_normal (arg1);
35666
35667 op0 = copy_to_reg (op0);
35668 op0 = simplify_gen_subreg (mode0, op0, GET_MODE (op0), 0);
35669 op1 = copy_to_reg (op1);
35670 op1 = simplify_gen_subreg (mode0, op1, GET_MODE (op1), 0);
35671
35672 target = gen_reg_rtx (QImode);
35673       emit_insn (gen_rtx_SET (VOIDmode, target, const0_rtx));
35674
35675 /* Emit kortest. */
35676 emit_insn (GEN_FCN (icode) (op0, op1));
35677 /* And use setcc to return result from flags. */
35678 ix86_expand_setcc (target, EQ,
35679 gen_rtx_REG (mode1, FLAGS_REG), const0_rtx);
35680 return target;
35681
35682 case IX86_BUILTIN_GATHERSIV2DF:
35683 icode = CODE_FOR_avx2_gathersiv2df;
35684 goto gather_gen;
35685 case IX86_BUILTIN_GATHERSIV4DF:
35686 icode = CODE_FOR_avx2_gathersiv4df;
35687 goto gather_gen;
35688 case IX86_BUILTIN_GATHERDIV2DF:
35689 icode = CODE_FOR_avx2_gatherdiv2df;
35690 goto gather_gen;
35691 case IX86_BUILTIN_GATHERDIV4DF:
35692 icode = CODE_FOR_avx2_gatherdiv4df;
35693 goto gather_gen;
35694 case IX86_BUILTIN_GATHERSIV4SF:
35695 icode = CODE_FOR_avx2_gathersiv4sf;
35696 goto gather_gen;
35697 case IX86_BUILTIN_GATHERSIV8SF:
35698 icode = CODE_FOR_avx2_gathersiv8sf;
35699 goto gather_gen;
35700 case IX86_BUILTIN_GATHERDIV4SF:
35701 icode = CODE_FOR_avx2_gatherdiv4sf;
35702 goto gather_gen;
35703 case IX86_BUILTIN_GATHERDIV8SF:
35704 icode = CODE_FOR_avx2_gatherdiv8sf;
35705 goto gather_gen;
35706 case IX86_BUILTIN_GATHERSIV2DI:
35707 icode = CODE_FOR_avx2_gathersiv2di;
35708 goto gather_gen;
35709 case IX86_BUILTIN_GATHERSIV4DI:
35710 icode = CODE_FOR_avx2_gathersiv4di;
35711 goto gather_gen;
35712 case IX86_BUILTIN_GATHERDIV2DI:
35713 icode = CODE_FOR_avx2_gatherdiv2di;
35714 goto gather_gen;
35715 case IX86_BUILTIN_GATHERDIV4DI:
35716 icode = CODE_FOR_avx2_gatherdiv4di;
35717 goto gather_gen;
35718 case IX86_BUILTIN_GATHERSIV4SI:
35719 icode = CODE_FOR_avx2_gathersiv4si;
35720 goto gather_gen;
35721 case IX86_BUILTIN_GATHERSIV8SI:
35722 icode = CODE_FOR_avx2_gathersiv8si;
35723 goto gather_gen;
35724 case IX86_BUILTIN_GATHERDIV4SI:
35725 icode = CODE_FOR_avx2_gatherdiv4si;
35726 goto gather_gen;
35727 case IX86_BUILTIN_GATHERDIV8SI:
35728 icode = CODE_FOR_avx2_gatherdiv8si;
35729 goto gather_gen;
35730 case IX86_BUILTIN_GATHERALTSIV4DF:
35731 icode = CODE_FOR_avx2_gathersiv4df;
35732 goto gather_gen;
35733 case IX86_BUILTIN_GATHERALTDIV8SF:
35734 icode = CODE_FOR_avx2_gatherdiv8sf;
35735 goto gather_gen;
35736 case IX86_BUILTIN_GATHERALTSIV4DI:
35737 icode = CODE_FOR_avx2_gathersiv4di;
35738 goto gather_gen;
35739 case IX86_BUILTIN_GATHERALTDIV8SI:
35740 icode = CODE_FOR_avx2_gatherdiv8si;
35741 goto gather_gen;
35742 case IX86_BUILTIN_GATHER3SIV16SF:
35743 icode = CODE_FOR_avx512f_gathersiv16sf;
35744 goto gather_gen;
35745 case IX86_BUILTIN_GATHER3SIV8DF:
35746 icode = CODE_FOR_avx512f_gathersiv8df;
35747 goto gather_gen;
35748 case IX86_BUILTIN_GATHER3DIV16SF:
35749 icode = CODE_FOR_avx512f_gatherdiv16sf;
35750 goto gather_gen;
35751 case IX86_BUILTIN_GATHER3DIV8DF:
35752 icode = CODE_FOR_avx512f_gatherdiv8df;
35753 goto gather_gen;
35754 case IX86_BUILTIN_GATHER3SIV16SI:
35755 icode = CODE_FOR_avx512f_gathersiv16si;
35756 goto gather_gen;
35757 case IX86_BUILTIN_GATHER3SIV8DI:
35758 icode = CODE_FOR_avx512f_gathersiv8di;
35759 goto gather_gen;
35760 case IX86_BUILTIN_GATHER3DIV16SI:
35761 icode = CODE_FOR_avx512f_gatherdiv16si;
35762 goto gather_gen;
35763 case IX86_BUILTIN_GATHER3DIV8DI:
35764 icode = CODE_FOR_avx512f_gatherdiv8di;
35765 goto gather_gen;
35766 case IX86_BUILTIN_GATHER3ALTSIV8DF:
35767 icode = CODE_FOR_avx512f_gathersiv8df;
35768 goto gather_gen;
35769 case IX86_BUILTIN_GATHER3ALTDIV16SF:
35770 icode = CODE_FOR_avx512f_gatherdiv16sf;
35771 goto gather_gen;
35772 case IX86_BUILTIN_GATHER3ALTSIV8DI:
35773 icode = CODE_FOR_avx512f_gathersiv8di;
35774 goto gather_gen;
35775 case IX86_BUILTIN_GATHER3ALTDIV16SI:
35776 icode = CODE_FOR_avx512f_gatherdiv16si;
35777 goto gather_gen;
35778 case IX86_BUILTIN_SCATTERSIV16SF:
35779 icode = CODE_FOR_avx512f_scattersiv16sf;
35780 goto scatter_gen;
35781 case IX86_BUILTIN_SCATTERSIV8DF:
35782 icode = CODE_FOR_avx512f_scattersiv8df;
35783 goto scatter_gen;
35784 case IX86_BUILTIN_SCATTERDIV16SF:
35785 icode = CODE_FOR_avx512f_scatterdiv16sf;
35786 goto scatter_gen;
35787 case IX86_BUILTIN_SCATTERDIV8DF:
35788 icode = CODE_FOR_avx512f_scatterdiv8df;
35789 goto scatter_gen;
35790 case IX86_BUILTIN_SCATTERSIV16SI:
35791 icode = CODE_FOR_avx512f_scattersiv16si;
35792 goto scatter_gen;
35793 case IX86_BUILTIN_SCATTERSIV8DI:
35794 icode = CODE_FOR_avx512f_scattersiv8di;
35795 goto scatter_gen;
35796 case IX86_BUILTIN_SCATTERDIV16SI:
35797 icode = CODE_FOR_avx512f_scatterdiv16si;
35798 goto scatter_gen;
35799 case IX86_BUILTIN_SCATTERDIV8DI:
35800 icode = CODE_FOR_avx512f_scatterdiv8di;
35801 goto scatter_gen;
35802
35803 case IX86_BUILTIN_GATHERPFDPD:
35804 icode = CODE_FOR_avx512pf_gatherpfv8sidf;
35805 goto vec_prefetch_gen;
35806 case IX86_BUILTIN_GATHERPFDPS:
35807 icode = CODE_FOR_avx512pf_gatherpfv16sisf;
35808 goto vec_prefetch_gen;
35809 case IX86_BUILTIN_GATHERPFQPD:
35810 icode = CODE_FOR_avx512pf_gatherpfv8didf;
35811 goto vec_prefetch_gen;
35812 case IX86_BUILTIN_GATHERPFQPS:
35813 icode = CODE_FOR_avx512pf_gatherpfv8disf;
35814 goto vec_prefetch_gen;
35815 case IX86_BUILTIN_SCATTERPFDPD:
35816 icode = CODE_FOR_avx512pf_scatterpfv8sidf;
35817 goto vec_prefetch_gen;
35818 case IX86_BUILTIN_SCATTERPFDPS:
35819 icode = CODE_FOR_avx512pf_scatterpfv16sisf;
35820 goto vec_prefetch_gen;
35821 case IX86_BUILTIN_SCATTERPFQPD:
35822 icode = CODE_FOR_avx512pf_scatterpfv8didf;
35823 goto vec_prefetch_gen;
35824 case IX86_BUILTIN_SCATTERPFQPS:
35825 icode = CODE_FOR_avx512pf_scatterpfv8disf;
35826 goto vec_prefetch_gen;
35827
35828 gather_gen:
35829 rtx half;
35830 rtx (*gen) (rtx, rtx);
35831
35832 arg0 = CALL_EXPR_ARG (exp, 0);
35833 arg1 = CALL_EXPR_ARG (exp, 1);
35834 arg2 = CALL_EXPR_ARG (exp, 2);
35835 arg3 = CALL_EXPR_ARG (exp, 3);
35836 arg4 = CALL_EXPR_ARG (exp, 4);
35837 op0 = expand_normal (arg0);
35838 op1 = expand_normal (arg1);
35839 op2 = expand_normal (arg2);
35840 op3 = expand_normal (arg3);
35841 op4 = expand_normal (arg4);
35842 /* Note the arg order is different from the operand order. */
35843 mode0 = insn_data[icode].operand[1].mode;
35844 mode2 = insn_data[icode].operand[3].mode;
35845 mode3 = insn_data[icode].operand[4].mode;
35846 mode4 = insn_data[icode].operand[5].mode;
35847
35848 if (target == NULL_RTX
35849 || GET_MODE (target) != insn_data[icode].operand[0].mode
35850 || !insn_data[icode].operand[0].predicate (target,
35851 GET_MODE (target)))
35852 subtarget = gen_reg_rtx (insn_data[icode].operand[0].mode);
35853 else
35854 subtarget = target;
35855
35856 switch (fcode)
35857 {
35858 case IX86_BUILTIN_GATHER3ALTSIV8DF:
35859 case IX86_BUILTIN_GATHER3ALTSIV8DI:
35860 half = gen_reg_rtx (V8SImode);
35861 if (!nonimmediate_operand (op2, V16SImode))
35862 op2 = copy_to_mode_reg (V16SImode, op2);
35863 emit_insn (gen_vec_extract_lo_v16si (half, op2));
35864 op2 = half;
35865 break;
35866 case IX86_BUILTIN_GATHERALTSIV4DF:
35867 case IX86_BUILTIN_GATHERALTSIV4DI:
35868 half = gen_reg_rtx (V4SImode);
35869 if (!nonimmediate_operand (op2, V8SImode))
35870 op2 = copy_to_mode_reg (V8SImode, op2);
35871 emit_insn (gen_vec_extract_lo_v8si (half, op2));
35872 op2 = half;
35873 break;
35874 case IX86_BUILTIN_GATHER3ALTDIV16SF:
35875 case IX86_BUILTIN_GATHER3ALTDIV16SI:
35876 half = gen_reg_rtx (mode0);
35877 if (mode0 == V8SFmode)
35878 gen = gen_vec_extract_lo_v16sf;
35879 else
35880 gen = gen_vec_extract_lo_v16si;
35881 if (!nonimmediate_operand (op0, GET_MODE (op0)))
35882 op0 = copy_to_mode_reg (GET_MODE (op0), op0);
35883 emit_insn (gen (half, op0));
35884 op0 = half;
35885 if (GET_MODE (op3) != VOIDmode)
35886 {
35887 if (!nonimmediate_operand (op3, GET_MODE (op3)))
35888 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
35889 emit_insn (gen (half, op3));
35890 op3 = half;
35891 }
35892 break;
35893 case IX86_BUILTIN_GATHERALTDIV8SF:
35894 case IX86_BUILTIN_GATHERALTDIV8SI:
35895 half = gen_reg_rtx (mode0);
35896 if (mode0 == V4SFmode)
35897 gen = gen_vec_extract_lo_v8sf;
35898 else
35899 gen = gen_vec_extract_lo_v8si;
35900 if (!nonimmediate_operand (op0, GET_MODE (op0)))
35901 op0 = copy_to_mode_reg (GET_MODE (op0), op0);
35902 emit_insn (gen (half, op0));
35903 op0 = half;
35904 if (GET_MODE (op3) != VOIDmode)
35905 {
35906 if (!nonimmediate_operand (op3, GET_MODE (op3)))
35907 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
35908 emit_insn (gen (half, op3));
35909 op3 = half;
35910 }
35911 break;
35912 default:
35913 break;
35914 }
35915
35916 /* Force memory operand only with base register here. But we
35917 don't want to do it on memory operand for other builtin
35918 functions. */
35919 op1 = ix86_zero_extend_to_Pmode (op1);
35920
35921 if (!insn_data[icode].operand[1].predicate (op0, mode0))
35922 op0 = copy_to_mode_reg (mode0, op0);
35923 if (!insn_data[icode].operand[2].predicate (op1, Pmode))
35924 op1 = copy_to_mode_reg (Pmode, op1);
35925 if (!insn_data[icode].operand[3].predicate (op2, mode2))
35926 op2 = copy_to_mode_reg (mode2, op2);
35927 if (GET_MODE (op3) == mode3 || GET_MODE (op3) == VOIDmode)
35928 {
35929 if (!insn_data[icode].operand[4].predicate (op3, mode3))
35930 op3 = copy_to_mode_reg (mode3, op3);
35931 }
35932 else
35933 {
35934 op3 = copy_to_reg (op3);
35935 op3 = simplify_gen_subreg (mode3, op3, GET_MODE (op3), 0);
35936 }
35937 if (!insn_data[icode].operand[5].predicate (op4, mode4))
35938 {
35939 error ("the last argument must be scale 1, 2, 4, 8");
35940 return const0_rtx;
35941 }
35942
35943 /* Optimize. If mask is known to have all high bits set,
35944 replace op0 with pc_rtx to signal that the instruction
35945 overwrites the whole destination and doesn't use its
35946 previous contents. */
35947 if (optimize)
35948 {
35949 if (TREE_CODE (arg3) == INTEGER_CST)
35950 {
35951 if (integer_all_onesp (arg3))
35952 op0 = pc_rtx;
35953 }
35954 else if (TREE_CODE (arg3) == VECTOR_CST)
35955 {
35956 unsigned int negative = 0;
35957 for (i = 0; i < VECTOR_CST_NELTS (arg3); ++i)
35958 {
35959 tree cst = VECTOR_CST_ELT (arg3, i);
35960 if (TREE_CODE (cst) == INTEGER_CST
35961 && tree_int_cst_sign_bit (cst))
35962 negative++;
35963 else if (TREE_CODE (cst) == REAL_CST
35964 && REAL_VALUE_NEGATIVE (TREE_REAL_CST (cst)))
35965 negative++;
35966 }
35967 if (negative == TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg3)))
35968 op0 = pc_rtx;
35969 }
35970 else if (TREE_CODE (arg3) == SSA_NAME
35971 && TREE_CODE (TREE_TYPE (arg3)) == VECTOR_TYPE)
35972 {
35973 /* Also recognize when the mask is like:
35974 __v2df src = _mm_setzero_pd ();
35975 __v2df mask = _mm_cmpeq_pd (src, src);
35976 or
35977 __v8sf src = _mm256_setzero_ps ();
35978 __v8sf mask = _mm256_cmp_ps (src, src, _CMP_EQ_OQ);
35979 as that is a cheaper way to load all ones into
35980 a register than having to load a constant from
35981 memory. */
35982 gimple def_stmt = SSA_NAME_DEF_STMT (arg3);
35983 if (is_gimple_call (def_stmt))
35984 {
35985 tree fndecl = gimple_call_fndecl (def_stmt);
35986 if (fndecl
35987 && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
35988 switch ((unsigned int) DECL_FUNCTION_CODE (fndecl))
35989 {
35990 case IX86_BUILTIN_CMPPD:
35991 case IX86_BUILTIN_CMPPS:
35992 case IX86_BUILTIN_CMPPD256:
35993 case IX86_BUILTIN_CMPPS256:
35994 if (!integer_zerop (gimple_call_arg (def_stmt, 2)))
35995 break;
35996 /* FALLTHRU */
35997 case IX86_BUILTIN_CMPEQPD:
35998 case IX86_BUILTIN_CMPEQPS:
35999 if (initializer_zerop (gimple_call_arg (def_stmt, 0))
36000 && initializer_zerop (gimple_call_arg (def_stmt,
36001 1)))
36002 op0 = pc_rtx;
36003 break;
36004 default:
36005 break;
36006 }
36007 }
36008 }
36009 }
36010
36011 pat = GEN_FCN (icode) (subtarget, op0, op1, op2, op3, op4);
36012 if (! pat)
36013 return const0_rtx;
36014 emit_insn (pat);
36015
36016 switch (fcode)
36017 {
36018 case IX86_BUILTIN_GATHER3DIV16SF:
36019 if (target == NULL_RTX)
36020 target = gen_reg_rtx (V8SFmode);
36021 emit_insn (gen_vec_extract_lo_v16sf (target, subtarget));
36022 break;
36023 case IX86_BUILTIN_GATHER3DIV16SI:
36024 if (target == NULL_RTX)
36025 target = gen_reg_rtx (V8SImode);
36026 emit_insn (gen_vec_extract_lo_v16si (target, subtarget));
36027 break;
36028 case IX86_BUILTIN_GATHERDIV8SF:
36029 if (target == NULL_RTX)
36030 target = gen_reg_rtx (V4SFmode);
36031 emit_insn (gen_vec_extract_lo_v8sf (target, subtarget));
36032 break;
36033 case IX86_BUILTIN_GATHERDIV8SI:
36034 if (target == NULL_RTX)
36035 target = gen_reg_rtx (V4SImode);
36036 emit_insn (gen_vec_extract_lo_v8si (target, subtarget));
36037 break;
36038 default:
36039 target = subtarget;
36040 break;
36041 }
36042 return target;
36043
36044 scatter_gen:
36045 arg0 = CALL_EXPR_ARG (exp, 0);
36046 arg1 = CALL_EXPR_ARG (exp, 1);
36047 arg2 = CALL_EXPR_ARG (exp, 2);
36048 arg3 = CALL_EXPR_ARG (exp, 3);
36049 arg4 = CALL_EXPR_ARG (exp, 4);
36050 op0 = expand_normal (arg0);
36051 op1 = expand_normal (arg1);
36052 op2 = expand_normal (arg2);
36053 op3 = expand_normal (arg3);
36054 op4 = expand_normal (arg4);
36055 mode1 = insn_data[icode].operand[1].mode;
36056 mode2 = insn_data[icode].operand[2].mode;
36057 mode3 = insn_data[icode].operand[3].mode;
36058 mode4 = insn_data[icode].operand[4].mode;
36059
36060 /* Force the memory operand to use only a base register here. But we
36061 don't want to do this to the memory operands of other builtin
36062 functions. */
36063 op0 = force_reg (Pmode, convert_to_mode (Pmode, op0, 1));
36064
36065 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
36066 op0 = copy_to_mode_reg (Pmode, op0);
36067
36068 if (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode)
36069 {
36070 if (!insn_data[icode].operand[1].predicate (op1, mode1))
36071 op1 = copy_to_mode_reg (mode1, op1);
36072 }
36073 else
36074 {
36075 op1 = copy_to_reg (op1);
36076 op1 = simplify_gen_subreg (mode1, op1, GET_MODE (op1), 0);
36077 }
36078
36079 if (!insn_data[icode].operand[2].predicate (op2, mode2))
36080 op2 = copy_to_mode_reg (mode2, op2);
36081
36082 if (!insn_data[icode].operand[3].predicate (op3, mode3))
36083 op3 = copy_to_mode_reg (mode3, op3);
36084
36085 if (!insn_data[icode].operand[4].predicate (op4, mode4))
36086 {
36087 error ("the last argument must be scale 1, 2, 4, 8");
36088 return const0_rtx;
36089 }
36090
36091 pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
36092 if (! pat)
36093 return const0_rtx;
36094
36095 emit_insn (pat);
36096 return 0;
36097
36098 vec_prefetch_gen:
36099 arg0 = CALL_EXPR_ARG (exp, 0);
36100 arg1 = CALL_EXPR_ARG (exp, 1);
36101 arg2 = CALL_EXPR_ARG (exp, 2);
36102 arg3 = CALL_EXPR_ARG (exp, 3);
36103 arg4 = CALL_EXPR_ARG (exp, 4);
36104 op0 = expand_normal (arg0);
36105 op1 = expand_normal (arg1);
36106 op2 = expand_normal (arg2);
36107 op3 = expand_normal (arg3);
36108 op4 = expand_normal (arg4);
36109 mode0 = insn_data[icode].operand[0].mode;
36110 mode1 = insn_data[icode].operand[1].mode;
36111 mode3 = insn_data[icode].operand[3].mode;
36112 mode4 = insn_data[icode].operand[4].mode;
36113
36114 if (GET_MODE (op0) == mode0
36115 || (GET_MODE (op0) == VOIDmode && op0 != constm1_rtx))
36116 {
36117 if (!insn_data[icode].operand[0].predicate (op0, mode0))
36118 op0 = copy_to_mode_reg (mode0, op0);
36119 }
36120 else if (op0 != constm1_rtx)
36121 {
36122 op0 = copy_to_reg (op0);
36123 op0 = simplify_gen_subreg (mode0, op0, GET_MODE (op0), 0);
36124 }
36125
36126 if (!insn_data[icode].operand[1].predicate (op1, mode1))
36127 op1 = copy_to_mode_reg (mode1, op1);
36128
36129 /* Force the memory operand to use only a base register here. But we
36130 don't want to do this to the memory operands of other builtin
36131 functions. */
36132 op2 = force_reg (Pmode, convert_to_mode (Pmode, op2, 1));
36133
36134 if (!insn_data[icode].operand[2].predicate (op2, Pmode))
36135 op2 = copy_to_mode_reg (Pmode, op2);
36136
36137 if (!insn_data[icode].operand[3].predicate (op3, mode3))
36138 {
36139 error ("the fourth argument must be scale 1, 2, 4, 8");
36140 return const0_rtx;
36141 }
36142
36143 if (!insn_data[icode].operand[4].predicate (op4, mode4))
36144 {
36145 error ("incorrect hint operand");
36146 return const0_rtx;
36147 }
36148
36149 pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
36150 if (! pat)
36151 return const0_rtx;
36152
36153 emit_insn (pat);
36154
36155 return 0;
36156
36157 case IX86_BUILTIN_XABORT:
36158 icode = CODE_FOR_xabort;
36159 arg0 = CALL_EXPR_ARG (exp, 0);
36160 op0 = expand_normal (arg0);
36161 mode0 = insn_data[icode].operand[0].mode;
36162 if (!insn_data[icode].operand[0].predicate (op0, mode0))
36163 {
36164 error ("the xabort's argument must be an 8-bit immediate");
36165 return const0_rtx;
36166 }
36167 emit_insn (gen_xabort (op0));
36168 return 0;
36169
36170 default:
36171 break;
36172 }
36173
36174 for (i = 0, d = bdesc_special_args;
36175 i < ARRAY_SIZE (bdesc_special_args);
36176 i++, d++)
36177 if (d->code == fcode)
36178 return ix86_expand_special_args_builtin (d, exp, target);
36179
36180 for (i = 0, d = bdesc_args;
36181 i < ARRAY_SIZE (bdesc_args);
36182 i++, d++)
36183 if (d->code == fcode)
36184 switch (fcode)
36185 {
36186 case IX86_BUILTIN_FABSQ:
36187 case IX86_BUILTIN_COPYSIGNQ:
36188 if (!TARGET_SSE)
36189 /* Emit a normal call if SSE isn't available. */
36190 return expand_call (exp, target, ignore);
36191 default:
36192 return ix86_expand_args_builtin (d, exp, target);
36193 }
36194
36195 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
36196 if (d->code == fcode)
36197 return ix86_expand_sse_comi (d, exp, target);
36198
36199 for (i = 0, d = bdesc_round_args; i < ARRAY_SIZE (bdesc_round_args); i++, d++)
36200 if (d->code == fcode)
36201 return ix86_expand_round_builtin (d, exp, target);
36202
36203 for (i = 0, d = bdesc_pcmpestr;
36204 i < ARRAY_SIZE (bdesc_pcmpestr);
36205 i++, d++)
36206 if (d->code == fcode)
36207 return ix86_expand_sse_pcmpestr (d, exp, target);
36208
36209 for (i = 0, d = bdesc_pcmpistr;
36210 i < ARRAY_SIZE (bdesc_pcmpistr);
36211 i++, d++)
36212 if (d->code == fcode)
36213 return ix86_expand_sse_pcmpistr (d, exp, target);
36214
36215 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
36216 if (d->code == fcode)
36217 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
36218 (enum ix86_builtin_func_type)
36219 d->flag, d->comparison);
36220
36221 gcc_unreachable ();
36222 }
36223
36224 /* This returns the target-specific builtin with code CODE if
36225 current_function_decl has visibility on this builtin, which is checked
36226 using isa flags. Returns NULL_TREE otherwise. */
36227
36228 static tree ix86_get_builtin (enum ix86_builtins code)
36229 {
36230 struct cl_target_option *opts;
36231 tree target_tree = NULL_TREE;
36232
36233 /* Determine the isa flags of current_function_decl. */
36234
36235 if (current_function_decl)
36236 target_tree = DECL_FUNCTION_SPECIFIC_TARGET (current_function_decl);
36237
36238 if (target_tree == NULL)
36239 target_tree = target_option_default_node;
36240
36241 opts = TREE_TARGET_OPTION (target_tree);
36242
36243 if (ix86_builtins_isa[(int) code].isa & opts->x_ix86_isa_flags)
36244 return ix86_builtin_decl (code, true);
36245 else
36246 return NULL_TREE;
36247 }
36248
36249 /* Returns a function decl for a vectorized version of the builtin function
36250 FNDECL, with output vector type TYPE_OUT and input vector type TYPE_IN,
36251 or NULL_TREE if it is not available. */
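/* For example (illustrative of the switch below), vectorizing a
   BUILT_IN_SQRT call with TYPE_OUT and TYPE_IN both two-element double
   vectors maps to IX86_BUILTIN_SQRTPD.  */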
36252
36253 static tree
36254 ix86_builtin_vectorized_function (tree fndecl, tree type_out,
36255 tree type_in)
36256 {
36257 enum machine_mode in_mode, out_mode;
36258 int in_n, out_n;
36259 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
36260
36261 if (TREE_CODE (type_out) != VECTOR_TYPE
36262 || TREE_CODE (type_in) != VECTOR_TYPE
36263 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
36264 return NULL_TREE;
36265
36266 out_mode = TYPE_MODE (TREE_TYPE (type_out));
36267 out_n = TYPE_VECTOR_SUBPARTS (type_out);
36268 in_mode = TYPE_MODE (TREE_TYPE (type_in));
36269 in_n = TYPE_VECTOR_SUBPARTS (type_in);
36270
36271 switch (fn)
36272 {
36273 case BUILT_IN_SQRT:
36274 if (out_mode == DFmode && in_mode == DFmode)
36275 {
36276 if (out_n == 2 && in_n == 2)
36277 return ix86_get_builtin (IX86_BUILTIN_SQRTPD);
36278 else if (out_n == 4 && in_n == 4)
36279 return ix86_get_builtin (IX86_BUILTIN_SQRTPD256);
36280 else if (out_n == 8 && in_n == 8)
36281 return ix86_get_builtin (IX86_BUILTIN_SQRTPD512);
36282 }
36283 break;
36284
36285 case BUILT_IN_EXP2F:
36286 if (out_mode == SFmode && in_mode == SFmode)
36287 {
36288 if (out_n == 16 && in_n == 16)
36289 return ix86_get_builtin (IX86_BUILTIN_EXP2PS);
36290 }
36291 break;
36292
36293 case BUILT_IN_SQRTF:
36294 if (out_mode == SFmode && in_mode == SFmode)
36295 {
36296 if (out_n == 4 && in_n == 4)
36297 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR);
36298 else if (out_n == 8 && in_n == 8)
36299 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR256);
36300 else if (out_n == 16 && in_n == 16)
36301 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR512);
36302 }
36303 break;
36304
36305 case BUILT_IN_IFLOOR:
36306 case BUILT_IN_LFLOOR:
36307 case BUILT_IN_LLFLOOR:
36308 /* The round insn does not trap on denormals. */
36309 if (flag_trapping_math || !TARGET_ROUND)
36310 break;
36311
36312 if (out_mode == SImode && in_mode == DFmode)
36313 {
36314 if (out_n == 4 && in_n == 2)
36315 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX);
36316 else if (out_n == 8 && in_n == 4)
36317 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256);
36318 else if (out_n == 16 && in_n == 8)
36319 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512);
36320 }
36321 break;
36322
36323 case BUILT_IN_IFLOORF:
36324 case BUILT_IN_LFLOORF:
36325 case BUILT_IN_LLFLOORF:
36326 /* The round insn does not trap on denormals. */
36327 if (flag_trapping_math || !TARGET_ROUND)
36328 break;
36329
36330 if (out_mode == SImode && in_mode == SFmode)
36331 {
36332 if (out_n == 4 && in_n == 4)
36333 return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX);
36334 else if (out_n == 8 && in_n == 8)
36335 return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX256);
36336 }
36337 break;
36338
36339 case BUILT_IN_ICEIL:
36340 case BUILT_IN_LCEIL:
36341 case BUILT_IN_LLCEIL:
36342 /* The round insn does not trap on denormals. */
36343 if (flag_trapping_math || !TARGET_ROUND)
36344 break;
36345
36346 if (out_mode == SImode && in_mode == DFmode)
36347 {
36348 if (out_n == 4 && in_n == 2)
36349 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX);
36350 else if (out_n == 8 && in_n == 4)
36351 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256);
36352 else if (out_n == 16 && in_n == 8)
36353 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512);
36354 }
36355 break;
36356
36357 case BUILT_IN_ICEILF:
36358 case BUILT_IN_LCEILF:
36359 case BUILT_IN_LLCEILF:
36360 /* The round insn does not trap on denormals. */
36361 if (flag_trapping_math || !TARGET_ROUND)
36362 break;
36363
36364 if (out_mode == SImode && in_mode == SFmode)
36365 {
36366 if (out_n == 4 && in_n == 4)
36367 return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX);
36368 else if (out_n == 8 && in_n == 8)
36369 return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX256);
36370 }
36371 break;
36372
36373 case BUILT_IN_IRINT:
36374 case BUILT_IN_LRINT:
36375 case BUILT_IN_LLRINT:
36376 if (out_mode == SImode && in_mode == DFmode)
36377 {
36378 if (out_n == 4 && in_n == 2)
36379 return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX);
36380 else if (out_n == 8 && in_n == 4)
36381 return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX256);
36382 }
36383 break;
36384
36385 case BUILT_IN_IRINTF:
36386 case BUILT_IN_LRINTF:
36387 case BUILT_IN_LLRINTF:
36388 if (out_mode == SImode && in_mode == SFmode)
36389 {
36390 if (out_n == 4 && in_n == 4)
36391 return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ);
36392 else if (out_n == 8 && in_n == 8)
36393 return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ256);
36394 }
36395 break;
36396
36397 case BUILT_IN_IROUND:
36398 case BUILT_IN_LROUND:
36399 case BUILT_IN_LLROUND:
36400 /* The round insn does not trap on denormals. */
36401 if (flag_trapping_math || !TARGET_ROUND)
36402 break;
36403
36404 if (out_mode == SImode && in_mode == DFmode)
36405 {
36406 if (out_n == 4 && in_n == 2)
36407 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX);
36408 else if (out_n == 8 && in_n == 4)
36409 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256);
36410 else if (out_n == 16 && in_n == 8)
36411 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512);
36412 }
36413 break;
36414
36415 case BUILT_IN_IROUNDF:
36416 case BUILT_IN_LROUNDF:
36417 case BUILT_IN_LLROUNDF:
36418 /* The round insn does not trap on denormals. */
36419 if (flag_trapping_math || !TARGET_ROUND)
36420 break;
36421
36422 if (out_mode == SImode && in_mode == SFmode)
36423 {
36424 if (out_n == 4 && in_n == 4)
36425 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX);
36426 else if (out_n == 8 && in_n == 8)
36427 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX256);
36428 }
36429 break;
36430
36431 case BUILT_IN_COPYSIGN:
36432 if (out_mode == DFmode && in_mode == DFmode)
36433 {
36434 if (out_n == 2 && in_n == 2)
36435 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD);
36436 else if (out_n == 4 && in_n == 4)
36437 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD256);
36438 else if (out_n == 8 && in_n == 8)
36439 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD512);
36440 }
36441 break;
36442
36443 case BUILT_IN_COPYSIGNF:
36444 if (out_mode == SFmode && in_mode == SFmode)
36445 {
36446 if (out_n == 4 && in_n == 4)
36447 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS);
36448 else if (out_n == 8 && in_n == 8)
36449 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS256);
36450 else if (out_n == 16 && in_n == 16)
36451 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS512);
36452 }
36453 break;
36454
36455 case BUILT_IN_FLOOR:
36456 /* The round insn does not trap on denormals. */
36457 if (flag_trapping_math || !TARGET_ROUND)
36458 break;
36459
36460 if (out_mode == DFmode && in_mode == DFmode)
36461 {
36462 if (out_n == 2 && in_n == 2)
36463 return ix86_get_builtin (IX86_BUILTIN_FLOORPD);
36464 else if (out_n == 4 && in_n == 4)
36465 return ix86_get_builtin (IX86_BUILTIN_FLOORPD256);
36466 }
36467 break;
36468
36469 case BUILT_IN_FLOORF:
36470 /* The round insn does not trap on denormals. */
36471 if (flag_trapping_math || !TARGET_ROUND)
36472 break;
36473
36474 if (out_mode == SFmode && in_mode == SFmode)
36475 {
36476 if (out_n == 4 && in_n == 4)
36477 return ix86_get_builtin (IX86_BUILTIN_FLOORPS);
36478 else if (out_n == 8 && in_n == 8)
36479 return ix86_get_builtin (IX86_BUILTIN_FLOORPS256);
36480 }
36481 break;
36482
36483 case BUILT_IN_CEIL:
36484 /* The round insn does not trap on denormals. */
36485 if (flag_trapping_math || !TARGET_ROUND)
36486 break;
36487
36488 if (out_mode == DFmode && in_mode == DFmode)
36489 {
36490 if (out_n == 2 && in_n == 2)
36491 return ix86_get_builtin (IX86_BUILTIN_CEILPD);
36492 else if (out_n == 4 && in_n == 4)
36493 return ix86_get_builtin (IX86_BUILTIN_CEILPD256);
36494 }
36495 break;
36496
36497 case BUILT_IN_CEILF:
36498 /* The round insn does not trap on denormals. */
36499 if (flag_trapping_math || !TARGET_ROUND)
36500 break;
36501
36502 if (out_mode == SFmode && in_mode == SFmode)
36503 {
36504 if (out_n == 4 && in_n == 4)
36505 return ix86_get_builtin (IX86_BUILTIN_CEILPS);
36506 else if (out_n == 8 && in_n == 8)
36507 return ix86_get_builtin (IX86_BUILTIN_CEILPS256);
36508 }
36509 break;
36510
36511 case BUILT_IN_TRUNC:
36512 /* The round insn does not trap on denormals. */
36513 if (flag_trapping_math || !TARGET_ROUND)
36514 break;
36515
36516 if (out_mode == DFmode && in_mode == DFmode)
36517 {
36518 if (out_n == 2 && in_n == 2)
36519 return ix86_get_builtin (IX86_BUILTIN_TRUNCPD);
36520 else if (out_n == 4 && in_n == 4)
36521 return ix86_get_builtin (IX86_BUILTIN_TRUNCPD256);
36522 }
36523 break;
36524
36525 case BUILT_IN_TRUNCF:
36526 /* The round insn does not trap on denormals. */
36527 if (flag_trapping_math || !TARGET_ROUND)
36528 break;
36529
36530 if (out_mode == SFmode && in_mode == SFmode)
36531 {
36532 if (out_n == 4 && in_n == 4)
36533 return ix86_get_builtin (IX86_BUILTIN_TRUNCPS);
36534 else if (out_n == 8 && in_n == 8)
36535 return ix86_get_builtin (IX86_BUILTIN_TRUNCPS256);
36536 }
36537 break;
36538
36539 case BUILT_IN_RINT:
36540 /* The round insn does not trap on denormals. */
36541 if (flag_trapping_math || !TARGET_ROUND)
36542 break;
36543
36544 if (out_mode == DFmode && in_mode == DFmode)
36545 {
36546 if (out_n == 2 && in_n == 2)
36547 return ix86_get_builtin (IX86_BUILTIN_RINTPD);
36548 else if (out_n == 4 && in_n == 4)
36549 return ix86_get_builtin (IX86_BUILTIN_RINTPD256);
36550 }
36551 break;
36552
36553 case BUILT_IN_RINTF:
36554 /* The round insn does not trap on denormals. */
36555 if (flag_trapping_math || !TARGET_ROUND)
36556 break;
36557
36558 if (out_mode == SFmode && in_mode == SFmode)
36559 {
36560 if (out_n == 4 && in_n == 4)
36561 return ix86_get_builtin (IX86_BUILTIN_RINTPS);
36562 else if (out_n == 8 && in_n == 8)
36563 return ix86_get_builtin (IX86_BUILTIN_RINTPS256);
36564 }
36565 break;
36566
36567 case BUILT_IN_ROUND:
36568 /* The round insn does not trap on denormals. */
36569 if (flag_trapping_math || !TARGET_ROUND)
36570 break;
36571
36572 if (out_mode == DFmode && in_mode == DFmode)
36573 {
36574 if (out_n == 2 && in_n == 2)
36575 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ);
36576 else if (out_n == 4 && in_n == 4)
36577 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ256);
36578 }
36579 break;
36580
36581 case BUILT_IN_ROUNDF:
36582 /* The round insn does not trap on denormals. */
36583 if (flag_trapping_math || !TARGET_ROUND)
36584 break;
36585
36586 if (out_mode == SFmode && in_mode == SFmode)
36587 {
36588 if (out_n == 4 && in_n == 4)
36589 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ);
36590 else if (out_n == 8 && in_n == 8)
36591 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ256);
36592 }
36593 break;
36594
36595 case BUILT_IN_FMA:
36596 if (out_mode == DFmode && in_mode == DFmode)
36597 {
36598 if (out_n == 2 && in_n == 2)
36599 return ix86_get_builtin (IX86_BUILTIN_VFMADDPD);
36600 if (out_n == 4 && in_n == 4)
36601 return ix86_get_builtin (IX86_BUILTIN_VFMADDPD256);
36602 }
36603 break;
36604
36605 case BUILT_IN_FMAF:
36606 if (out_mode == SFmode && in_mode == SFmode)
36607 {
36608 if (out_n == 4 && in_n == 4)
36609 return ix86_get_builtin (IX86_BUILTIN_VFMADDPS);
36610 if (out_n == 8 && in_n == 8)
36611 return ix86_get_builtin (IX86_BUILTIN_VFMADDPS256);
36612 }
36613 break;
36614
36615 default:
36616 break;
36617 }
36618
36619 /* Dispatch to a handler for a vectorization library. */
36620 if (ix86_veclib_handler)
36621 return ix86_veclib_handler ((enum built_in_function) fn, type_out,
36622 type_in);
36623
36624 return NULL_TREE;
36625 }
36626
36627 /* Handler for an SVML-style interface to
36628 a library with vectorized intrinsics. */
36629
36630 static tree
36631 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
36632 {
36633 char name[20];
36634 tree fntype, new_fndecl, args;
36635 unsigned arity;
36636 const char *bname;
36637 enum machine_mode el_mode, in_mode;
36638 int n, in_n;
36639
36640 /* The SVML library is suitable for unsafe math only. */
36641 if (!flag_unsafe_math_optimizations)
36642 return NULL_TREE;
36643
36644 el_mode = TYPE_MODE (TREE_TYPE (type_out));
36645 n = TYPE_VECTOR_SUBPARTS (type_out);
36646 in_mode = TYPE_MODE (TREE_TYPE (type_in));
36647 in_n = TYPE_VECTOR_SUBPARTS (type_in);
36648 if (el_mode != in_mode
36649 || n != in_n)
36650 return NULL_TREE;
36651
36652 switch (fn)
36653 {
36654 case BUILT_IN_EXP:
36655 case BUILT_IN_LOG:
36656 case BUILT_IN_LOG10:
36657 case BUILT_IN_POW:
36658 case BUILT_IN_TANH:
36659 case BUILT_IN_TAN:
36660 case BUILT_IN_ATAN:
36661 case BUILT_IN_ATAN2:
36662 case BUILT_IN_ATANH:
36663 case BUILT_IN_CBRT:
36664 case BUILT_IN_SINH:
36665 case BUILT_IN_SIN:
36666 case BUILT_IN_ASINH:
36667 case BUILT_IN_ASIN:
36668 case BUILT_IN_COSH:
36669 case BUILT_IN_COS:
36670 case BUILT_IN_ACOSH:
36671 case BUILT_IN_ACOS:
36672 if (el_mode != DFmode || n != 2)
36673 return NULL_TREE;
36674 break;
36675
36676 case BUILT_IN_EXPF:
36677 case BUILT_IN_LOGF:
36678 case BUILT_IN_LOG10F:
36679 case BUILT_IN_POWF:
36680 case BUILT_IN_TANHF:
36681 case BUILT_IN_TANF:
36682 case BUILT_IN_ATANF:
36683 case BUILT_IN_ATAN2F:
36684 case BUILT_IN_ATANHF:
36685 case BUILT_IN_CBRTF:
36686 case BUILT_IN_SINHF:
36687 case BUILT_IN_SINF:
36688 case BUILT_IN_ASINHF:
36689 case BUILT_IN_ASINF:
36690 case BUILT_IN_COSHF:
36691 case BUILT_IN_COSF:
36692 case BUILT_IN_ACOSHF:
36693 case BUILT_IN_ACOSF:
36694 if (el_mode != SFmode || n != 4)
36695 return NULL_TREE;
36696 break;
36697
36698 default:
36699 return NULL_TREE;
36700 }
36701
36702 bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));
36703
36704 if (fn == BUILT_IN_LOGF)
36705 strcpy (name, "vmlsLn4");
36706 else if (fn == BUILT_IN_LOG)
36707 strcpy (name, "vmldLn2");
36708 else if (n == 4)
36709 {
36710 sprintf (name, "vmls%s", bname+10);
36711 name[strlen (name)-1] = '4';
36712 }
36713 else
36714 sprintf (name, "vmld%s2", bname+10);
36715
36716 /* Convert to uppercase. */
36717 name[4] &= ~0x20;
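/* For example, BUILT_IN_SINF with four-element vectors yields "vmlsSin4"
   and BUILT_IN_SIN with two-element vectors yields "vmldSin2", while the
   log cases above are hard-coded as "vmlsLn4" and "vmldLn2".  */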
36718
36719 arity = 0;
36720 for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
36721 args;
36722 args = TREE_CHAIN (args))
36723 arity++;
36724
36725 if (arity == 1)
36726 fntype = build_function_type_list (type_out, type_in, NULL);
36727 else
36728 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
36729
36730 /* Build a function declaration for the vectorized function. */
36731 new_fndecl = build_decl (BUILTINS_LOCATION,
36732 FUNCTION_DECL, get_identifier (name), fntype);
36733 TREE_PUBLIC (new_fndecl) = 1;
36734 DECL_EXTERNAL (new_fndecl) = 1;
36735 DECL_IS_NOVOPS (new_fndecl) = 1;
36736 TREE_READONLY (new_fndecl) = 1;
36737
36738 return new_fndecl;
36739 }
36740
36741 /* Handler for an ACML-style interface to
36742 a library with vectorized intrinsics. */
36743
36744 static tree
36745 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
36746 {
36747 char name[20] = "__vr.._";
36748 tree fntype, new_fndecl, args;
36749 unsigned arity;
36750 const char *bname;
36751 enum machine_mode el_mode, in_mode;
36752 int n, in_n;
36753
36754 /* The ACML library is 64-bit only and suitable for unsafe math only, as
36755 it does not correctly support parts of IEEE arithmetic with the required
36756 precision, such as denormals. */
36757 if (!TARGET_64BIT
36758 || !flag_unsafe_math_optimizations)
36759 return NULL_TREE;
36760
36761 el_mode = TYPE_MODE (TREE_TYPE (type_out));
36762 n = TYPE_VECTOR_SUBPARTS (type_out);
36763 in_mode = TYPE_MODE (TREE_TYPE (type_in));
36764 in_n = TYPE_VECTOR_SUBPARTS (type_in);
36765 if (el_mode != in_mode
36766 || n != in_n)
36767 return NULL_TREE;
36768
36769 switch (fn)
36770 {
36771 case BUILT_IN_SIN:
36772 case BUILT_IN_COS:
36773 case BUILT_IN_EXP:
36774 case BUILT_IN_LOG:
36775 case BUILT_IN_LOG2:
36776 case BUILT_IN_LOG10:
36777 name[4] = 'd';
36778 name[5] = '2';
36779 if (el_mode != DFmode
36780 || n != 2)
36781 return NULL_TREE;
36782 break;
36783
36784 case BUILT_IN_SINF:
36785 case BUILT_IN_COSF:
36786 case BUILT_IN_EXPF:
36787 case BUILT_IN_POWF:
36788 case BUILT_IN_LOGF:
36789 case BUILT_IN_LOG2F:
36790 case BUILT_IN_LOG10F:
36791 name[4] = 's';
36792 name[5] = '4';
36793 if (el_mode != SFmode
36794 || n != 4)
36795 return NULL_TREE;
36796 break;
36797
36798 default:
36799 return NULL_TREE;
36800 }
36801
36802 bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));
36803 sprintf (name + 7, "%s", bname+10);
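/* For example, BUILT_IN_SIN becomes "__vrd2_sin" and BUILT_IN_SINF
   becomes "__vrs4_sinf", following the ACML vector routine naming.  */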
36804
36805 arity = 0;
36806 for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
36807 args;
36808 args = TREE_CHAIN (args))
36809 arity++;
36810
36811 if (arity == 1)
36812 fntype = build_function_type_list (type_out, type_in, NULL);
36813 else
36814 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
36815
36816 /* Build a function declaration for the vectorized function. */
36817 new_fndecl = build_decl (BUILTINS_LOCATION,
36818 FUNCTION_DECL, get_identifier (name), fntype);
36819 TREE_PUBLIC (new_fndecl) = 1;
36820 DECL_EXTERNAL (new_fndecl) = 1;
36821 DECL_IS_NOVOPS (new_fndecl) = 1;
36822 TREE_READONLY (new_fndecl) = 1;
36823
36824 return new_fndecl;
36825 }
36826
36827 /* Returns a decl of a function that implements gather load with
36828 memory type MEM_VECTYPE and index type INDEX_VECTYPE and SCALE.
36829 Return NULL_TREE if it is not available. */
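/* For example, a V2DF gather with an SImode index type and a scale of
   1, 2, 4 or 8 maps to IX86_BUILTIN_GATHERSIV2DF below.  */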
36830
36831 static tree
36832 ix86_vectorize_builtin_gather (const_tree mem_vectype,
36833 const_tree index_type, int scale)
36834 {
36835 bool si;
36836 enum ix86_builtins code;
36837
36838 if (! TARGET_AVX2)
36839 return NULL_TREE;
36840
36841 if ((TREE_CODE (index_type) != INTEGER_TYPE
36842 && !POINTER_TYPE_P (index_type))
36843 || (TYPE_MODE (index_type) != SImode
36844 && TYPE_MODE (index_type) != DImode))
36845 return NULL_TREE;
36846
36847 if (TYPE_PRECISION (index_type) > POINTER_SIZE)
36848 return NULL_TREE;
36849
36850 /* v*gather* insn sign extends index to pointer mode. */
36851 if (TYPE_PRECISION (index_type) < POINTER_SIZE
36852 && TYPE_UNSIGNED (index_type))
36853 return NULL_TREE;
36854
36855 if (scale <= 0
36856 || scale > 8
36857 || (scale & (scale - 1)) != 0)
36858 return NULL_TREE;
36859
36860 si = TYPE_MODE (index_type) == SImode;
36861 switch (TYPE_MODE (mem_vectype))
36862 {
36863 case V2DFmode:
36864 code = si ? IX86_BUILTIN_GATHERSIV2DF : IX86_BUILTIN_GATHERDIV2DF;
36865 break;
36866 case V4DFmode:
36867 code = si ? IX86_BUILTIN_GATHERALTSIV4DF : IX86_BUILTIN_GATHERDIV4DF;
36868 break;
36869 case V2DImode:
36870 code = si ? IX86_BUILTIN_GATHERSIV2DI : IX86_BUILTIN_GATHERDIV2DI;
36871 break;
36872 case V4DImode:
36873 code = si ? IX86_BUILTIN_GATHERALTSIV4DI : IX86_BUILTIN_GATHERDIV4DI;
36874 break;
36875 case V4SFmode:
36876 code = si ? IX86_BUILTIN_GATHERSIV4SF : IX86_BUILTIN_GATHERDIV4SF;
36877 break;
36878 case V8SFmode:
36879 code = si ? IX86_BUILTIN_GATHERSIV8SF : IX86_BUILTIN_GATHERALTDIV8SF;
36880 break;
36881 case V4SImode:
36882 code = si ? IX86_BUILTIN_GATHERSIV4SI : IX86_BUILTIN_GATHERDIV4SI;
36883 break;
36884 case V8SImode:
36885 code = si ? IX86_BUILTIN_GATHERSIV8SI : IX86_BUILTIN_GATHERALTDIV8SI;
36886 break;
36887 case V8DFmode:
36888 if (TARGET_AVX512F)
36889 code = si ? IX86_BUILTIN_GATHER3ALTSIV8DF : IX86_BUILTIN_GATHER3DIV8DF;
36890 else
36891 return NULL_TREE;
36892 break;
36893 case V8DImode:
36894 if (TARGET_AVX512F)
36895 code = si ? IX86_BUILTIN_GATHER3ALTSIV8DI : IX86_BUILTIN_GATHER3DIV8DI;
36896 else
36897 return NULL_TREE;
36898 break;
36899 case V16SFmode:
36900 if (TARGET_AVX512F)
36901 code = si ? IX86_BUILTIN_GATHER3SIV16SF : IX86_BUILTIN_GATHER3ALTDIV16SF;
36902 else
36903 return NULL_TREE;
36904 break;
36905 case V16SImode:
36906 if (TARGET_AVX512F)
36907 code = si ? IX86_BUILTIN_GATHER3SIV16SI : IX86_BUILTIN_GATHER3ALTDIV16SI;
36908 else
36909 return NULL_TREE;
36910 break;
36911 default:
36912 return NULL_TREE;
36913 }
36914
36915 return ix86_get_builtin (code);
36916 }
36917
36918 /* Returns a decl for a target-specific builtin that implements the
36919 reciprocal of the function FN, or NULL_TREE if it is not available. */
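/* For example, a scalar BUILT_IN_SQRTF maps to IX86_BUILTIN_RSQRTF and
   the vectorized IX86_BUILTIN_SQRTPS_NR maps to IX86_BUILTIN_RSQRTPS_NR,
   provided the fast-math conditions checked below hold.  */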
36920
36921 static tree
36922 ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
36923 bool sqrt ATTRIBUTE_UNUSED)
36924 {
36925 if (! (TARGET_SSE_MATH && !optimize_insn_for_size_p ()
36926 && flag_finite_math_only && !flag_trapping_math
36927 && flag_unsafe_math_optimizations))
36928 return NULL_TREE;
36929
36930 if (md_fn)
36931 /* Machine dependent builtins. */
36932 switch (fn)
36933 {
36934 /* Vectorized version of sqrt to rsqrt conversion. */
36935 case IX86_BUILTIN_SQRTPS_NR:
36936 return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR);
36937
36938 case IX86_BUILTIN_SQRTPS_NR256:
36939 return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR256);
36940
36941 default:
36942 return NULL_TREE;
36943 }
36944 else
36945 /* Normal builtins. */
36946 switch (fn)
36947 {
36948 /* Sqrt to rsqrt conversion. */
36949 case BUILT_IN_SQRTF:
36950 return ix86_get_builtin (IX86_BUILTIN_RSQRTF);
36951
36952 default:
36953 return NULL_TREE;
36954 }
36955 }
36956 \f
36957 /* Helper for avx_vpermilps256_operand et al. This is also used by
36958 the expansion functions to turn the parallel back into a mask.
36959 The return value is 0 for no match and the imm8+1 for a match. */
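/* For example, for V4SFmode the parallel (1 0 3 2) encodes two bits per
   element, giving imm8 0xb1, so the function returns 0xb2.  */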
36960
36961 int
36962 avx_vpermilp_parallel (rtx par, enum machine_mode mode)
36963 {
36964 unsigned i, nelt = GET_MODE_NUNITS (mode);
36965 unsigned mask = 0;
36966 unsigned char ipar[16] = {}; /* Silence -Wuninitialized warning. */
36967
36968 if (XVECLEN (par, 0) != (int) nelt)
36969 return 0;
36970
36971 /* Validate that all of the elements are constants, and not totally
36972 out of range. Copy the data into an integral array to make the
36973 subsequent checks easier. */
36974 for (i = 0; i < nelt; ++i)
36975 {
36976 rtx er = XVECEXP (par, 0, i);
36977 unsigned HOST_WIDE_INT ei;
36978
36979 if (!CONST_INT_P (er))
36980 return 0;
36981 ei = INTVAL (er);
36982 if (ei >= nelt)
36983 return 0;
36984 ipar[i] = ei;
36985 }
36986
36987 switch (mode)
36988 {
36989 case V8DFmode:
36990 /* In the 512-bit DFmode case, we can only move elements within
36991 a 128-bit lane. First fill the second part of the mask,
36992 then fallthru. */
36993 for (i = 4; i < 6; ++i)
36994 {
36995 if (ipar[i] < 4 || ipar[i] >= 6)
36996 return 0;
36997 mask |= (ipar[i] - 4) << i;
36998 }
36999 for (i = 6; i < 8; ++i)
37000 {
37001 if (ipar[i] < 6)
37002 return 0;
37003 mask |= (ipar[i] - 6) << i;
37004 }
37005 /* FALLTHRU */
37006
37007 case V4DFmode:
37008 /* In the 256-bit DFmode case, we can only move elements within
37009 a 128-bit lane. */
37010 for (i = 0; i < 2; ++i)
37011 {
37012 if (ipar[i] >= 2)
37013 return 0;
37014 mask |= ipar[i] << i;
37015 }
37016 for (i = 2; i < 4; ++i)
37017 {
37018 if (ipar[i] < 2)
37019 return 0;
37020 mask |= (ipar[i] - 2) << i;
37021 }
37022 break;
37023
37024 case V16SFmode:
37025 /* In the 512-bit SFmode case, the permutation in the upper 256 bits
37026 must mirror the permutation in the lower 256 bits. */
37027 for (i = 0; i < 8; ++i)
37028 if (ipar[i] + 8 != ipar[i + 8])
37029 return 0;
37030 /* FALLTHRU */
37031
37032 case V8SFmode:
37033 /* In the 256-bit SFmode case, we have full freedom of
37034 movement within the low 128-bit lane, but the high 128-bit
37035 lane must mirror the exact same pattern. */
37036 for (i = 0; i < 4; ++i)
37037 if (ipar[i] + 4 != ipar[i + 4])
37038 return 0;
37039 nelt = 4;
37040 /* FALLTHRU */
37041
37042 case V2DFmode:
37043 case V4SFmode:
37044 /* In the 128-bit case, we have full freedom in the placement of
37045 the elements from the source operand. */
37046 for (i = 0; i < nelt; ++i)
37047 mask |= ipar[i] << (i * (nelt / 2));
37048 break;
37049
37050 default:
37051 gcc_unreachable ();
37052 }
37053
37054 /* Make sure success has a non-zero value by adding one. */
37055 return mask + 1;
37056 }
37057
37058 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
37059 the expansion functions to turn the parallel back into a mask.
37060 The return value is 0 for no match and the imm8+1 for a match. */
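/* For example, for V8SFmode the parallel (4 5 6 7 0 1 2 3) swaps the two
   128-bit lanes, encodes as imm8 0x01, and the function returns 0x02.  */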
37061
37062 int
37063 avx_vperm2f128_parallel (rtx par, enum machine_mode mode)
37064 {
37065 unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
37066 unsigned mask = 0;
37067 unsigned char ipar[8] = {}; /* Silence -Wuninitialized warning. */
37068
37069 if (XVECLEN (par, 0) != (int) nelt)
37070 return 0;
37071
37072 /* Validate that all of the elements are constants, and not totally
37073 out of range. Copy the data into an integral array to make the
37074 subsequent checks easier. */
37075 for (i = 0; i < nelt; ++i)
37076 {
37077 rtx er = XVECEXP (par, 0, i);
37078 unsigned HOST_WIDE_INT ei;
37079
37080 if (!CONST_INT_P (er))
37081 return 0;
37082 ei = INTVAL (er);
37083 if (ei >= 2 * nelt)
37084 return 0;
37085 ipar[i] = ei;
37086 }
37087
37088 /* Validate that the halves of the permute are halves. */
37089 for (i = 0; i < nelt2 - 1; ++i)
37090 if (ipar[i] + 1 != ipar[i + 1])
37091 return 0;
37092 for (i = nelt2; i < nelt - 1; ++i)
37093 if (ipar[i] + 1 != ipar[i + 1])
37094 return 0;
37095
37096 /* Reconstruct the mask. */
37097 for (i = 0; i < 2; ++i)
37098 {
37099 unsigned e = ipar[i * nelt2];
37100 if (e % nelt2)
37101 return 0;
37102 e /= nelt2;
37103 mask |= e << (i * 4);
37104 }
37105
37106 /* Make sure success has a non-zero value by adding one. */
37107 return mask + 1;
37108 }
37109 \f
37110 /* Return a register priority for hard reg REGNO. */
37111 static int
37112 ix86_register_priority (int hard_regno)
37113 {
37114 /* ebp and r13 as the base always want a displacement, and r12 as the
37115 base always wants an index. So discourage their use in an
37116 address. */
37117 if (hard_regno == R12_REG || hard_regno == R13_REG)
37118 return 0;
37119 if (hard_regno == BP_REG)
37120 return 1;
37121 /* New x86-64 int registers result in bigger code size. Discourage
37122 them. */
37123 if (FIRST_REX_INT_REG <= hard_regno && hard_regno <= LAST_REX_INT_REG)
37124 return 2;
37125 /* New x86-64 SSE registers result in bigger code size. Discourage
37126 them. */
37127 if (FIRST_REX_SSE_REG <= hard_regno && hard_regno <= LAST_REX_SSE_REG)
37128 return 2;
37129 /* Use of the AX register results in smaller code. Prefer it. */
37130 if (hard_regno == 0)
37131 return 4;
37132 return 3;
37133 }
37134
37135 /* Implement TARGET_PREFERRED_RELOAD_CLASS.
37136
37137 Put float CONST_DOUBLE in the constant pool instead of fp regs.
37138 QImode must go into class Q_REGS.
37139 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
37140 movdf to do mem-to-mem moves through integer regs. */
37141
37142 static reg_class_t
37143 ix86_preferred_reload_class (rtx x, reg_class_t regclass)
37144 {
37145 enum machine_mode mode = GET_MODE (x);
37146
37147 /* We're only allowed to return a subclass of CLASS. Many of the
37148 following checks fail for NO_REGS, so eliminate that early. */
37149 if (regclass == NO_REGS)
37150 return NO_REGS;
37151
37152 /* All classes can load zeros. */
37153 if (x == CONST0_RTX (mode))
37154 return regclass;
37155
37156 /* Force constants into memory if we are loading a (nonzero) constant into
37157 an MMX, SSE or MASK register. This is because there are no MMX/SSE/MASK
37158 instructions to load from a constant. */
37159 if (CONSTANT_P (x)
37160 && (MAYBE_MMX_CLASS_P (regclass)
37161 || MAYBE_SSE_CLASS_P (regclass)
37162 || MAYBE_MASK_CLASS_P (regclass)))
37163 return NO_REGS;
37164
37165 /* Prefer SSE regs only, if we can use them for math. */
37166 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
37167 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
37168
37169 /* Floating-point constants need more complex checks. */
37170 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
37171 {
37172 /* General regs can load everything. */
37173 if (reg_class_subset_p (regclass, GENERAL_REGS))
37174 return regclass;
37175
37176 /* Floats can load 0 and 1 plus some others. Note that we eliminated
37177 zero above. We only want to wind up preferring 80387 registers if
37178 we plan on doing computation with them. */
37179 if (TARGET_80387
37180 && standard_80387_constant_p (x) > 0)
37181 {
37182 /* Limit class to non-sse. */
37183 if (regclass == FLOAT_SSE_REGS)
37184 return FLOAT_REGS;
37185 if (regclass == FP_TOP_SSE_REGS)
37186 return FP_TOP_REG;
37187 if (regclass == FP_SECOND_SSE_REGS)
37188 return FP_SECOND_REG;
37189 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
37190 return regclass;
37191 }
37192
37193 return NO_REGS;
37194 }
37195
37196 /* Generally when we see PLUS here, it's the function invariant
37197 (plus soft-fp const_int). Which can only be computed into general
37198 regs. */
37199 if (GET_CODE (x) == PLUS)
37200 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
37201
37202 /* QImode constants are easy to load, but non-constant QImode data
37203 must go into Q_REGS. */
37204 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
37205 {
37206 if (reg_class_subset_p (regclass, Q_REGS))
37207 return regclass;
37208 if (reg_class_subset_p (Q_REGS, regclass))
37209 return Q_REGS;
37210 return NO_REGS;
37211 }
37212
37213 return regclass;
37214 }
37215
37216 /* Discourage putting floating-point values in SSE registers unless
37217 SSE math is being used, and likewise for the 387 registers. */
37218 static reg_class_t
37219 ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
37220 {
37221 enum machine_mode mode = GET_MODE (x);
37222
37223 /* Restrict the output reload class to the register bank that we are doing
37224 math on. If we would like not to return a subset of CLASS, reject this
37225 alternative: if reload cannot do this, it will still use its choice. */
37226 mode = GET_MODE (x);
37227 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
37228 return MAYBE_SSE_CLASS_P (regclass) ? ALL_SSE_REGS : NO_REGS;
37229
37230 if (X87_FLOAT_MODE_P (mode))
37231 {
37232 if (regclass == FP_TOP_SSE_REGS)
37233 return FP_TOP_REG;
37234 else if (regclass == FP_SECOND_SSE_REGS)
37235 return FP_SECOND_REG;
37236 else
37237 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
37238 }
37239
37240 return regclass;
37241 }
37242
37243 static reg_class_t
37244 ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
37245 enum machine_mode mode, secondary_reload_info *sri)
37246 {
37247 /* Double-word spills from general registers to non-offsettable memory
37248 references (zero-extended addresses) require special handling. */
37249 if (TARGET_64BIT
37250 && MEM_P (x)
37251 && GET_MODE_SIZE (mode) > UNITS_PER_WORD
37252 && INTEGER_CLASS_P (rclass)
37253 && !offsettable_memref_p (x))
37254 {
37255 sri->icode = (in_p
37256 ? CODE_FOR_reload_noff_load
37257 : CODE_FOR_reload_noff_store);
37258 /* Add the cost of moving address to a temporary. */
37259 sri->extra_cost = 1;
37260
37261 return NO_REGS;
37262 }
37263
37264 /* QImode spills from non-QI registers require an
37265 intermediate register on 32-bit targets. */
37266 if (mode == QImode
37267 && (MAYBE_MASK_CLASS_P (rclass)
37268 || (!TARGET_64BIT && !in_p
37269 && INTEGER_CLASS_P (rclass)
37270 && MAYBE_NON_Q_CLASS_P (rclass))))
37271 {
37272 int regno;
37273
37274 if (REG_P (x))
37275 regno = REGNO (x);
37276 else
37277 regno = -1;
37278
37279 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
37280 regno = true_regnum (x);
37281
37282 /* Return Q_REGS if the operand is in memory. */
37283 if (regno == -1)
37284 return Q_REGS;
37285 }
37286
37287 /* This condition handles the corner case where an expression involving
37288 pointers gets vectorized. We're trying to use the address of a
37289 stack slot as a vector initializer.
37290
37291 (set (reg:V2DI 74 [ vect_cst_.2 ])
37292 (vec_duplicate:V2DI (reg/f:DI 20 frame)))
37293
37294 Eventually frame gets turned into sp+offset like this:
37295
37296 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
37297 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
37298 (const_int 392 [0x188]))))
37299
37300 That later gets turned into:
37301
37302 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
37303 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
37304 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))
37305
37306 We'll have the following reload recorded:
37307
37308 Reload 0: reload_in (DI) =
37309 (plus:DI (reg/f:DI 7 sp)
37310 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
37311 reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
37312 SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
37313 reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
37314 reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
37315 reload_reg_rtx: (reg:V2DI 22 xmm1)
37316
37317 This isn't going to work since SSE instructions can't handle scalar
37318 additions. Returning GENERAL_REGS forces the addition into an integer
37319 register, and reload can handle subsequent reloads without problems. */
37320
37321 if (in_p && GET_CODE (x) == PLUS
37322 && SSE_CLASS_P (rclass)
37323 && SCALAR_INT_MODE_P (mode))
37324 return GENERAL_REGS;
37325
37326 return NO_REGS;
37327 }
37328
37329 /* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
37330
37331 static bool
37332 ix86_class_likely_spilled_p (reg_class_t rclass)
37333 {
37334 switch (rclass)
37335 {
37336 case AREG:
37337 case DREG:
37338 case CREG:
37339 case BREG:
37340 case AD_REGS:
37341 case SIREG:
37342 case DIREG:
37343 case SSE_FIRST_REG:
37344 case FP_TOP_REG:
37345 case FP_SECOND_REG:
37346 return true;
37347
37348 default:
37349 break;
37350 }
37351
37352 return false;
37353 }
37354
37355 /* If we are copying between general and FP registers, we need a memory
37356 location. The same is true for SSE and MMX registers.
37357
37358 To optimize register_move_cost performance, allow inline variant.
37359
37360 The macro can't work reliably when one of the CLASSES is a class containing
37361 registers from multiple units (SSE, MMX, integer). We avoid this by never
37362 combining those units in a single alternative in the machine description.
37363 Ensure that this constraint holds to avoid unexpected surprises.
37364
37365 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
37366 enforce these sanity checks. */
37367
37368 static inline bool
37369 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
37370 enum machine_mode mode, int strict)
37371 {
37372 if (lra_in_progress && (class1 == NO_REGS || class2 == NO_REGS))
37373 return false;
37374 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
37375 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
37376 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
37377 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
37378 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
37379 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
37380 {
37381 gcc_assert (!strict || lra_in_progress);
37382 return true;
37383 }
37384
37385 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
37386 return true;
37387
37388 /* ??? This is a lie. We do have moves between mmx/general, and for
37389 mmx/sse2. But by saying we need secondary memory we discourage the
37390 register allocator from using the mmx registers unless needed. */
37391 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
37392 return true;
37393
37394 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
37395 {
37396 /* SSE1 doesn't have any direct moves from other classes. */
37397 if (!TARGET_SSE2)
37398 return true;
37399
37400 /* If the target says that inter-unit moves are more expensive
37401 than moving through memory, then don't generate them. */
37402 if ((SSE_CLASS_P (class1) && !TARGET_INTER_UNIT_MOVES_FROM_VEC)
37403 || (SSE_CLASS_P (class2) && !TARGET_INTER_UNIT_MOVES_TO_VEC))
37404 return true;
37405
37406 /* Between SSE and general, we have moves no larger than word size. */
37407 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
37408 return true;
37409 }
37410
37411 return false;
37412 }
37413
37414 bool
37415 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
37416 enum machine_mode mode, int strict)
37417 {
37418 return inline_secondary_memory_needed (class1, class2, mode, strict);
37419 }
37420
37421 /* Implement the TARGET_CLASS_MAX_NREGS hook.
37422
37423 On the 80386, this is the size of MODE in words,
37424 except in the FP regs, where a single reg is always enough. */
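/* For example, XFmode occupies 3 words in 32-bit integer registers and
   2 words in 64-bit ones, while non-integer classes report one register
   for scalar modes and two for complex modes.  */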
37425
37426 static unsigned char
37427 ix86_class_max_nregs (reg_class_t rclass, enum machine_mode mode)
37428 {
37429 if (MAYBE_INTEGER_CLASS_P (rclass))
37430 {
37431 if (mode == XFmode)
37432 return (TARGET_64BIT ? 2 : 3);
37433 else if (mode == XCmode)
37434 return (TARGET_64BIT ? 4 : 6);
37435 else
37436 return ((GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD);
37437 }
37438 else
37439 {
37440 if (COMPLEX_MODE_P (mode))
37441 return 2;
37442 else
37443 return 1;
37444 }
37445 }
37446
37447 /* Return true if the registers in CLASS cannot represent the change from
37448 modes FROM to TO. */
37449
37450 bool
37451 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
37452 enum reg_class regclass)
37453 {
37454 if (from == to)
37455 return false;
37456
37457 /* x87 registers can't do subreg at all, as all values are reformatted
37458 to extended precision. */
37459 if (MAYBE_FLOAT_CLASS_P (regclass))
37460 return true;
37461
37462 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
37463 {
37464 /* Vector registers do not support QI or HImode loads. If we don't
37465 disallow a change to these modes, reload will assume it's ok to
37466 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
37467 the vec_dupv4hi pattern. */
37468 if (GET_MODE_SIZE (from) < 4)
37469 return true;
37470
37471 /* Vector registers do not support subreg with nonzero offsets, which
37472 are otherwise valid for integer registers. Since we can't see
37473 whether we have a nonzero offset from here, prohibit all
37474 nonparadoxical subregs changing size. */
37475 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
37476 return true;
37477 }
37478
37479 return false;
37480 }
37481
37482 /* Return the cost of moving data of mode M between a
37483 register and memory. A value of 2 is the default; this cost is
37484 relative to those in `REGISTER_MOVE_COST'.
37485
37486 This function is used extensively by register_move_cost that is used to
37487 build tables at startup. Make it inline in this case.
37488 When IN is 2, return maximum of in and out move cost.
37489
37490 If moving between registers and memory is more expensive than
37491 between two registers, you should define this macro to express the
37492 relative cost.
37493
37494 Also model the increased cost of moving QImode registers in
37495 non-Q_REGS classes.
37496 */
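/* For example, an SFmode value in FLOAT_REGS costs fp_load[0] to load and
   fp_store[0] to store, and IN == 2 requests the larger of the two.  */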
37497 static inline int
37498 inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
37499 int in)
37500 {
37501 int cost;
37502 if (FLOAT_CLASS_P (regclass))
37503 {
37504 int index;
37505 switch (mode)
37506 {
37507 case SFmode:
37508 index = 0;
37509 break;
37510 case DFmode:
37511 index = 1;
37512 break;
37513 case XFmode:
37514 index = 2;
37515 break;
37516 default:
37517 return 100;
37518 }
37519 if (in == 2)
37520 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
37521 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
37522 }
37523 if (SSE_CLASS_P (regclass))
37524 {
37525 int index;
37526 switch (GET_MODE_SIZE (mode))
37527 {
37528 case 4:
37529 index = 0;
37530 break;
37531 case 8:
37532 index = 1;
37533 break;
37534 case 16:
37535 index = 2;
37536 break;
37537 default:
37538 return 100;
37539 }
37540 if (in == 2)
37541 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
37542 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
37543 }
37544 if (MMX_CLASS_P (regclass))
37545 {
37546 int index;
37547 switch (GET_MODE_SIZE (mode))
37548 {
37549 case 4:
37550 index = 0;
37551 break;
37552 case 8:
37553 index = 1;
37554 break;
37555 default:
37556 return 100;
37557 }
37558 if (in == 2)
37559 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
37560 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
37561 }
37562 switch (GET_MODE_SIZE (mode))
37563 {
37564 case 1:
37565 if (Q_CLASS_P (regclass) || TARGET_64BIT)
37566 {
37567 if (!in)
37568 return ix86_cost->int_store[0];
37569 if (TARGET_PARTIAL_REG_DEPENDENCY
37570 && optimize_function_for_speed_p (cfun))
37571 cost = ix86_cost->movzbl_load;
37572 else
37573 cost = ix86_cost->int_load[0];
37574 if (in == 2)
37575 return MAX (cost, ix86_cost->int_store[0]);
37576 return cost;
37577 }
37578 else
37579 {
37580 if (in == 2)
37581 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
37582 if (in)
37583 return ix86_cost->movzbl_load;
37584 else
37585 return ix86_cost->int_store[0] + 4;
37586 }
37587 break;
37588 case 2:
37589 if (in == 2)
37590 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
37591 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
37592 default:
37593 /* Compute the number of 32-bit moves needed. TFmode is moved as XFmode. */
37594 if (mode == TFmode)
37595 mode = XFmode;
37596 if (in == 2)
37597 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
37598 else if (in)
37599 cost = ix86_cost->int_load[2];
37600 else
37601 cost = ix86_cost->int_store[2];
37602 return (cost * (((int) GET_MODE_SIZE (mode)
37603 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
37604 }
37605 }
37606
37607 static int
37608 ix86_memory_move_cost (enum machine_mode mode, reg_class_t regclass,
37609 bool in)
37610 {
37611 return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
37612 }
37613
37614
37615 /* Return the cost of moving data from a register in class CLASS1 to
37616 one in class CLASS2.
37617
37618 It is not required that the cost always equal 2 when FROM is the same as TO;
37619 on some machines it is expensive to move between registers if they are not
37620 general registers. */
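/* For example, a move between MMX and SSE classes goes through the
   secondary-memory path below, so its cost is roughly one unit plus the
   store and load costs of MODE, rather than a direct register move.  */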
37621
37622 static int
37623 ix86_register_move_cost (enum machine_mode mode, reg_class_t class1_i,
37624 reg_class_t class2_i)
37625 {
37626 enum reg_class class1 = (enum reg_class) class1_i;
37627 enum reg_class class2 = (enum reg_class) class2_i;
37628
37629 /* In case we require secondary memory, compute the cost of the store followed
37630 by the load. In order to avoid bad register allocation choices, we need
37631 this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
37632
37633 if (inline_secondary_memory_needed (class1, class2, mode, 0))
37634 {
37635 int cost = 1;
37636
37637 cost += inline_memory_move_cost (mode, class1, 2);
37638 cost += inline_memory_move_cost (mode, class2, 2);
37639
37640 /* In the case of copying from a general purpose register we may emit multiple
37641 stores followed by a single load, causing a memory size mismatch stall.
37642 Count this as an arbitrarily high cost of 20. */
37643 if (targetm.class_max_nregs (class1, mode)
37644 > targetm.class_max_nregs (class2, mode))
37645 cost += 20;
37646
37647 /* In the case of FP/MMX moves, the registers actually overlap, and we
37648 have to switch modes in order to treat them differently. */
37649 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
37650 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
37651 cost += 20;
37652
37653 return cost;
37654 }
37655
37656 /* Moves between SSE/MMX and integer unit are expensive. */
37657 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
37658 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
37659
37660 /* ??? By keeping the returned value relatively high, we limit the number
37661 of moves between integer and MMX/SSE registers for all targets.
37662 Additionally, a high value prevents a problem with x86_modes_tieable_p(),
37663 where integer modes in MMX/SSE registers are not tieable
37664 because of missing QImode and HImode moves to, from or between
37665 MMX/SSE registers. */
37666 return MAX (8, ix86_cost->mmxsse_to_integer);
37667
37668 if (MAYBE_FLOAT_CLASS_P (class1))
37669 return ix86_cost->fp_move;
37670 if (MAYBE_SSE_CLASS_P (class1))
37671 return ix86_cost->sse_move;
37672 if (MAYBE_MMX_CLASS_P (class1))
37673 return ix86_cost->mmx_move;
37674 return 2;
37675 }
37676
37677 /* Return TRUE if hard register REGNO can hold a value of machine-mode
37678 MODE. */
37679
37680 bool
37681 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
37682 {
37683 /* Flags and only flags can only hold CCmode values. */
37684 if (CC_REGNO_P (regno))
37685 return GET_MODE_CLASS (mode) == MODE_CC;
37686 if (GET_MODE_CLASS (mode) == MODE_CC
37687 || GET_MODE_CLASS (mode) == MODE_RANDOM
37688 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
37689 return false;
37690 if (STACK_REGNO_P (regno))
37691 return VALID_FP_MODE_P (mode);
37692 if (MASK_REGNO_P (regno))
37693 return VALID_MASK_REG_MODE (mode);
37694 if (SSE_REGNO_P (regno))
37695 {
37696 /* We implement the move patterns for all vector modes into and
37697 out of SSE registers, even when no operation instructions
37698 are available. */
37699
37700 /* For AVX-512 we allow, regardless of regno:
37701 - XI mode
37702 - any of 512-bit wide vector mode
37703 - any scalar mode. */
37704 if (TARGET_AVX512F
37705 && (mode == XImode
37706 || VALID_AVX512F_REG_MODE (mode)
37707 || VALID_AVX512F_SCALAR_MODE (mode)))
37708 return true;
37709
37710 /* xmm16-xmm31 are only available for AVX-512. */
37711 if (EXT_REX_SSE_REGNO_P (regno))
37712 return false;
37713
37714 /* OImode and AVX modes are available only when AVX is enabled. */
37715 return ((TARGET_AVX
37716 && VALID_AVX256_REG_OR_OI_MODE (mode))
37717 || VALID_SSE_REG_MODE (mode)
37718 || VALID_SSE2_REG_MODE (mode)
37719 || VALID_MMX_REG_MODE (mode)
37720 || VALID_MMX_REG_MODE_3DNOW (mode));
37721 }
37722 if (MMX_REGNO_P (regno))
37723 {
37724 /* We implement the move patterns for 3DNOW modes even in MMX mode,
37725 so if the register is available at all, then we can move data of
37726 the given mode into or out of it. */
37727 return (VALID_MMX_REG_MODE (mode)
37728 || VALID_MMX_REG_MODE_3DNOW (mode));
37729 }
37730
37731 if (mode == QImode)
37732 {
37733 /* Take care of QImode values - they can be in non-QI regs,
37734 but then they do cause partial register stalls. */
37735 if (ANY_QI_REGNO_P (regno))
37736 return true;
37737 if (!TARGET_PARTIAL_REG_STALL)
37738 return true;
37739 /* LRA checks if the hard register is OK for the given mode.
37740 QImode values can live in non-QI regs, so we allow all
37741 registers here. */
37742 if (lra_in_progress)
37743 return true;
37744 return !can_create_pseudo_p ();
37745 }
37746 /* We handle both integers and floats in the general purpose registers. */
37747 else if (VALID_INT_MODE_P (mode))
37748 return true;
37749 else if (VALID_FP_MODE_P (mode))
37750 return true;
37751 else if (VALID_DFP_MODE_P (mode))
37752 return true;
37753 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
37754 on to use that value in smaller contexts, this can easily force a
37755 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
37756 supporting DImode, allow it. */
37757 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
37758 return true;
37759
37760 return false;
37761 }
37762
37763 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
37764 tieable integer mode. */
37765
37766 static bool
37767 ix86_tieable_integer_mode_p (enum machine_mode mode)
37768 {
37769 switch (mode)
37770 {
37771 case HImode:
37772 case SImode:
37773 return true;
37774
37775 case QImode:
37776 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
37777
37778 case DImode:
37779 return TARGET_64BIT;
37780
37781 default:
37782 return false;
37783 }
37784 }
37785
37786 /* Return true if MODE1 is accessible in a register that can hold MODE2
37787 without copying. That is, all register classes that can hold MODE2
37788 can also hold MODE1. */
37789
37790 bool
37791 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
37792 {
37793 if (mode1 == mode2)
37794 return true;
37795
37796 if (ix86_tieable_integer_mode_p (mode1)
37797 && ix86_tieable_integer_mode_p (mode2))
37798 return true;
37799
37800 /* MODE2 being XFmode implies fp stack or general regs, which means we
37801 can tie any smaller floating point modes to it. Note that we do not
37802 tie this with TFmode. */
37803 if (mode2 == XFmode)
37804 return mode1 == SFmode || mode1 == DFmode;
37805
37806 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
37807 that we can tie it with SFmode. */
37808 if (mode2 == DFmode)
37809 return mode1 == SFmode;
37810
37811 /* If MODE2 is only appropriate for an SSE register, then tie with
37812 any other mode acceptable to SSE registers. */
37813 if (GET_MODE_SIZE (mode2) == 32
37814 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
37815 return (GET_MODE_SIZE (mode1) == 32
37816 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
37817 if (GET_MODE_SIZE (mode2) == 16
37818 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
37819 return (GET_MODE_SIZE (mode1) == 16
37820 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
37821
37822 /* If MODE2 is appropriate for an MMX register, then tie
37823 with any other mode acceptable to MMX registers. */
37824 if (GET_MODE_SIZE (mode2) == 8
37825 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
37826 return (GET_MODE_SIZE (mode1) == 8
37827 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
37828
37829 return false;
37830 }
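
/* For example, under the rules above HImode and SImode always tie (both
   are tieable integer modes), DFmode ties only with SFmode, and two
   16-byte vector modes such as V4SFmode and V2DImode tie with each other
   because both are acceptable to the SSE registers.  */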
37831
37832 /* Return the cost of moving between two registers of mode MODE. */
37833
37834 static int
37835 ix86_set_reg_reg_cost (enum machine_mode mode)
37836 {
37837 unsigned int units = UNITS_PER_WORD;
37838
37839 switch (GET_MODE_CLASS (mode))
37840 {
37841 default:
37842 break;
37843
37844 case MODE_CC:
37845 units = GET_MODE_SIZE (CCmode);
37846 break;
37847
37848 case MODE_FLOAT:
37849 if ((TARGET_SSE && mode == TFmode)
37850 || (TARGET_80387 && mode == XFmode)
37851 || ((TARGET_80387 || TARGET_SSE2) && mode == DFmode)
37852 || ((TARGET_80387 || TARGET_SSE) && mode == SFmode))
37853 units = GET_MODE_SIZE (mode);
37854 break;
37855
37856 case MODE_COMPLEX_FLOAT:
37857 if ((TARGET_SSE && mode == TCmode)
37858 || (TARGET_80387 && mode == XCmode)
37859 || ((TARGET_80387 || TARGET_SSE2) && mode == DCmode)
37860 || ((TARGET_80387 || TARGET_SSE) && mode == SCmode))
37861 units = GET_MODE_SIZE (mode);
37862 break;
37863
37864 case MODE_VECTOR_INT:
37865 case MODE_VECTOR_FLOAT:
37866 if ((TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
37867 || (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
37868 || (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
37869 || (TARGET_SSE && VALID_SSE_REG_MODE (mode))
37870 || (TARGET_MMX && VALID_MMX_REG_MODE (mode)))
37871 units = GET_MODE_SIZE (mode);
37872 }
37873
37874 /* Return the cost of moving between two registers of mode MODE,
37875 assuming that the move will be in pieces of at most UNITS bytes. */
37876 return COSTS_N_INSNS ((GET_MODE_SIZE (mode) + units - 1) / units);
37877 }
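
/* A rough worked example of the formula above: moving a 32-byte V8SFmode
   value on an AVX target gives UNITS == 32, so the cost is
   COSTS_N_INSNS (1); without AVX, UNITS stays at UNITS_PER_WORD (4 on
   ia32), giving COSTS_N_INSNS (8) and strongly discouraging such
   moves.  */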
37878
37879 /* Compute a (partial) cost for rtx X. Return true if the complete
37880 cost has been computed, and false if subexpressions should be
37881 scanned. In either case, *TOTAL contains the cost result. */
37882
37883 static bool
37884 ix86_rtx_costs (rtx x, int code_i, int outer_code_i, int opno, int *total,
37885 bool speed)
37886 {
37887 rtx mask;
37888 enum rtx_code code = (enum rtx_code) code_i;
37889 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
37890 enum machine_mode mode = GET_MODE (x);
37891 const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
37892
37893 switch (code)
37894 {
37895 case SET:
37896 if (register_operand (SET_DEST (x), VOIDmode)
37897 && reg_or_0_operand (SET_SRC (x), VOIDmode))
37898 {
37899 *total = ix86_set_reg_reg_cost (GET_MODE (SET_DEST (x)));
37900 return true;
37901 }
37902 return false;
37903
37904 case CONST_INT:
37905 case CONST:
37906 case LABEL_REF:
37907 case SYMBOL_REF:
37908 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
37909 *total = 3;
37910 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
37911 *total = 2;
37912 else if (flag_pic && SYMBOLIC_CONST (x)
37913 && !(TARGET_64BIT
37914 && (GET_CODE (x) == LABEL_REF
37915 || (GET_CODE (x) == SYMBOL_REF
37916 && SYMBOL_REF_LOCAL_P (x)))))
37917 *total = 1;
37918 else
37919 *total = 0;
37920 return true;
37921
37922 case CONST_DOUBLE:
37923 if (mode == VOIDmode)
37924 {
37925 *total = 0;
37926 return true;
37927 }
37928 switch (standard_80387_constant_p (x))
37929 {
37930 case 1: /* 0.0 */
37931 *total = 1;
37932 return true;
37933 default: /* Other constants */
37934 *total = 2;
37935 return true;
37936 case 0:
37937 case -1:
37938 break;
37939 }
37940 if (SSE_FLOAT_MODE_P (mode))
37941 {
37942 case CONST_VECTOR:
37943 switch (standard_sse_constant_p (x))
37944 {
37945 case 0:
37946 break;
37947 case 1: /* 0: xor eliminates false dependency */
37948 *total = 0;
37949 return true;
37950 default: /* -1: cmp contains false dependency */
37951 *total = 1;
37952 return true;
37953 }
37954 }
37955 /* Fall back to (MEM (SYMBOL_REF)), since that's where
37956 it'll probably end up. Add a penalty for size. */
37957 *total = (COSTS_N_INSNS (1)
37958 + (flag_pic != 0 && !TARGET_64BIT)
37959 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
37960 return true;
37961
37962 case ZERO_EXTEND:
37963 /* The zero extension is often completely free on x86_64, so make
37964 it as cheap as possible. */
37965 if (TARGET_64BIT && mode == DImode
37966 && GET_MODE (XEXP (x, 0)) == SImode)
37967 *total = 1;
37968 else if (TARGET_ZERO_EXTEND_WITH_AND)
37969 *total = cost->add;
37970 else
37971 *total = cost->movzx;
37972 return false;
37973
37974 case SIGN_EXTEND:
37975 *total = cost->movsx;
37976 return false;
37977
37978 case ASHIFT:
37979 if (SCALAR_INT_MODE_P (mode)
37980 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
37981 && CONST_INT_P (XEXP (x, 1)))
37982 {
37983 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
37984 if (value == 1)
37985 {
37986 *total = cost->add;
37987 return false;
37988 }
37989 if ((value == 2 || value == 3)
37990 && cost->lea <= cost->shift_const)
37991 {
37992 *total = cost->lea;
37993 return false;
37994 }
37995 }
37996 /* FALLTHRU */
37997
37998 case ROTATE:
37999 case ASHIFTRT:
38000 case LSHIFTRT:
38001 case ROTATERT:
38002 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
38003 {
38004 /* ??? Should be SSE vector operation cost. */
38005 /* At least for published AMD latencies, this really is the same
38006 as the latency for a simple fpu operation like fabs. */
38007 /* V*QImode is emulated with 1-11 insns. */
38008 if (mode == V16QImode || mode == V32QImode)
38009 {
38010 int count = 11;
38011 if (TARGET_XOP && mode == V16QImode)
38012 {
38013 /* For XOP we use vpshab, which requires a broadcast of the
38014 value to the variable shift insn. For constants this
38015 means a V16Q const in mem; even when we can perform the
38016 shift with one insn, set the cost to prefer paddb. */
38017 if (CONSTANT_P (XEXP (x, 1)))
38018 {
38019 *total = (cost->fabs
38020 + rtx_cost (XEXP (x, 0), code, 0, speed)
38021 + (speed ? 2 : COSTS_N_BYTES (16)));
38022 return true;
38023 }
38024 count = 3;
38025 }
38026 else if (TARGET_SSSE3)
38027 count = 7;
38028 *total = cost->fabs * count;
38029 }
38030 else
38031 *total = cost->fabs;
38032 }
38033 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
38034 {
38035 if (CONST_INT_P (XEXP (x, 1)))
38036 {
38037 if (INTVAL (XEXP (x, 1)) > 32)
38038 *total = cost->shift_const + COSTS_N_INSNS (2);
38039 else
38040 *total = cost->shift_const * 2;
38041 }
38042 else
38043 {
38044 if (GET_CODE (XEXP (x, 1)) == AND)
38045 *total = cost->shift_var * 2;
38046 else
38047 *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
38048 }
38049 }
38050 else
38051 {
38052 if (CONST_INT_P (XEXP (x, 1)))
38053 *total = cost->shift_const;
38054 else if (GET_CODE (XEXP (x, 1)) == SUBREG
38055 && GET_CODE (XEXP (XEXP (x, 1), 0)) == AND)
38056 {
38057 /* Return the cost after shift-and truncation. */
38058 *total = cost->shift_var;
38059 return true;
38060 }
38061 else
38062 *total = cost->shift_var;
38063 }
38064 return false;
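/* As an informal example of the scalar shift costing above: when
   cost->lea <= cost->shift_const, "x << 3" is charged cost->lea so that
   the "leal (,%eax,8), %eax" form is preferred, while "x << 1" is
   charged as a plain add.  */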
38065
38066 case FMA:
38067 {
38068 rtx sub;
38069
38070 gcc_assert (FLOAT_MODE_P (mode));
38071 gcc_assert (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F);
38072
38073 /* ??? SSE scalar/vector cost should be used here. */
38074 /* ??? Bald assumption that fma has the same cost as fmul. */
38075 *total = cost->fmul;
38076 *total += rtx_cost (XEXP (x, 1), FMA, 1, speed);
38077
38078 /* Negate in op0 or op2 is free: FMS, FNMA, FNMS. */
38079 sub = XEXP (x, 0);
38080 if (GET_CODE (sub) == NEG)
38081 sub = XEXP (sub, 0);
38082 *total += rtx_cost (sub, FMA, 0, speed);
38083
38084 sub = XEXP (x, 2);
38085 if (GET_CODE (sub) == NEG)
38086 sub = XEXP (sub, 0);
38087 *total += rtx_cost (sub, FMA, 2, speed);
38088 return true;
38089 }
38090
38091 case MULT:
38092 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38093 {
38094 /* ??? SSE scalar cost should be used here. */
38095 *total = cost->fmul;
38096 return false;
38097 }
38098 else if (X87_FLOAT_MODE_P (mode))
38099 {
38100 *total = cost->fmul;
38101 return false;
38102 }
38103 else if (FLOAT_MODE_P (mode))
38104 {
38105 /* ??? SSE vector cost should be used here. */
38106 *total = cost->fmul;
38107 return false;
38108 }
38109 else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
38110 {
38111 /* V*QImode is emulated with 7-13 insns. */
38112 if (mode == V16QImode || mode == V32QImode)
38113 {
38114 int extra = 11;
38115 if (TARGET_XOP && mode == V16QImode)
38116 extra = 5;
38117 else if (TARGET_SSSE3)
38118 extra = 6;
38119 *total = cost->fmul * 2 + cost->fabs * extra;
38120 }
38121 /* V*DImode is emulated with 5-8 insns. */
38122 else if (mode == V2DImode || mode == V4DImode)
38123 {
38124 if (TARGET_XOP && mode == V2DImode)
38125 *total = cost->fmul * 2 + cost->fabs * 3;
38126 else
38127 *total = cost->fmul * 3 + cost->fabs * 5;
38128 }
38129 /* Without sse4.1, we don't have PMULLD; it's emulated with 7
38130 insns, including two PMULUDQ. */
38131 else if (mode == V4SImode && !(TARGET_SSE4_1 || TARGET_AVX))
38132 *total = cost->fmul * 2 + cost->fabs * 5;
38133 else
38134 *total = cost->fmul;
38135 return false;
38136 }
38137 else
38138 {
38139 rtx op0 = XEXP (x, 0);
38140 rtx op1 = XEXP (x, 1);
38141 int nbits;
38142 if (CONST_INT_P (XEXP (x, 1)))
38143 {
38144 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
38145 for (nbits = 0; value != 0; value &= value - 1)
38146 nbits++;
38147 }
38148 else
38149 /* This is arbitrary. */
38150 nbits = 7;
38151
38152 /* Compute costs correctly for widening multiplication. */
38153 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
38154 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
38155 == GET_MODE_SIZE (mode))
38156 {
38157 int is_mulwiden = 0;
38158 enum machine_mode inner_mode = GET_MODE (op0);
38159
38160 if (GET_CODE (op0) == GET_CODE (op1))
38161 is_mulwiden = 1, op1 = XEXP (op1, 0);
38162 else if (CONST_INT_P (op1))
38163 {
38164 if (GET_CODE (op0) == SIGN_EXTEND)
38165 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
38166 == INTVAL (op1);
38167 else
38168 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
38169 }
38170
38171 if (is_mulwiden)
38172 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
38173 }
38174
38175 *total = (cost->mult_init[MODE_INDEX (mode)]
38176 + nbits * cost->mult_bit
38177 + rtx_cost (op0, outer_code, opno, speed)
38178 + rtx_cost (op1, outer_code, opno, speed));
38179
38180 return true;
38181 }
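/* A rough example of the constant multiply costing above: for "x * 10"
   the multiplier has two set bits, so NBITS is 2 and the cost is
   mult_init[MODE_INDEX (mode)] + 2 * mult_bit plus the operand costs;
   a non-constant multiplier uses the arbitrary NBITS of 7.  */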
38182
38183 case DIV:
38184 case UDIV:
38185 case MOD:
38186 case UMOD:
38187 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38188 /* ??? SSE cost should be used here. */
38189 *total = cost->fdiv;
38190 else if (X87_FLOAT_MODE_P (mode))
38191 *total = cost->fdiv;
38192 else if (FLOAT_MODE_P (mode))
38193 /* ??? SSE vector cost should be used here. */
38194 *total = cost->fdiv;
38195 else
38196 *total = cost->divide[MODE_INDEX (mode)];
38197 return false;
38198
38199 case PLUS:
38200 if (GET_MODE_CLASS (mode) == MODE_INT
38201 && GET_MODE_SIZE (mode) <= UNITS_PER_WORD)
38202 {
38203 if (GET_CODE (XEXP (x, 0)) == PLUS
38204 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
38205 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
38206 && CONSTANT_P (XEXP (x, 1)))
38207 {
38208 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
38209 if (val == 2 || val == 4 || val == 8)
38210 {
38211 *total = cost->lea;
38212 *total += rtx_cost (XEXP (XEXP (x, 0), 1),
38213 outer_code, opno, speed);
38214 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
38215 outer_code, opno, speed);
38216 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
38217 return true;
38218 }
38219 }
38220 else if (GET_CODE (XEXP (x, 0)) == MULT
38221 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
38222 {
38223 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
38224 if (val == 2 || val == 4 || val == 8)
38225 {
38226 *total = cost->lea;
38227 *total += rtx_cost (XEXP (XEXP (x, 0), 0),
38228 outer_code, opno, speed);
38229 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
38230 return true;
38231 }
38232 }
38233 else if (GET_CODE (XEXP (x, 0)) == PLUS)
38234 {
38235 *total = cost->lea;
38236 *total += rtx_cost (XEXP (XEXP (x, 0), 0),
38237 outer_code, opno, speed);
38238 *total += rtx_cost (XEXP (XEXP (x, 0), 1),
38239 outer_code, opno, speed);
38240 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
38241 return true;
38242 }
38243 }
38244 /* FALLTHRU */
38245
38246 case MINUS:
38247 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38248 {
38249 /* ??? SSE cost should be used here. */
38250 *total = cost->fadd;
38251 return false;
38252 }
38253 else if (X87_FLOAT_MODE_P (mode))
38254 {
38255 *total = cost->fadd;
38256 return false;
38257 }
38258 else if (FLOAT_MODE_P (mode))
38259 {
38260 /* ??? SSE vector cost should be used here. */
38261 *total = cost->fadd;
38262 return false;
38263 }
38264 /* FALLTHRU */
38265
38266 case AND:
38267 case IOR:
38268 case XOR:
38269 if (GET_MODE_CLASS (mode) == MODE_INT
38270 && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
38271 {
38272 *total = (cost->add * 2
38273 + (rtx_cost (XEXP (x, 0), outer_code, opno, speed)
38274 << (GET_MODE (XEXP (x, 0)) != DImode))
38275 + (rtx_cost (XEXP (x, 1), outer_code, opno, speed)
38276 << (GET_MODE (XEXP (x, 1)) != DImode)));
38277 return true;
38278 }
38279 /* FALLTHRU */
38280
38281 case NEG:
38282 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38283 {
38284 /* ??? SSE cost should be used here. */
38285 *total = cost->fchs;
38286 return false;
38287 }
38288 else if (X87_FLOAT_MODE_P (mode))
38289 {
38290 *total = cost->fchs;
38291 return false;
38292 }
38293 else if (FLOAT_MODE_P (mode))
38294 {
38295 /* ??? SSE vector cost should be used here. */
38296 *total = cost->fchs;
38297 return false;
38298 }
38299 /* FALLTHRU */
38300
38301 case NOT:
38302 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
38303 {
38304 /* ??? Should be SSE vector operation cost. */
38305 /* At least for published AMD latencies, this really is the same
38306 as the latency for a simple fpu operation like fabs. */
38307 *total = cost->fabs;
38308 }
38309 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
38310 *total = cost->add * 2;
38311 else
38312 *total = cost->add;
38313 return false;
38314
38315 case COMPARE:
38316 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
38317 && XEXP (XEXP (x, 0), 1) == const1_rtx
38318 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
38319 && XEXP (x, 1) == const0_rtx)
38320 {
38321 /* This kind of construct is implemented using test[bwl].
38322 Treat it as if we had an AND. */
38323 *total = (cost->add
38324 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, opno, speed)
38325 + rtx_cost (const1_rtx, outer_code, opno, speed));
38326 return true;
38327 }
38328 return false;
38329
38330 case FLOAT_EXTEND:
38331 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
38332 *total = 0;
38333 return false;
38334
38335 case ABS:
38336 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38337 /* ??? SSE cost should be used here. */
38338 *total = cost->fabs;
38339 else if (X87_FLOAT_MODE_P (mode))
38340 *total = cost->fabs;
38341 else if (FLOAT_MODE_P (mode))
38342 /* ??? SSE vector cost should be used here. */
38343 *total = cost->fabs;
38344 return false;
38345
38346 case SQRT:
38347 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38348 /* ??? SSE cost should be used here. */
38349 *total = cost->fsqrt;
38350 else if (X87_FLOAT_MODE_P (mode))
38351 *total = cost->fsqrt;
38352 else if (FLOAT_MODE_P (mode))
38353 /* ??? SSE vector cost should be used here. */
38354 *total = cost->fsqrt;
38355 return false;
38356
38357 case UNSPEC:
38358 if (XINT (x, 1) == UNSPEC_TP)
38359 *total = 0;
38360 return false;
38361
38362 case VEC_SELECT:
38363 case VEC_CONCAT:
38364 case VEC_DUPLICATE:
38365 /* ??? Assume all of these vector manipulation patterns are
38366 recognizable, in which case they all pretty much have the
38367 same cost. */
38368 *total = cost->fabs;
38369 return true;
38370 case VEC_MERGE:
38371 mask = XEXP (x, 2);
38372 /* This is a masked instruction; assume the same cost
38373 as the nonmasked variant. */
38374 if (TARGET_AVX512F && register_operand (mask, GET_MODE (mask)))
38375 *total = rtx_cost (XEXP (x, 0), outer_code, opno, speed);
38376 else
38377 *total = cost->fabs;
38378 return true;
38379
38380 default:
38381 return false;
38382 }
38383 }
38384
38385 #if TARGET_MACHO
38386
38387 static int current_machopic_label_num;
38388
38389 /* Given a symbol name and its associated stub, write out the
38390 definition of the stub. */
38391
38392 void
38393 machopic_output_stub (FILE *file, const char *symb, const char *stub)
38394 {
38395 unsigned int length;
38396 char *binder_name, *symbol_name, lazy_ptr_name[32];
38397 int label = ++current_machopic_label_num;
38398
38399 /* For 64-bit we shouldn't get here. */
38400 gcc_assert (!TARGET_64BIT);
38401
38402 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
38403 symb = targetm.strip_name_encoding (symb);
38404
38405 length = strlen (stub);
38406 binder_name = XALLOCAVEC (char, length + 32);
38407 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
38408
38409 length = strlen (symb);
38410 symbol_name = XALLOCAVEC (char, length + 32);
38411 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
38412
38413 sprintf (lazy_ptr_name, "L%d$lz", label);
38414
38415 if (MACHOPIC_ATT_STUB)
38416 switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
38417 else if (MACHOPIC_PURE)
38418 switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
38419 else
38420 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
38421
38422 fprintf (file, "%s:\n", stub);
38423 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
38424
38425 if (MACHOPIC_ATT_STUB)
38426 {
38427 fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
38428 }
38429 else if (MACHOPIC_PURE)
38430 {
38431 /* PIC stub. */
38432 /* 25-byte PIC stub using "CALL get_pc_thunk". */
38433 rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
38434 output_set_got (tmp, NULL_RTX); /* "CALL ___<cpu>.get_pc_thunk.cx". */
38435 fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
38436 label, lazy_ptr_name, label);
38437 fprintf (file, "\tjmp\t*%%ecx\n");
38438 }
38439 else
38440 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
38441
38442 /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
38443 it needs no stub-binding-helper. */
38444 if (MACHOPIC_ATT_STUB)
38445 return;
38446
38447 fprintf (file, "%s:\n", binder_name);
38448
38449 if (MACHOPIC_PURE)
38450 {
38451 fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name);
38452 fprintf (file, "\tpushl\t%%ecx\n");
38453 }
38454 else
38455 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
38456
38457 fputs ("\tjmp\tdyld_stub_binding_helper\n", file);
38458
38459 /* N.B. Keep the correspondence of these
38460 'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
38461 old-pic/new-pic/non-pic stubs; altering this will break
38462 compatibility with existing dylibs. */
38463 if (MACHOPIC_PURE)
38464 {
38465 /* 25-byte PIC stub using "CALL get_pc_thunk". */
38466 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
38467 }
38468 else
38469 /* 16-byte -mdynamic-no-pic stub. */
38470 switch_to_section(darwin_sections[machopic_lazy_symbol_ptr3_section]);
38471
38472 fprintf (file, "%s:\n", lazy_ptr_name);
38473 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
38474 fprintf (file, ASM_LONG "%s\n", binder_name);
38475 }
38476 #endif /* TARGET_MACHO */
38477
38478 /* Order the registers for the register allocator. */
38479
38480 void
38481 x86_order_regs_for_local_alloc (void)
38482 {
38483 int pos = 0;
38484 int i;
38485
38486 /* First allocate the local general purpose registers. */
38487 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
38488 if (GENERAL_REGNO_P (i) && call_used_regs[i])
38489 reg_alloc_order [pos++] = i;
38490
38491 /* Global general purpose registers. */
38492 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
38493 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
38494 reg_alloc_order [pos++] = i;
38495
38496 /* x87 registers come first in case we are doing FP math
38497 using them. */
38498 if (!TARGET_SSE_MATH)
38499 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
38500 reg_alloc_order [pos++] = i;
38501
38502 /* SSE registers. */
38503 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
38504 reg_alloc_order [pos++] = i;
38505 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
38506 reg_alloc_order [pos++] = i;
38507
38508 /* Extended REX SSE registers. */
38509 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
38510 reg_alloc_order [pos++] = i;
38511
38512 /* Mask registers. */
38513 for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
38514 reg_alloc_order [pos++] = i;
38515
38516 /* x87 registers. */
38517 if (TARGET_SSE_MATH)
38518 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
38519 reg_alloc_order [pos++] = i;
38520
38521 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
38522 reg_alloc_order [pos++] = i;
38523
38524 /* Initialize the rest of the array, since some registers are never
38525 allocated at all. */
38526 while (pos < FIRST_PSEUDO_REGISTER)
38527 reg_alloc_order [pos++] = 0;
38528 }
38529
38530 /* Handle a "callee_pop_aggregate_return" attribute; arguments as
38531 in struct attribute_spec.handler. */
38532 static tree
38533 ix86_handle_callee_pop_aggregate_return (tree *node, tree name,
38534 tree args,
38535 int flags ATTRIBUTE_UNUSED,
38536 bool *no_add_attrs)
38537 {
38538 if (TREE_CODE (*node) != FUNCTION_TYPE
38539 && TREE_CODE (*node) != METHOD_TYPE
38540 && TREE_CODE (*node) != FIELD_DECL
38541 && TREE_CODE (*node) != TYPE_DECL)
38542 {
38543 warning (OPT_Wattributes, "%qE attribute only applies to functions",
38544 name);
38545 *no_add_attrs = true;
38546 return NULL_TREE;
38547 }
38548 if (TARGET_64BIT)
38549 {
38550 warning (OPT_Wattributes, "%qE attribute only available for 32-bit",
38551 name);
38552 *no_add_attrs = true;
38553 return NULL_TREE;
38554 }
38555 if (is_attribute_p ("callee_pop_aggregate_return", name))
38556 {
38557 tree cst;
38558
38559 cst = TREE_VALUE (args);
38560 if (TREE_CODE (cst) != INTEGER_CST)
38561 {
38562 warning (OPT_Wattributes,
38563 "%qE attribute requires an integer constant argument",
38564 name);
38565 *no_add_attrs = true;
38566 }
38567 else if (compare_tree_int (cst, 0) != 0
38568 && compare_tree_int (cst, 1) != 0)
38569 {
38570 warning (OPT_Wattributes,
38571 "argument to %qE attribute is neither zero, nor one",
38572 name);
38573 *no_add_attrs = true;
38574 }
38575
38576 return NULL_TREE;
38577 }
38578
38579 return NULL_TREE;
38580 }
38581
38582 /* Handle a "ms_abi" or "sysv_abi" attribute; arguments as in
38583 struct attribute_spec.handler. */
38584 static tree
38585 ix86_handle_abi_attribute (tree *node, tree name,
38586 tree args ATTRIBUTE_UNUSED,
38587 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
38588 {
38589 if (TREE_CODE (*node) != FUNCTION_TYPE
38590 && TREE_CODE (*node) != METHOD_TYPE
38591 && TREE_CODE (*node) != FIELD_DECL
38592 && TREE_CODE (*node) != TYPE_DECL)
38593 {
38594 warning (OPT_Wattributes, "%qE attribute only applies to functions",
38595 name);
38596 *no_add_attrs = true;
38597 return NULL_TREE;
38598 }
38599
38600 /* Can combine regparm with all attributes but fastcall. */
38601 if (is_attribute_p ("ms_abi", name))
38602 {
38603 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
38604 {
38605 error ("ms_abi and sysv_abi attributes are not compatible");
38606 }
38607
38608 return NULL_TREE;
38609 }
38610 else if (is_attribute_p ("sysv_abi", name))
38611 {
38612 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
38613 {
38614 error ("ms_abi and sysv_abi attributes are not compatible");
38615 }
38616
38617 return NULL_TREE;
38618 }
38619
38620 return NULL_TREE;
38621 }
38622
38623 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
38624 struct attribute_spec.handler. */
38625 static tree
38626 ix86_handle_struct_attribute (tree *node, tree name,
38627 tree args ATTRIBUTE_UNUSED,
38628 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
38629 {
38630 tree *type = NULL;
38631 if (DECL_P (*node))
38632 {
38633 if (TREE_CODE (*node) == TYPE_DECL)
38634 type = &TREE_TYPE (*node);
38635 }
38636 else
38637 type = node;
38638
38639 if (!(type && RECORD_OR_UNION_TYPE_P (*type)))
38640 {
38641 warning (OPT_Wattributes, "%qE attribute ignored",
38642 name);
38643 *no_add_attrs = true;
38644 }
38645
38646 else if ((is_attribute_p ("ms_struct", name)
38647 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
38648 || ((is_attribute_p ("gcc_struct", name)
38649 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
38650 {
38651 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
38652 name);
38653 *no_add_attrs = true;
38654 }
38655
38656 return NULL_TREE;
38657 }
38658
38659 static tree
38660 ix86_handle_fndecl_attribute (tree *node, tree name,
38661 tree args ATTRIBUTE_UNUSED,
38662 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
38663 {
38664 if (TREE_CODE (*node) != FUNCTION_DECL)
38665 {
38666 warning (OPT_Wattributes, "%qE attribute only applies to functions",
38667 name);
38668 *no_add_attrs = true;
38669 }
38670 return NULL_TREE;
38671 }
38672
38673 static bool
38674 ix86_ms_bitfield_layout_p (const_tree record_type)
38675 {
38676 return ((TARGET_MS_BITFIELD_LAYOUT
38677 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
38678 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
38679 }
38680
38681 /* Returns an expression indicating where the this parameter is
38682 located on entry to the FUNCTION. */
38683
38684 static rtx
38685 x86_this_parameter (tree function)
38686 {
38687 tree type = TREE_TYPE (function);
38688 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
38689 int nregs;
38690
38691 if (TARGET_64BIT)
38692 {
38693 const int *parm_regs;
38694
38695 if (ix86_function_type_abi (type) == MS_ABI)
38696 parm_regs = x86_64_ms_abi_int_parameter_registers;
38697 else
38698 parm_regs = x86_64_int_parameter_registers;
38699 return gen_rtx_REG (Pmode, parm_regs[aggr]);
38700 }
38701
38702 nregs = ix86_function_regparm (type, function);
38703
38704 if (nregs > 0 && !stdarg_p (type))
38705 {
38706 int regno;
38707 unsigned int ccvt = ix86_get_callcvt (type);
38708
38709 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
38710 regno = aggr ? DX_REG : CX_REG;
38711 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
38712 {
38713 regno = CX_REG;
38714 if (aggr)
38715 return gen_rtx_MEM (SImode,
38716 plus_constant (Pmode, stack_pointer_rtx, 4));
38717 }
38718 else
38719 {
38720 regno = AX_REG;
38721 if (aggr)
38722 {
38723 regno = DX_REG;
38724 if (nregs == 1)
38725 return gen_rtx_MEM (SImode,
38726 plus_constant (Pmode,
38727 stack_pointer_rtx, 4));
38728 }
38729 }
38730 return gen_rtx_REG (SImode, regno);
38731 }
38732
38733 return gen_rtx_MEM (SImode, plus_constant (Pmode, stack_pointer_rtx,
38734 aggr ? 8 : 4));
38735 }
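
/* For illustration, assuming a 32-bit target: with the fastcall
   convention the this pointer arrives in %ecx (or %edx when a hidden
   aggregate-return pointer occupies %ecx), while a plain cdecl method
   finds it on the stack at 4(%esp), just above the return address.  */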
38736
38737 /* Determine whether x86_output_mi_thunk can succeed. */
38738
38739 static bool
38740 x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED,
38741 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
38742 HOST_WIDE_INT vcall_offset, const_tree function)
38743 {
38744 /* 64-bit can handle anything. */
38745 if (TARGET_64BIT)
38746 return true;
38747
38748 /* For 32-bit, everything's fine if we have one free register. */
38749 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
38750 return true;
38751
38752 /* Need a free register for vcall_offset. */
38753 if (vcall_offset)
38754 return false;
38755
38756 /* Need a free register for GOT references. */
38757 if (flag_pic && !targetm.binds_local_p (function))
38758 return false;
38759
38760 /* Otherwise ok. */
38761 return true;
38762 }
38763
38764 /* Output the assembler code for a thunk function. THUNK_DECL is the
38765 declaration for the thunk function itself, FUNCTION is the decl for
38766 the target function. DELTA is an immediate constant offset to be
38767 added to THIS. If VCALL_OFFSET is nonzero, the word at
38768 *(*this + vcall_offset) should be added to THIS. */
38769
38770 static void
38771 x86_output_mi_thunk (FILE *file,
38772 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
38773 HOST_WIDE_INT vcall_offset, tree function)
38774 {
38775 rtx this_param = x86_this_parameter (function);
38776 rtx this_reg, tmp, fnaddr;
38777 unsigned int tmp_regno;
38778
38779 if (TARGET_64BIT)
38780 tmp_regno = R10_REG;
38781 else
38782 {
38783 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function));
38784 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
38785 tmp_regno = AX_REG;
38786 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
38787 tmp_regno = DX_REG;
38788 else
38789 tmp_regno = CX_REG;
38790 }
38791
38792 emit_note (NOTE_INSN_PROLOGUE_END);
38793
38794 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
38795 pull it in now and let DELTA benefit. */
38796 if (REG_P (this_param))
38797 this_reg = this_param;
38798 else if (vcall_offset)
38799 {
38800 /* Put the this parameter into %eax. */
38801 this_reg = gen_rtx_REG (Pmode, AX_REG);
38802 emit_move_insn (this_reg, this_param);
38803 }
38804 else
38805 this_reg = NULL_RTX;
38806
38807 /* Adjust the this parameter by a fixed constant. */
38808 if (delta)
38809 {
38810 rtx delta_rtx = GEN_INT (delta);
38811 rtx delta_dst = this_reg ? this_reg : this_param;
38812
38813 if (TARGET_64BIT)
38814 {
38815 if (!x86_64_general_operand (delta_rtx, Pmode))
38816 {
38817 tmp = gen_rtx_REG (Pmode, tmp_regno);
38818 emit_move_insn (tmp, delta_rtx);
38819 delta_rtx = tmp;
38820 }
38821 }
38822
38823 ix86_emit_binop (PLUS, Pmode, delta_dst, delta_rtx);
38824 }
38825
38826 /* Adjust the this parameter by a value stored in the vtable. */
38827 if (vcall_offset)
38828 {
38829 rtx vcall_addr, vcall_mem, this_mem;
38830
38831 tmp = gen_rtx_REG (Pmode, tmp_regno);
38832
38833 this_mem = gen_rtx_MEM (ptr_mode, this_reg);
38834 if (Pmode != ptr_mode)
38835 this_mem = gen_rtx_ZERO_EXTEND (Pmode, this_mem);
38836 emit_move_insn (tmp, this_mem);
38837
38838 /* Adjust the this parameter. */
38839 vcall_addr = plus_constant (Pmode, tmp, vcall_offset);
38840 if (TARGET_64BIT
38841 && !ix86_legitimate_address_p (ptr_mode, vcall_addr, true))
38842 {
38843 rtx tmp2 = gen_rtx_REG (Pmode, R11_REG);
38844 emit_move_insn (tmp2, GEN_INT (vcall_offset));
38845 vcall_addr = gen_rtx_PLUS (Pmode, tmp, tmp2);
38846 }
38847
38848 vcall_mem = gen_rtx_MEM (ptr_mode, vcall_addr);
38849 if (Pmode != ptr_mode)
38850 emit_insn (gen_addsi_1_zext (this_reg,
38851 gen_rtx_REG (ptr_mode,
38852 REGNO (this_reg)),
38853 vcall_mem));
38854 else
38855 ix86_emit_binop (PLUS, Pmode, this_reg, vcall_mem);
38856 }
38857
38858 /* If necessary, drop THIS back to its stack slot. */
38859 if (this_reg && this_reg != this_param)
38860 emit_move_insn (this_param, this_reg);
38861
38862 fnaddr = XEXP (DECL_RTL (function), 0);
38863 if (TARGET_64BIT)
38864 {
38865 if (!flag_pic || targetm.binds_local_p (function)
38866 || TARGET_PECOFF)
38867 ;
38868 else
38869 {
38870 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOTPCREL);
38871 tmp = gen_rtx_CONST (Pmode, tmp);
38872 fnaddr = gen_const_mem (Pmode, tmp);
38873 }
38874 }
38875 else
38876 {
38877 if (!flag_pic || targetm.binds_local_p (function))
38878 ;
38879 #if TARGET_MACHO
38880 else if (TARGET_MACHO)
38881 {
38882 fnaddr = machopic_indirect_call_target (DECL_RTL (function));
38883 fnaddr = XEXP (fnaddr, 0);
38884 }
38885 #endif /* TARGET_MACHO */
38886 else
38887 {
38888 tmp = gen_rtx_REG (Pmode, CX_REG);
38889 output_set_got (tmp, NULL_RTX);
38890
38891 fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOT);
38892 fnaddr = gen_rtx_CONST (Pmode, fnaddr);
38893 fnaddr = gen_rtx_PLUS (Pmode, tmp, fnaddr);
38894 fnaddr = gen_const_mem (Pmode, fnaddr);
38895 }
38896 }
38897
38898 /* Our sibling call patterns do not allow memories, because we have no
38899 predicate that can distinguish between frame and non-frame memory.
38900 For our purposes here, we can get away with (ab)using a jump pattern,
38901 because we're going to do no optimization. */
38902 if (MEM_P (fnaddr))
38903 {
38904 if (sibcall_insn_operand (fnaddr, word_mode))
38905 {
38906 tmp = gen_rtx_CALL (VOIDmode, fnaddr, const0_rtx);
38907 tmp = emit_call_insn (tmp);
38908 SIBLING_CALL_P (tmp) = 1;
38909 }
38910 else
38911 emit_jump_insn (gen_indirect_jump (fnaddr));
38912 }
38913 else
38914 {
38915 if (ix86_cmodel == CM_LARGE_PIC && SYMBOLIC_CONST (fnaddr))
38916 fnaddr = legitimize_pic_address (fnaddr,
38917 gen_rtx_REG (Pmode, tmp_regno));
38918
38919 if (!sibcall_insn_operand (fnaddr, word_mode))
38920 {
38921 tmp = gen_rtx_REG (word_mode, tmp_regno);
38922 if (GET_MODE (fnaddr) != word_mode)
38923 fnaddr = gen_rtx_ZERO_EXTEND (word_mode, fnaddr);
38924 emit_move_insn (tmp, fnaddr);
38925 fnaddr = tmp;
38926 }
38927
38928 tmp = gen_rtx_MEM (QImode, fnaddr);
38929 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
38930 tmp = emit_call_insn (tmp);
38931 SIBLING_CALL_P (tmp) = 1;
38932 }
38933 emit_barrier ();
38934
38935 /* Emit just enough of rest_of_compilation to get the insns emitted.
38936 Note that use_thunk calls assemble_start_function et al. */
38937 tmp = get_insns ();
38938 shorten_branches (tmp);
38939 final_start_function (tmp, file, 1);
38940 final (tmp, file, 1);
38941 final_end_function ();
38942 }
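
/* Informally, for a 64-bit thunk with a small DELTA and no VCALL_OFFSET
   the code above boils down to something like

	addq	$16, %rdi	# adjust the this pointer by DELTA
	jmp	target		# tail call the real method

   while a nonzero VCALL_OFFSET additionally loads the adjustment from
   the vtable before the jump.  */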
38943
38944 static void
38945 x86_file_start (void)
38946 {
38947 default_file_start ();
38948 if (TARGET_16BIT)
38949 fputs ("\t.code16gcc\n", asm_out_file);
38950 #if TARGET_MACHO
38951 darwin_file_start ();
38952 #endif
38953 if (X86_FILE_START_VERSION_DIRECTIVE)
38954 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
38955 if (X86_FILE_START_FLTUSED)
38956 fputs ("\t.global\t__fltused\n", asm_out_file);
38957 if (ix86_asm_dialect == ASM_INTEL)
38958 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
38959 }
38960
38961 int
38962 x86_field_alignment (tree field, int computed)
38963 {
38964 enum machine_mode mode;
38965 tree type = TREE_TYPE (field);
38966
38967 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
38968 return computed;
38969 mode = TYPE_MODE (strip_array_types (type));
38970 if (mode == DFmode || mode == DCmode
38971 || GET_MODE_CLASS (mode) == MODE_INT
38972 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
38973 return MIN (32, computed);
38974 return computed;
38975 }
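
/* For example, on a 32-bit target without -malign-double a "double"
   structure field is capped at 32-bit alignment by the code above,
   matching the traditional ia32 ABI, whereas 64-bit targets keep the
   natural 64-bit alignment.  */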
38976
38977 /* Output assembler code to FILE to increment profiler label # LABELNO
38978 for profiling a function entry. */
38979 void
38980 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
38981 {
38982 const char *mcount_name = (flag_fentry ? MCOUNT_NAME_BEFORE_PROLOGUE
38983 : MCOUNT_NAME);
38984
38985 if (TARGET_64BIT)
38986 {
38987 #ifndef NO_PROFILE_COUNTERS
38988 fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
38989 #endif
38990
38991 if (!TARGET_PECOFF && flag_pic)
38992 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
38993 else
38994 fprintf (file, "\tcall\t%s\n", mcount_name);
38995 }
38996 else if (flag_pic)
38997 {
38998 #ifndef NO_PROFILE_COUNTERS
38999 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
39000 LPREFIX, labelno);
39001 #endif
39002 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
39003 }
39004 else
39005 {
39006 #ifndef NO_PROFILE_COUNTERS
39007 fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
39008 LPREFIX, labelno);
39009 #endif
39010 fprintf (file, "\tcall\t%s\n", mcount_name);
39011 }
39012 }
39013
39014 /* We don't have exact information about the insn sizes, but we may assume
39015 quite safely that we are informed about all 1 byte insns and memory
39016 address sizes. This is enough to eliminate unnecessary padding in
39017 99% of cases. */
39018
39019 static int
39020 min_insn_size (rtx insn)
39021 {
39022 int l = 0, len;
39023
39024 if (!INSN_P (insn) || !active_insn_p (insn))
39025 return 0;
39026
39027 /* Discard alignments we've emitted and jump instructions. */
39028 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
39029 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
39030 return 0;
39031
39032 /* Important case - calls are always 5 bytes.
39033 It is common to have many calls in a row. */
39034 if (CALL_P (insn)
39035 && symbolic_reference_mentioned_p (PATTERN (insn))
39036 && !SIBLING_CALL_P (insn))
39037 return 5;
39038 len = get_attr_length (insn);
39039 if (len <= 1)
39040 return 1;
39041
39042 /* For normal instructions we rely on get_attr_length being exact,
39043 with a few exceptions. */
39044 if (!JUMP_P (insn))
39045 {
39046 enum attr_type type = get_attr_type (insn);
39047
39048 switch (type)
39049 {
39050 case TYPE_MULTI:
39051 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
39052 || asm_noperands (PATTERN (insn)) >= 0)
39053 return 0;
39054 break;
39055 case TYPE_OTHER:
39056 case TYPE_FCMP:
39057 break;
39058 default:
39059 /* Otherwise trust get_attr_length. */
39060 return len;
39061 }
39062
39063 l = get_attr_length_address (insn);
39064 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
39065 l = 4;
39066 }
39067 if (l)
39068 return 1+l;
39069 else
39070 return 2;
39071 }
39072
39073 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
39074
39075 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
39076 window. */
39077
39078 static void
39079 ix86_avoid_jump_mispredicts (void)
39080 {
39081 rtx insn, start = get_insns ();
39082 int nbytes = 0, njumps = 0;
39083 int isjump = 0;
39084
39085 /* Look for all minimal intervals of instructions containing 4 jumps.
39086 The intervals are bounded by START and INSN. NBYTES is the total
39087 size of the instructions in the interval, including INSN and not including
39088 START. When NBYTES is smaller than 16 bytes, it is possible
39089 that START and INSN end up in the same 16-byte page.
39090
39091 The smallest offset at which INSN can start in that page occurs when START
39092 ends at offset 0. The offset of INSN is then NBYTES - sizeof (INSN).
39093 We add a p2align to the 16-byte window with max skip 15 - NBYTES + sizeof (INSN).
39094
39095 Don't consider an asm goto as a jump: while it can contain a jump, it doesn't
39096 have to, since control transfer to its label(s) can be performed through other
39097 means; also, we estimate the minimum length of all asm stmts as 0. */
39098 for (insn = start; insn; insn = NEXT_INSN (insn))
39099 {
39100 int min_size;
39101
39102 if (LABEL_P (insn))
39103 {
39104 int align = label_to_alignment (insn);
39105 int max_skip = label_to_max_skip (insn);
39106
39107 if (max_skip > 15)
39108 max_skip = 15;
39109 /* If align > 3, only up to 16 - max_skip - 1 bytes can be
39110 already in the current 16 byte page, because otherwise
39111 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
39112 bytes to reach 16 byte boundary. */
39113 if (align <= 0
39114 || (align <= 3 && max_skip != (1 << align) - 1))
39115 max_skip = 0;
39116 if (dump_file)
39117 fprintf (dump_file, "Label %i with max_skip %i\n",
39118 INSN_UID (insn), max_skip);
39119 if (max_skip)
39120 {
39121 while (nbytes + max_skip >= 16)
39122 {
39123 start = NEXT_INSN (start);
39124 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
39125 || CALL_P (start))
39126 njumps--, isjump = 1;
39127 else
39128 isjump = 0;
39129 nbytes -= min_insn_size (start);
39130 }
39131 }
39132 continue;
39133 }
39134
39135 min_size = min_insn_size (insn);
39136 nbytes += min_size;
39137 if (dump_file)
39138 fprintf (dump_file, "Insn %i estimated to %i bytes\n",
39139 INSN_UID (insn), min_size);
39140 if ((JUMP_P (insn) && asm_noperands (PATTERN (insn)) < 0)
39141 || CALL_P (insn))
39142 njumps++;
39143 else
39144 continue;
39145
39146 while (njumps > 3)
39147 {
39148 start = NEXT_INSN (start);
39149 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
39150 || CALL_P (start))
39151 njumps--, isjump = 1;
39152 else
39153 isjump = 0;
39154 nbytes -= min_insn_size (start);
39155 }
39156 gcc_assert (njumps >= 0);
39157 if (dump_file)
39158 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
39159 INSN_UID (start), INSN_UID (insn), nbytes);
39160
39161 if (njumps == 3 && isjump && nbytes < 16)
39162 {
39163 int padsize = 15 - nbytes + min_insn_size (insn);
39164
39165 if (dump_file)
39166 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
39167 INSN_UID (insn), padsize);
39168 emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
39169 }
39170 }
39171 }
39172 #endif
39173
39174 /* AMD Athlon works faster
39175 when RET is not the destination of a conditional jump or directly preceded
39176 by another jump instruction. We avoid the penalty by inserting a NOP just
39177 before the RET instruction in such cases. */
39178 static void
39179 ix86_pad_returns (void)
39180 {
39181 edge e;
39182 edge_iterator ei;
39183
39184 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
39185 {
39186 basic_block bb = e->src;
39187 rtx ret = BB_END (bb);
39188 rtx prev;
39189 bool replace = false;
39190
39191 if (!JUMP_P (ret) || !ANY_RETURN_P (PATTERN (ret))
39192 || optimize_bb_for_size_p (bb))
39193 continue;
39194 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
39195 if (active_insn_p (prev) || LABEL_P (prev))
39196 break;
39197 if (prev && LABEL_P (prev))
39198 {
39199 edge e;
39200 edge_iterator ei;
39201
39202 FOR_EACH_EDGE (e, ei, bb->preds)
39203 if (EDGE_FREQUENCY (e) && e->src->index >= 0
39204 && !(e->flags & EDGE_FALLTHRU))
39205 {
39206 replace = true;
39207 break;
39208 }
39209 }
39210 if (!replace)
39211 {
39212 prev = prev_active_insn (ret);
39213 if (prev
39214 && ((JUMP_P (prev) && any_condjump_p (prev))
39215 || CALL_P (prev)))
39216 replace = true;
39217 /* Empty functions get a branch mispredict even when
39218 the jump destination is not visible to us. */
39219 if (!prev && !optimize_function_for_size_p (cfun))
39220 replace = true;
39221 }
39222 if (replace)
39223 {
39224 emit_jump_insn_before (gen_simple_return_internal_long (), ret);
39225 delete_insn (ret);
39226 }
39227 }
39228 }
39229
39230 /* Count the minimum number of instructions in BB. Return 4 if the
39231 number of instructions >= 4. */
39232
39233 static int
39234 ix86_count_insn_bb (basic_block bb)
39235 {
39236 rtx insn;
39237 int insn_count = 0;
39238
39239 /* Count number of instructions in this block. Return 4 if the number
39240 of instructions >= 4. */
39241 FOR_BB_INSNS (bb, insn)
39242 {
39243 /* This only happens in exit blocks. */
39244 if (JUMP_P (insn)
39245 && ANY_RETURN_P (PATTERN (insn)))
39246 break;
39247
39248 if (NONDEBUG_INSN_P (insn)
39249 && GET_CODE (PATTERN (insn)) != USE
39250 && GET_CODE (PATTERN (insn)) != CLOBBER)
39251 {
39252 insn_count++;
39253 if (insn_count >= 4)
39254 return insn_count;
39255 }
39256 }
39257
39258 return insn_count;
39259 }
39260
39261
39262 /* Count the minimum number of instructions in code path in BB.
39263 Return 4 if the number of instructions >= 4. */
39264
39265 static int
39266 ix86_count_insn (basic_block bb)
39267 {
39268 edge e;
39269 edge_iterator ei;
39270 int min_prev_count;
39271
39272 /* Only bother counting instructions along paths with no
39273 more than 2 basic blocks between entry and exit. Given
39274 that BB has an edge to exit, determine if a predecessor
39275 of BB has an edge from entry. If so, compute the number
39276 of instructions in the predecessor block. If there
39277 happen to be multiple such blocks, compute the minimum. */
39278 min_prev_count = 4;
39279 FOR_EACH_EDGE (e, ei, bb->preds)
39280 {
39281 edge prev_e;
39282 edge_iterator prev_ei;
39283
39284 if (e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
39285 {
39286 min_prev_count = 0;
39287 break;
39288 }
39289 FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
39290 {
39291 if (prev_e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
39292 {
39293 int count = ix86_count_insn_bb (e->src);
39294 if (count < min_prev_count)
39295 min_prev_count = count;
39296 break;
39297 }
39298 }
39299 }
39300
39301 if (min_prev_count < 4)
39302 min_prev_count += ix86_count_insn_bb (bb);
39303
39304 return min_prev_count;
39305 }
39306
39307 /* Pad short function to 4 instructions. */
39308
39309 static void
39310 ix86_pad_short_function (void)
39311 {
39312 edge e;
39313 edge_iterator ei;
39314
39315 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
39316 {
39317 rtx ret = BB_END (e->src);
39318 if (JUMP_P (ret) && ANY_RETURN_P (PATTERN (ret)))
39319 {
39320 int insn_count = ix86_count_insn (e->src);
39321
39322 /* Pad short function. */
39323 if (insn_count < 4)
39324 {
39325 rtx insn = ret;
39326
39327 /* Find epilogue. */
39328 while (insn
39329 && (!NOTE_P (insn)
39330 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
39331 insn = PREV_INSN (insn);
39332
39333 if (!insn)
39334 insn = ret;
39335
39336 /* Two NOPs count as one instruction. */
39337 insn_count = 2 * (4 - insn_count);
39338 emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
39339 }
39340 }
39341 }
39342 }
39343
39344 /* Fix up a Windows system unwinder issue. If an EH region falls through into
39345 the epilogue, the Windows system unwinder will apply epilogue logic and
39346 produce incorrect offsets. This can be avoided by adding a nop between
39347 the last insn that can throw and the first insn of the epilogue. */
39348
39349 static void
39350 ix86_seh_fixup_eh_fallthru (void)
39351 {
39352 edge e;
39353 edge_iterator ei;
39354
39355 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
39356 {
39357 rtx insn, next;
39358
39359 /* Find the beginning of the epilogue. */
39360 for (insn = BB_END (e->src); insn != NULL; insn = PREV_INSN (insn))
39361 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_EPILOGUE_BEG)
39362 break;
39363 if (insn == NULL)
39364 continue;
39365
39366 /* We only care about preceding insns that can throw. */
39367 insn = prev_active_insn (insn);
39368 if (insn == NULL || !can_throw_internal (insn))
39369 continue;
39370
39371 /* Do not separate calls from their debug information. */
39372 for (next = NEXT_INSN (insn); next != NULL; next = NEXT_INSN (next))
39373 if (NOTE_P (next)
39374 && (NOTE_KIND (next) == NOTE_INSN_VAR_LOCATION
39375 || NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION))
39376 insn = next;
39377 else
39378 break;
39379
39380 emit_insn_after (gen_nops (const1_rtx), insn);
39381 }
39382 }
39383
39384 /* Implement machine specific optimizations. We implement padding of returns
39385 for K8 CPUs and a pass to avoid 4 jumps in a single 16-byte window. */
39386 static void
39387 ix86_reorg (void)
39388 {
39389 /* We are freeing block_for_insn in the toplev to keep compatibility
39390 with old MDEP_REORGS that are not CFG based. Recompute it now. */
39391 compute_bb_for_insn ();
39392
39393 if (TARGET_SEH && current_function_has_exception_handlers ())
39394 ix86_seh_fixup_eh_fallthru ();
39395
39396 if (optimize && optimize_function_for_speed_p (cfun))
39397 {
39398 if (TARGET_PAD_SHORT_FUNCTION)
39399 ix86_pad_short_function ();
39400 else if (TARGET_PAD_RETURNS)
39401 ix86_pad_returns ();
39402 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
39403 if (TARGET_FOUR_JUMP_LIMIT)
39404 ix86_avoid_jump_mispredicts ();
39405 #endif
39406 }
39407 }
39408
39409 /* Return nonzero when a QImode register that must be represented via a REX
39410 prefix is used. */
39411 bool
39412 x86_extended_QIreg_mentioned_p (rtx insn)
39413 {
39414 int i;
39415 extract_insn_cached (insn);
39416 for (i = 0; i < recog_data.n_operands; i++)
39417 if (GENERAL_REG_P (recog_data.operand[i])
39418 && !QI_REGNO_P (REGNO (recog_data.operand[i])))
39419 return true;
39420 return false;
39421 }
39422
39423 /* Return nonzero when P points to a register encoded via a REX prefix.
39424 Called via for_each_rtx. */
39425 static int
39426 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
39427 {
39428 unsigned int regno;
39429 if (!REG_P (*p))
39430 return 0;
39431 regno = REGNO (*p);
39432 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
39433 }
39434
39435 /* Return true when INSN mentions a register that must be encoded using a REX
39436 prefix. */
39437 bool
39438 x86_extended_reg_mentioned_p (rtx insn)
39439 {
39440 return for_each_rtx (INSN_P (insn) ? &PATTERN (insn) : &insn,
39441 extended_reg_mentioned_1, NULL);
39442 }
39443
39444 /* If profitable, negate (without causing overflow) integer constant
39445 of mode MODE at location LOC. Return true in this case. */
39446 bool
39447 x86_maybe_negate_const_int (rtx *loc, enum machine_mode mode)
39448 {
39449 HOST_WIDE_INT val;
39450
39451 if (!CONST_INT_P (*loc))
39452 return false;
39453
39454 switch (mode)
39455 {
39456 case DImode:
39457 /* DImode x86_64 constants must fit in 32 bits. */
39458 gcc_assert (x86_64_immediate_operand (*loc, mode));
39459
39460 mode = SImode;
39461 break;
39462
39463 case SImode:
39464 case HImode:
39465 case QImode:
39466 break;
39467
39468 default:
39469 gcc_unreachable ();
39470 }
39471
39472 /* Avoid overflows. */
39473 if (mode_signbit_p (mode, *loc))
39474 return false;
39475
39476 val = INTVAL (*loc);
39477
39478 /* Make things pretty: use `subl $4,%eax' rather than `addl $-4,%eax'.
39479 Exceptions: -128 encodes smaller than 128, so swap sign and op. */
39480 if ((val < 0 && val != -128)
39481 || val == 128)
39482 {
39483 *loc = GEN_INT (-val);
39484 return true;
39485 }
39486
39487 return false;
39488 }
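
/* For example, "addl $-4, %eax" is rewritten by callers of this function
   as "subl $4, %eax"; 128 is negated to -128 (and -128 is left alone)
   because -128 still fits in a sign-extended 8-bit immediate while +128
   does not.  */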
39489
39490 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
39491 optabs would emit if we didn't have TFmode patterns. */
39492
39493 void
39494 x86_emit_floatuns (rtx operands[2])
39495 {
39496 rtx neglab, donelab, i0, i1, f0, in, out;
39497 enum machine_mode mode, inmode;
39498
39499 inmode = GET_MODE (operands[1]);
39500 gcc_assert (inmode == SImode || inmode == DImode);
39501
39502 out = operands[0];
39503 in = force_reg (inmode, operands[1]);
39504 mode = GET_MODE (out);
39505 neglab = gen_label_rtx ();
39506 donelab = gen_label_rtx ();
39507 f0 = gen_reg_rtx (mode);
39508
39509 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
39510
39511 expand_float (out, in, 0);
39512
39513 emit_jump_insn (gen_jump (donelab));
39514 emit_barrier ();
39515
39516 emit_label (neglab);
39517
39518 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
39519 1, OPTAB_DIRECT);
39520 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
39521 1, OPTAB_DIRECT);
39522 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
39523
39524 expand_float (f0, i0, 0);
39525
39526 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
39527
39528 emit_label (donelab);
39529 }
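
/* The sequence emitted above is the usual unsigned-to-float trick:
   nonnegative inputs are converted directly, while inputs with the sign
   bit set are halved as (in >> 1) | (in & 1) so they fit the signed
   converter, converted, and then doubled with f0 + f0; OR-ing in the low
   bit keeps the final rounding correct.  */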
39530 \f
39531 /* AVX512F supports 64-byte integer vector operations,
39532 so the longest vector we are faced with is V64QImode. */
39533 #define MAX_VECT_LEN 64
39534
39535 struct expand_vec_perm_d
39536 {
39537 rtx target, op0, op1;
39538 unsigned char perm[MAX_VECT_LEN];
39539 enum machine_mode vmode;
39540 unsigned char nelt;
39541 bool one_operand_p;
39542 bool testing_p;
39543 };
39544
39545 static bool canonicalize_perm (struct expand_vec_perm_d *d);
39546 static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
39547 static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);
39548
39549 /* Get a vector mode of the same size as the original but with elements
39550 twice as wide. This is only guaranteed to apply to integral vectors. */
39551
39552 static inline enum machine_mode
39553 get_mode_wider_vector (enum machine_mode o)
39554 {
39555 /* ??? Rely on the ordering that genmodes.c gives to vectors. */
39556 enum machine_mode n = GET_MODE_WIDER_MODE (o);
39557 gcc_assert (GET_MODE_NUNITS (o) == GET_MODE_NUNITS (n) * 2);
39558 gcc_assert (GET_MODE_SIZE (o) == GET_MODE_SIZE (n));
39559 return n;
39560 }
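
/* For instance, V16QImode widens to V8HImode and V8HImode to V4SImode:
   the same 16-byte vector size with half as many elements, each twice
   as wide.  */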
39561
39562 /* A subroutine of ix86_expand_vector_init_duplicate. Tries to
39563 fill target with val via vec_duplicate. */
39564
39565 static bool
39566 ix86_vector_duplicate_value (enum machine_mode mode, rtx target, rtx val)
39567 {
39568 bool ok;
39569 rtx insn, dup;
39570
39571 /* First attempt to recognize VAL as-is. */
39572 dup = gen_rtx_VEC_DUPLICATE (mode, val);
39573 insn = emit_insn (gen_rtx_SET (VOIDmode, target, dup));
39574 if (recog_memoized (insn) < 0)
39575 {
39576 rtx seq;
39577 /* If that fails, force VAL into a register. */
39578
39579 start_sequence ();
39580 XEXP (dup, 0) = force_reg (GET_MODE_INNER (mode), val);
39581 seq = get_insns ();
39582 end_sequence ();
39583 if (seq)
39584 emit_insn_before (seq, insn);
39585
39586 ok = recog_memoized (insn) >= 0;
39587 gcc_assert (ok);
39588 }
39589 return true;
39590 }
39591
39592 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
39593 with all elements equal to VAR. Return true if successful. */
39594
39595 static bool
39596 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
39597 rtx target, rtx val)
39598 {
39599 bool ok;
39600
39601 switch (mode)
39602 {
39603 case V2SImode:
39604 case V2SFmode:
39605 if (!mmx_ok)
39606 return false;
39607 /* FALLTHRU */
39608
39609 case V4DFmode:
39610 case V4DImode:
39611 case V8SFmode:
39612 case V8SImode:
39613 case V2DFmode:
39614 case V2DImode:
39615 case V4SFmode:
39616 case V4SImode:
39617 case V16SImode:
39618 case V8DImode:
39619 case V16SFmode:
39620 case V8DFmode:
39621 return ix86_vector_duplicate_value (mode, target, val);
39622
39623 case V4HImode:
39624 if (!mmx_ok)
39625 return false;
39626 if (TARGET_SSE || TARGET_3DNOW_A)
39627 {
39628 rtx x;
39629
39630 val = gen_lowpart (SImode, val);
39631 x = gen_rtx_TRUNCATE (HImode, val);
39632 x = gen_rtx_VEC_DUPLICATE (mode, x);
39633 emit_insn (gen_rtx_SET (VOIDmode, target, x));
39634 return true;
39635 }
39636 goto widen;
39637
39638 case V8QImode:
39639 if (!mmx_ok)
39640 return false;
39641 goto widen;
39642
39643 case V8HImode:
39644 if (TARGET_SSE2)
39645 {
39646 struct expand_vec_perm_d dperm;
39647 rtx tmp1, tmp2;
39648
39649 permute:
39650 memset (&dperm, 0, sizeof (dperm));
39651 dperm.target = target;
39652 dperm.vmode = mode;
39653 dperm.nelt = GET_MODE_NUNITS (mode);
39654 dperm.op0 = dperm.op1 = gen_reg_rtx (mode);
39655 dperm.one_operand_p = true;
39656
39657 /* Extend to SImode using a paradoxical SUBREG. */
39658 tmp1 = gen_reg_rtx (SImode);
39659 emit_move_insn (tmp1, gen_lowpart (SImode, val));
39660
39661 /* Insert the SImode value as low element of a V4SImode vector. */
39662 tmp2 = gen_reg_rtx (V4SImode);
39663 emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1));
39664 emit_move_insn (dperm.op0, gen_lowpart (mode, tmp2));
39665
39666 ok = (expand_vec_perm_1 (&dperm)
39667 || expand_vec_perm_broadcast_1 (&dperm));
39668 gcc_assert (ok);
39669 return ok;
39670 }
39671 goto widen;
39672
39673 case V16QImode:
39674 if (TARGET_SSE2)
39675 goto permute;
39676 goto widen;
39677
39678 widen:
39679 /* Replicate the value once into the next wider mode and recurse. */
39680 {
39681 enum machine_mode smode, wsmode, wvmode;
39682 rtx x;
39683
39684 smode = GET_MODE_INNER (mode);
39685 wvmode = get_mode_wider_vector (mode);
39686 wsmode = GET_MODE_INNER (wvmode);
39687
39688 val = convert_modes (wsmode, smode, val, true);
39689 x = expand_simple_binop (wsmode, ASHIFT, val,
39690 GEN_INT (GET_MODE_BITSIZE (smode)),
39691 NULL_RTX, 1, OPTAB_LIB_WIDEN);
39692 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
39693
39694 x = gen_reg_rtx (wvmode);
39695 ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val);
39696 gcc_assert (ok);
39697 emit_move_insn (target, gen_lowpart (GET_MODE (target), x));
39698 return ok;
39699 }
39700
39701 case V16HImode:
39702 case V32QImode:
39703 {
39704 enum machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
39705 rtx x = gen_reg_rtx (hvmode);
39706
39707 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
39708 gcc_assert (ok);
39709
39710 x = gen_rtx_VEC_CONCAT (mode, x, x);
39711 emit_insn (gen_rtx_SET (VOIDmode, target, x));
39712 }
39713 return true;
39714
39715 default:
39716 return false;
39717 }
39718 }
39719
39720 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
39721 whose ONE_VAR element is VAR, and other elements are zero. Return true
39722 if successful. */
39723
39724 static bool
39725 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
39726 rtx target, rtx var, int one_var)
39727 {
39728 enum machine_mode vsimode;
39729 rtx new_target;
39730 rtx x, tmp;
39731 bool use_vector_set = false;
39732
39733 switch (mode)
39734 {
39735 case V2DImode:
39736 /* For SSE4.1, we normally use vector set. But if the second
39737 element is zero and inter-unit moves are OK, we use movq
39738 instead. */
39739 use_vector_set = (TARGET_64BIT && TARGET_SSE4_1
39740 && !(TARGET_INTER_UNIT_MOVES_TO_VEC
39741 && one_var == 0));
39742 break;
39743 case V16QImode:
39744 case V4SImode:
39745 case V4SFmode:
39746 use_vector_set = TARGET_SSE4_1;
39747 break;
39748 case V8HImode:
39749 use_vector_set = TARGET_SSE2;
39750 break;
39751 case V4HImode:
39752 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
39753 break;
39754 case V32QImode:
39755 case V16HImode:
39756 case V8SImode:
39757 case V8SFmode:
39758 case V4DFmode:
39759 use_vector_set = TARGET_AVX;
39760 break;
39761 case V4DImode:
39762 /* Use ix86_expand_vector_set in 64bit mode only. */
39763 use_vector_set = TARGET_AVX && TARGET_64BIT;
39764 break;
39765 default:
39766 break;
39767 }
39768
39769 if (use_vector_set)
39770 {
39771 emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
39772 var = force_reg (GET_MODE_INNER (mode), var);
39773 ix86_expand_vector_set (mmx_ok, target, var, one_var);
39774 return true;
39775 }
39776
39777 switch (mode)
39778 {
39779 case V2SFmode:
39780 case V2SImode:
39781 if (!mmx_ok)
39782 return false;
39783 /* FALLTHRU */
39784
39785 case V2DFmode:
39786 case V2DImode:
39787 if (one_var != 0)
39788 return false;
39789 var = force_reg (GET_MODE_INNER (mode), var);
39790 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
39791 emit_insn (gen_rtx_SET (VOIDmode, target, x));
39792 return true;
39793
39794 case V4SFmode:
39795 case V4SImode:
39796 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
39797 new_target = gen_reg_rtx (mode);
39798 else
39799 new_target = target;
39800 var = force_reg (GET_MODE_INNER (mode), var);
39801 x = gen_rtx_VEC_DUPLICATE (mode, var);
39802 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
39803 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
39804 if (one_var != 0)
39805 {
39806 /* We need to shuffle the value to the correct position, so
39807 create a new pseudo to store the intermediate result. */
39808
39809 /* With SSE2, we can use the integer shuffle insns. */
39810 if (mode != V4SFmode && TARGET_SSE2)
39811 {
39812 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
39813 const1_rtx,
39814 GEN_INT (one_var == 1 ? 0 : 1),
39815 GEN_INT (one_var == 2 ? 0 : 1),
39816 GEN_INT (one_var == 3 ? 0 : 1)));
39817 if (target != new_target)
39818 emit_move_insn (target, new_target);
39819 return true;
39820 }
39821
39822 /* Otherwise convert the intermediate result to V4SFmode and
39823 use the SSE1 shuffle instructions. */
39824 if (mode != V4SFmode)
39825 {
39826 tmp = gen_reg_rtx (V4SFmode);
39827 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
39828 }
39829 else
39830 tmp = new_target;
39831
39832 emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
39833 const1_rtx,
39834 GEN_INT (one_var == 1 ? 0 : 1),
39835 GEN_INT (one_var == 2 ? 0+4 : 1+4),
39836 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
39837
39838 if (mode != V4SFmode)
39839 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
39840 else if (tmp != target)
39841 emit_move_insn (target, tmp);
39842 }
39843 else if (target != new_target)
39844 emit_move_insn (target, new_target);
39845 return true;
39846
39847 case V8HImode:
39848 case V16QImode:
39849 vsimode = V4SImode;
39850 goto widen;
39851 case V4HImode:
39852 case V8QImode:
39853 if (!mmx_ok)
39854 return false;
39855 vsimode = V2SImode;
39856 goto widen;
39857 widen:
39858 if (one_var != 0)
39859 return false;
39860
39861 /* Zero extend the variable element to SImode and recurse. */
39862 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
39863
39864 x = gen_reg_rtx (vsimode);
39865 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
39866 var, one_var))
39867 gcc_unreachable ();
39868
39869 emit_move_insn (target, gen_lowpart (mode, x));
39870 return true;
39871
39872 default:
39873 return false;
39874 }
39875 }
39876
39877 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
39878 consisting of the values in VALS. It is known that all elements
39879 except ONE_VAR are constants. Return true if successful. */
39880
39881 static bool
39882 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
39883 rtx target, rtx vals, int one_var)
39884 {
39885 rtx var = XVECEXP (vals, 0, one_var);
39886 enum machine_mode wmode;
39887 rtx const_vec, x;
39888
39889 const_vec = copy_rtx (vals);
39890 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
39891 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
39892
39893 switch (mode)
39894 {
39895 case V2DFmode:
39896 case V2DImode:
39897 case V2SFmode:
39898 case V2SImode:
39899 /* For the two element vectors, it's just as easy to use
39900 the general case. */
39901 return false;
39902
39903 case V4DImode:
39904 /* Use ix86_expand_vector_set in 64bit mode only. */
39905 if (!TARGET_64BIT)
39906 return false;
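	/* FALLTHRU */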
39907 case V4DFmode:
39908 case V8SFmode:
39909 case V8SImode:
39910 case V16HImode:
39911 case V32QImode:
39912 case V4SFmode:
39913 case V4SImode:
39914 case V8HImode:
39915 case V4HImode:
39916 break;
39917
39918 case V16QImode:
39919 if (TARGET_SSE4_1)
39920 break;
39921 wmode = V8HImode;
39922 goto widen;
39923 case V8QImode:
39924 wmode = V4HImode;
39925 goto widen;
39926 widen:
39927 /* There's no way to set one QImode entry easily. Combine
39928 the variable value with its adjacent constant value, and
39929 promote to an HImode set. */
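	 /* For example, with one_var == 3 the variable byte is merged with
	    the constant byte at index 2 (an odd index goes into the high
	    half of the HImode value) and the combined value is inserted at
	    HImode position one_var >> 1 == 1.  */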
39930 x = XVECEXP (vals, 0, one_var ^ 1);
39931 if (one_var & 1)
39932 {
39933 var = convert_modes (HImode, QImode, var, true);
39934 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
39935 NULL_RTX, 1, OPTAB_LIB_WIDEN);
39936 x = GEN_INT (INTVAL (x) & 0xff);
39937 }
39938 else
39939 {
39940 var = convert_modes (HImode, QImode, var, true);
39941 x = gen_int_mode (INTVAL (x) << 8, HImode);
39942 }
39943 if (x != const0_rtx)
39944 var = expand_simple_binop (HImode, IOR, var, x, var,
39945 1, OPTAB_LIB_WIDEN);
39946
39947 x = gen_reg_rtx (wmode);
39948 emit_move_insn (x, gen_lowpart (wmode, const_vec));
39949 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
39950
39951 emit_move_insn (target, gen_lowpart (mode, x));
39952 return true;
39953
39954 default:
39955 return false;
39956 }
39957
39958 emit_move_insn (target, const_vec);
39959 ix86_expand_vector_set (mmx_ok, target, var, one_var);
39960 return true;
39961 }
39962
39963 /* A subroutine of ix86_expand_vector_init_general. Use vector
39964 concatenate to handle the most general case: all values variable,
39965 and none identical. */
39966
39967 static void
39968 ix86_expand_vector_init_concat (enum machine_mode mode,
39969 rtx target, rtx *ops, int n)
39970 {
39971 enum machine_mode cmode, hmode = VOIDmode, gmode = VOIDmode;
39972 rtx first[16], second[8], third[4];
39973 rtvec v;
39974 int i, j;
39975
39976 switch (n)
39977 {
39978 case 2:
39979 switch (mode)
39980 {
39981 case V16SImode:
39982 cmode = V8SImode;
39983 break;
39984 case V16SFmode:
39985 cmode = V8SFmode;
39986 break;
39987 case V8DImode:
39988 cmode = V4DImode;
39989 break;
39990 case V8DFmode:
39991 cmode = V4DFmode;
39992 break;
39993 case V8SImode:
39994 cmode = V4SImode;
39995 break;
39996 case V8SFmode:
39997 cmode = V4SFmode;
39998 break;
39999 case V4DImode:
40000 cmode = V2DImode;
40001 break;
40002 case V4DFmode:
40003 cmode = V2DFmode;
40004 break;
40005 case V4SImode:
40006 cmode = V2SImode;
40007 break;
40008 case V4SFmode:
40009 cmode = V2SFmode;
40010 break;
40011 case V2DImode:
40012 cmode = DImode;
40013 break;
40014 case V2SImode:
40015 cmode = SImode;
40016 break;
40017 case V2DFmode:
40018 cmode = DFmode;
40019 break;
40020 case V2SFmode:
40021 cmode = SFmode;
40022 break;
40023 default:
40024 gcc_unreachable ();
40025 }
40026
40027 if (!register_operand (ops[1], cmode))
40028 ops[1] = force_reg (cmode, ops[1]);
40029 if (!register_operand (ops[0], cmode))
40030 ops[0] = force_reg (cmode, ops[0]);
40031 emit_insn (gen_rtx_SET (VOIDmode, target,
40032 gen_rtx_VEC_CONCAT (mode, ops[0],
40033 ops[1])));
40034 break;
40035
40036 case 4:
40037 switch (mode)
40038 {
40039 case V4DImode:
40040 cmode = V2DImode;
40041 break;
40042 case V4DFmode:
40043 cmode = V2DFmode;
40044 break;
40045 case V4SImode:
40046 cmode = V2SImode;
40047 break;
40048 case V4SFmode:
40049 cmode = V2SFmode;
40050 break;
40051 default:
40052 gcc_unreachable ();
40053 }
40054 goto half;
40055
40056 case 8:
40057 switch (mode)
40058 {
40059 case V8DImode:
40060 cmode = V2DImode;
40061 hmode = V4DImode;
40062 break;
40063 case V8DFmode:
40064 cmode = V2DFmode;
40065 hmode = V4DFmode;
40066 break;
40067 case V8SImode:
40068 cmode = V2SImode;
40069 hmode = V4SImode;
40070 break;
40071 case V8SFmode:
40072 cmode = V2SFmode;
40073 hmode = V4SFmode;
40074 break;
40075 default:
40076 gcc_unreachable ();
40077 }
40078 goto half;
40079
40080 case 16:
40081 switch (mode)
40082 {
40083 case V16SImode:
40084 cmode = V2SImode;
40085 hmode = V4SImode;
40086 gmode = V8SImode;
40087 break;
40088 case V16SFmode:
40089 cmode = V2SFmode;
40090 hmode = V4SFmode;
40091 gmode = V8SFmode;
40092 break;
40093 default:
40094 gcc_unreachable ();
40095 }
40096 goto half;
40097
40098 half:
40099 /* FIXME: We process inputs backward to help RA. PR 36222. */
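      /* Pair the scalar inputs into CMODE vectors, then (for n > 4) pair
	 those into HMODE and, for n == 16, GMODE vectors, and finally
	 concatenate the two remaining halves into TARGET.  */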
40100 i = n - 1;
40101 j = (n >> 1) - 1;
40102 for (; i > 0; i -= 2, j--)
40103 {
40104 first[j] = gen_reg_rtx (cmode);
40105 v = gen_rtvec (2, ops[i - 1], ops[i]);
40106 ix86_expand_vector_init (false, first[j],
40107 gen_rtx_PARALLEL (cmode, v));
40108 }
40109
40110 n >>= 1;
40111 if (n > 4)
40112 {
40113 gcc_assert (hmode != VOIDmode);
40114 gcc_assert (gmode != VOIDmode);
40115 for (i = j = 0; i < n; i += 2, j++)
40116 {
40117 second[j] = gen_reg_rtx (hmode);
40118 ix86_expand_vector_init_concat (hmode, second [j],
40119 &first [i], 2);
40120 }
40121 n >>= 1;
40122 for (i = j = 0; i < n; i += 2, j++)
40123 {
40124 third[j] = gen_reg_rtx (gmode);
40125 ix86_expand_vector_init_concat (gmode, third[j],
40126 &second[i], 2);
40127 }
40128 n >>= 1;
40129 ix86_expand_vector_init_concat (mode, target, third, n);
40130 }
40131 else if (n > 2)
40132 {
40133 gcc_assert (hmode != VOIDmode);
40134 for (i = j = 0; i < n; i += 2, j++)
40135 {
40136 second[j] = gen_reg_rtx (hmode);
40137 ix86_expand_vector_init_concat (hmode, second [j],
40138 &first [i], 2);
40139 }
40140 n >>= 1;
40141 ix86_expand_vector_init_concat (mode, target, second, n);
40142 }
40143 else
40144 ix86_expand_vector_init_concat (mode, target, first, n);
40145 break;
40146
40147 default:
40148 gcc_unreachable ();
40149 }
40150 }
40151
40152 /* A subroutine of ix86_expand_vector_init_general. Use vector
40153 interleave to handle the most general case: all values variable,
40154 and none identical. */
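/* The strategy used below: load pairs of adjacent scalar elements into
   vectors, then repeatedly interleave the low halves of those vectors
   until a single vector of the full width remains.  */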
40155
40156 static void
40157 ix86_expand_vector_init_interleave (enum machine_mode mode,
40158 rtx target, rtx *ops, int n)
40159 {
40160 enum machine_mode first_imode, second_imode, third_imode, inner_mode;
40161 int i, j;
40162 rtx op0, op1;
40163 rtx (*gen_load_even) (rtx, rtx, rtx);
40164 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
40165 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
40166
40167 switch (mode)
40168 {
40169 case V8HImode:
40170 gen_load_even = gen_vec_setv8hi;
40171 gen_interleave_first_low = gen_vec_interleave_lowv4si;
40172 gen_interleave_second_low = gen_vec_interleave_lowv2di;
40173 inner_mode = HImode;
40174 first_imode = V4SImode;
40175 second_imode = V2DImode;
40176 third_imode = VOIDmode;
40177 break;
40178 case V16QImode:
40179 gen_load_even = gen_vec_setv16qi;
40180 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
40181 gen_interleave_second_low = gen_vec_interleave_lowv4si;
40182 inner_mode = QImode;
40183 first_imode = V8HImode;
40184 second_imode = V4SImode;
40185 third_imode = V2DImode;
40186 break;
40187 default:
40188 gcc_unreachable ();
40189 }
40190
40191 for (i = 0; i < n; i++)
40192 {
40195 /* Extend the odd element to SImode using a paradoxical SUBREG. */
40194 op0 = gen_reg_rtx (SImode);
40195 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
40196
40197 /* Insert the SImode value as low element of V4SImode vector. */
40198 op1 = gen_reg_rtx (V4SImode);
40199 op0 = gen_rtx_VEC_MERGE (V4SImode,
40200 gen_rtx_VEC_DUPLICATE (V4SImode,
40201 op0),
40202 CONST0_RTX (V4SImode),
40203 const1_rtx);
40204 emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
40205
40208 /* Cast the V4SImode vector back to a vector in the original mode. */
40207 op0 = gen_reg_rtx (mode);
40208 emit_move_insn (op0, gen_lowpart (mode, op1));
40209
40210 /* Load even elements into the second position. */
40211 emit_insn (gen_load_even (op0,
40212 force_reg (inner_mode,
40213 ops [i + i + 1]),
40214 const1_rtx));
40215
40216 /* Cast vector to FIRST_IMODE vector. */
40217 ops[i] = gen_reg_rtx (first_imode);
40218 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
40219 }
40220
40221 /* Interleave low FIRST_IMODE vectors. */
40222 for (i = j = 0; i < n; i += 2, j++)
40223 {
40224 op0 = gen_reg_rtx (first_imode);
40225 emit_insn (gen_interleave_first_low (op0, ops[i], ops[i + 1]));
40226
40227 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
40228 ops[j] = gen_reg_rtx (second_imode);
40229 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
40230 }
40231
40232 /* Interleave low SECOND_IMODE vectors. */
40233 switch (second_imode)
40234 {
40235 case V4SImode:
40236 for (i = j = 0; i < n / 2; i += 2, j++)
40237 {
40238 op0 = gen_reg_rtx (second_imode);
40239 emit_insn (gen_interleave_second_low (op0, ops[i],
40240 ops[i + 1]));
40241
40242 /* Cast the SECOND_IMODE vector to the THIRD_IMODE
40243 vector. */
40244 ops[j] = gen_reg_rtx (third_imode);
40245 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
40246 }
40247 second_imode = V2DImode;
40248 gen_interleave_second_low = gen_vec_interleave_lowv2di;
40249 /* FALLTHRU */
40250
40251 case V2DImode:
40252 op0 = gen_reg_rtx (second_imode);
40253 emit_insn (gen_interleave_second_low (op0, ops[0],
40254 ops[1]));
40255
40258 /* Cast the SECOND_IMODE vector back to a vector in the original
40259 mode. */
40258 emit_insn (gen_rtx_SET (VOIDmode, target,
40259 gen_lowpart (mode, op0)));
40260 break;
40261
40262 default:
40263 gcc_unreachable ();
40264 }
40265 }
40266
40267 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
40268 all values variable, and none identical. */
40269
40270 static void
40271 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
40272 rtx target, rtx vals)
40273 {
40274 rtx ops[64], op0, op1;
40275 enum machine_mode half_mode = VOIDmode;
40276 int n, i;
40277
40278 switch (mode)
40279 {
40280 case V2SFmode:
40281 case V2SImode:
40282 if (!mmx_ok && !TARGET_SSE)
40283 break;
40284 /* FALLTHRU */
40285
40286 case V16SImode:
40287 case V16SFmode:
40288 case V8DFmode:
40289 case V8DImode:
40290 case V8SFmode:
40291 case V8SImode:
40292 case V4DFmode:
40293 case V4DImode:
40294 case V4SFmode:
40295 case V4SImode:
40296 case V2DFmode:
40297 case V2DImode:
40298 n = GET_MODE_NUNITS (mode);
40299 for (i = 0; i < n; i++)
40300 ops[i] = XVECEXP (vals, 0, i);
40301 ix86_expand_vector_init_concat (mode, target, ops, n);
40302 return;
40303
40304 case V32QImode:
40305 half_mode = V16QImode;
40306 goto half;
40307
40308 case V16HImode:
40309 half_mode = V8HImode;
40310 goto half;
40311
40312 half:
40313 n = GET_MODE_NUNITS (mode);
40314 for (i = 0; i < n; i++)
40315 ops[i] = XVECEXP (vals, 0, i);
40316 op0 = gen_reg_rtx (half_mode);
40317 op1 = gen_reg_rtx (half_mode);
40318 ix86_expand_vector_init_interleave (half_mode, op0, ops,
40319 n >> 2);
40320 ix86_expand_vector_init_interleave (half_mode, op1,
40321 &ops [n >> 1], n >> 2);
40322 emit_insn (gen_rtx_SET (VOIDmode, target,
40323 gen_rtx_VEC_CONCAT (mode, op0, op1)));
40324 return;
40325
40326 case V16QImode:
40327 if (!TARGET_SSE4_1)
40328 break;
40329 /* FALLTHRU */
40330
40331 case V8HImode:
40332 if (!TARGET_SSE2)
40333 break;
40334
40335 /* Don't use ix86_expand_vector_init_interleave if we can't
40336 move from GPR to SSE register directly. */
40337 if (!TARGET_INTER_UNIT_MOVES_TO_VEC)
40338 break;
40339
40340 n = GET_MODE_NUNITS (mode);
40341 for (i = 0; i < n; i++)
40342 ops[i] = XVECEXP (vals, 0, i);
40343 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
40344 return;
40345
40346 case V4HImode:
40347 case V8QImode:
40348 break;
40349
40350 default:
40351 gcc_unreachable ();
40352 }
40353
40354 {
40355 int i, j, n_elts, n_words, n_elt_per_word;
40356 enum machine_mode inner_mode;
40357 rtx words[4], shift;
40358
40359 inner_mode = GET_MODE_INNER (mode);
40360 n_elts = GET_MODE_NUNITS (mode);
40361 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
40362 n_elt_per_word = n_elts / n_words;
40363 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
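    /* Pack each group of n_elt_per_word elements into one word_mode value.
       The elements of a group are combined in reverse order, so the element
       with the lowest index ends up in the least significant bits.  */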
40364
40365 for (i = 0; i < n_words; ++i)
40366 {
40367 rtx word = NULL_RTX;
40368
40369 for (j = 0; j < n_elt_per_word; ++j)
40370 {
40371 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
40372 elt = convert_modes (word_mode, inner_mode, elt, true);
40373
40374 if (j == 0)
40375 word = elt;
40376 else
40377 {
40378 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
40379 word, 1, OPTAB_LIB_WIDEN);
40380 word = expand_simple_binop (word_mode, IOR, word, elt,
40381 word, 1, OPTAB_LIB_WIDEN);
40382 }
40383 }
40384
40385 words[i] = word;
40386 }
40387
40388 if (n_words == 1)
40389 emit_move_insn (target, gen_lowpart (mode, words[0]));
40390 else if (n_words == 2)
40391 {
40392 rtx tmp = gen_reg_rtx (mode);
40393 emit_clobber (tmp);
40394 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
40395 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
40396 emit_move_insn (target, tmp);
40397 }
40398 else if (n_words == 4)
40399 {
40400 rtx tmp = gen_reg_rtx (V4SImode);
40401 gcc_assert (word_mode == SImode);
40402 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
40403 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
40404 emit_move_insn (target, gen_lowpart (mode, tmp));
40405 }
40406 else
40407 gcc_unreachable ();
40408 }
40409 }
40410
40411 /* Initialize vector TARGET via VALS. Suppress the use of MMX
40412 instructions unless MMX_OK is true. */
40413
40414 void
40415 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
40416 {
40417 enum machine_mode mode = GET_MODE (target);
40418 enum machine_mode inner_mode = GET_MODE_INNER (mode);
40419 int n_elts = GET_MODE_NUNITS (mode);
40420 int n_var = 0, one_var = -1;
40421 bool all_same = true, all_const_zero = true;
40422 int i;
40423 rtx x;
40424
40425 for (i = 0; i < n_elts; ++i)
40426 {
40427 x = XVECEXP (vals, 0, i);
40428 if (!(CONST_INT_P (x)
40429 || GET_CODE (x) == CONST_DOUBLE
40430 || GET_CODE (x) == CONST_FIXED))
40431 n_var++, one_var = i;
40432 else if (x != CONST0_RTX (inner_mode))
40433 all_const_zero = false;
40434 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
40435 all_same = false;
40436 }
40437
40438 /* Constants are best loaded from the constant pool. */
40439 if (n_var == 0)
40440 {
40441 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
40442 return;
40443 }
40444
40445 /* If all values are identical, broadcast the value. */
40446 if (all_same
40447 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
40448 XVECEXP (vals, 0, 0)))
40449 return;
40450
40451 /* Values where only one field is non-constant are best loaded from
40452 the pool and overwritten via move later. */
40453 if (n_var == 1)
40454 {
40455 if (all_const_zero
40456 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
40457 XVECEXP (vals, 0, one_var),
40458 one_var))
40459 return;
40460
40461 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
40462 return;
40463 }
40464
40465 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
40466 }
40467
40468 void
40469 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
40470 {
40471 enum machine_mode mode = GET_MODE (target);
40472 enum machine_mode inner_mode = GET_MODE_INNER (mode);
40473 enum machine_mode half_mode;
40474 bool use_vec_merge = false;
40475 rtx tmp;
40476 static rtx (*gen_extract[6][2]) (rtx, rtx)
40477 = {
40478 { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
40479 { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
40480 { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
40481 { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
40482 { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
40483 { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
40484 };
40485 static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
40486 = {
40487 { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
40488 { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
40489 { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
40490 { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
40491 { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
40492 { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
40493 };
40494 int i, j, n;
40495
40496 switch (mode)
40497 {
40498 case V2SFmode:
40499 case V2SImode:
40500 if (mmx_ok)
40501 {
40502 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
40503 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
40504 if (elt == 0)
40505 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
40506 else
40507 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
40508 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
40509 return;
40510 }
40511 break;
40512
40513 case V2DImode:
40514 use_vec_merge = TARGET_SSE4_1 && TARGET_64BIT;
40515 if (use_vec_merge)
40516 break;
40517
40518 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
40519 ix86_expand_vector_extract (false, tmp, target, 1 - elt);
40520 if (elt == 0)
40521 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
40522 else
40523 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
40524 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
40525 return;
40526
40527 case V2DFmode:
40528 {
40529 rtx op0, op1;
40530
40531 /* For the two element vectors, we implement a VEC_CONCAT with
40532 the extraction of the other element. */
40533
40534 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
40535 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
40536
40537 if (elt == 0)
40538 op0 = val, op1 = tmp;
40539 else
40540 op0 = tmp, op1 = val;
40541
40542 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
40543 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
40544 }
40545 return;
40546
40547 case V4SFmode:
40548 use_vec_merge = TARGET_SSE4_1;
40549 if (use_vec_merge)
40550 break;
40551
40552 switch (elt)
40553 {
40554 case 0:
40555 use_vec_merge = true;
40556 break;
40557
40558 case 1:
40559 /* tmp = target = A B C D */
40560 tmp = copy_to_reg (target);
40561 /* target = A A B B */
40562 emit_insn (gen_vec_interleave_lowv4sf (target, target, target));
40563 /* target = X A B B */
40564 ix86_expand_vector_set (false, target, val, 0);
40565 /* target = A X C D */
40566 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
40567 const1_rtx, const0_rtx,
40568 GEN_INT (2+4), GEN_INT (3+4)));
40569 return;
40570
40571 case 2:
40572 /* tmp = target = A B C D */
40573 tmp = copy_to_reg (target);
40574 /* tmp = X B C D */
40575 ix86_expand_vector_set (false, tmp, val, 0);
40576 /* target = A B X D */
40577 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
40578 const0_rtx, const1_rtx,
40579 GEN_INT (0+4), GEN_INT (3+4)));
40580 return;
40581
40582 case 3:
40583 /* tmp = target = A B C D */
40584 tmp = copy_to_reg (target);
40585 /* tmp = X B C D */
40586 ix86_expand_vector_set (false, tmp, val, 0);
40589 /* target = A B C X */
40588 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
40589 const0_rtx, const1_rtx,
40590 GEN_INT (2+4), GEN_INT (0+4)));
40591 return;
40592
40593 default:
40594 gcc_unreachable ();
40595 }
40596 break;
40597
40598 case V4SImode:
40599 use_vec_merge = TARGET_SSE4_1;
40600 if (use_vec_merge)
40601 break;
40602
40603 /* Element 0 handled by vec_merge below. */
40604 if (elt == 0)
40605 {
40606 use_vec_merge = true;
40607 break;
40608 }
40609
40610 if (TARGET_SSE2)
40611 {
40612 /* With SSE2, use integer shuffles to swap element 0 and ELT,
40613 store into element 0, then shuffle them back. */
40614
40615 rtx order[4];
40616
40617 order[0] = GEN_INT (elt);
40618 order[1] = const1_rtx;
40619 order[2] = const2_rtx;
40620 order[3] = GEN_INT (3);
40621 order[elt] = const0_rtx;
40622
40623 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
40624 order[1], order[2], order[3]));
40625
40626 ix86_expand_vector_set (false, target, val, 0);
40627
40628 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
40629 order[1], order[2], order[3]));
40630 }
40631 else
40632 {
40633 /* For SSE1, we have to reuse the V4SF code. */
40634 rtx t = gen_reg_rtx (V4SFmode);
40635 ix86_expand_vector_set (false, t, gen_lowpart (SFmode, val), elt);
40636 emit_move_insn (target, gen_lowpart (mode, t));
40637 }
40638 return;
40639
40640 case V8HImode:
40641 use_vec_merge = TARGET_SSE2;
40642 break;
40643 case V4HImode:
40644 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
40645 break;
40646
40647 case V16QImode:
40648 use_vec_merge = TARGET_SSE4_1;
40649 break;
40650
40651 case V8QImode:
40652 break;
40653
40654 case V32QImode:
40655 half_mode = V16QImode;
40656 j = 0;
40657 n = 16;
40658 goto half;
40659
40660 case V16HImode:
40661 half_mode = V8HImode;
40662 j = 1;
40663 n = 8;
40664 goto half;
40665
40666 case V8SImode:
40667 half_mode = V4SImode;
40668 j = 2;
40669 n = 4;
40670 goto half;
40671
40672 case V4DImode:
40673 half_mode = V2DImode;
40674 j = 3;
40675 n = 2;
40676 goto half;
40677
40678 case V8SFmode:
40679 half_mode = V4SFmode;
40680 j = 4;
40681 n = 4;
40682 goto half;
40683
40684 case V4DFmode:
40685 half_mode = V2DFmode;
40686 j = 5;
40687 n = 2;
40688 goto half;
40689
40690 half:
40691 /* Compute offset. */
40692 i = elt / n;
40693 elt %= n;
40694
40695 gcc_assert (i <= 1);
40696
40697 /* Extract the half. */
40698 tmp = gen_reg_rtx (half_mode);
40699 emit_insn (gen_extract[j][i] (tmp, target));
40700
40701 /* Put val in tmp at elt. */
40702 ix86_expand_vector_set (false, tmp, val, elt);
40703
40704 /* Put it back. */
40705 emit_insn (gen_insert[j][i] (target, target, tmp));
40706 return;
40707
40708 default:
40709 break;
40710 }
40711
40712 if (use_vec_merge)
40713 {
40714 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
40715 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
40716 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
40717 }
40718 else
40719 {
40720 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
40721
40722 emit_move_insn (mem, target);
40723
40724 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
40725 emit_move_insn (tmp, val);
40726
40727 emit_move_insn (target, mem);
40728 }
40729 }
40730
40731 void
40732 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
40733 {
40734 enum machine_mode mode = GET_MODE (vec);
40735 enum machine_mode inner_mode = GET_MODE_INNER (mode);
40736 bool use_vec_extr = false;
40737 rtx tmp;
40738
40739 switch (mode)
40740 {
40741 case V2SImode:
40742 case V2SFmode:
40743 if (!mmx_ok)
40744 break;
40745 /* FALLTHRU */
40746
40747 case V2DFmode:
40748 case V2DImode:
40749 use_vec_extr = true;
40750 break;
40751
40752 case V4SFmode:
40753 use_vec_extr = TARGET_SSE4_1;
40754 if (use_vec_extr)
40755 break;
40756
40757 switch (elt)
40758 {
40759 case 0:
40760 tmp = vec;
40761 break;
40762
40763 case 1:
40764 case 3:
40765 tmp = gen_reg_rtx (mode);
40766 emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
40767 GEN_INT (elt), GEN_INT (elt),
40768 GEN_INT (elt+4), GEN_INT (elt+4)));
40769 break;
40770
40771 case 2:
40772 tmp = gen_reg_rtx (mode);
40773 emit_insn (gen_vec_interleave_highv4sf (tmp, vec, vec));
40774 break;
40775
40776 default:
40777 gcc_unreachable ();
40778 }
40779 vec = tmp;
40780 use_vec_extr = true;
40781 elt = 0;
40782 break;
40783
40784 case V4SImode:
40785 use_vec_extr = TARGET_SSE4_1;
40786 if (use_vec_extr)
40787 break;
40788
40789 if (TARGET_SSE2)
40790 {
40791 switch (elt)
40792 {
40793 case 0:
40794 tmp = vec;
40795 break;
40796
40797 case 1:
40798 case 3:
40799 tmp = gen_reg_rtx (mode);
40800 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
40801 GEN_INT (elt), GEN_INT (elt),
40802 GEN_INT (elt), GEN_INT (elt)));
40803 break;
40804
40805 case 2:
40806 tmp = gen_reg_rtx (mode);
40807 emit_insn (gen_vec_interleave_highv4si (tmp, vec, vec));
40808 break;
40809
40810 default:
40811 gcc_unreachable ();
40812 }
40813 vec = tmp;
40814 use_vec_extr = true;
40815 elt = 0;
40816 }
40817 else
40818 {
40819 /* For SSE1, we have to reuse the V4SF code. */
40820 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
40821 gen_lowpart (V4SFmode, vec), elt);
40822 return;
40823 }
40824 break;
40825
40826 case V8HImode:
40827 use_vec_extr = TARGET_SSE2;
40828 break;
40829 case V4HImode:
40830 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
40831 break;
40832
40833 case V16QImode:
40834 use_vec_extr = TARGET_SSE4_1;
40835 break;
40836
40837 case V8SFmode:
40838 if (TARGET_AVX)
40839 {
40840 tmp = gen_reg_rtx (V4SFmode);
40841 if (elt < 4)
40842 emit_insn (gen_vec_extract_lo_v8sf (tmp, vec));
40843 else
40844 emit_insn (gen_vec_extract_hi_v8sf (tmp, vec));
40845 ix86_expand_vector_extract (false, target, tmp, elt & 3);
40846 return;
40847 }
40848 break;
40849
40850 case V4DFmode:
40851 if (TARGET_AVX)
40852 {
40853 tmp = gen_reg_rtx (V2DFmode);
40854 if (elt < 2)
40855 emit_insn (gen_vec_extract_lo_v4df (tmp, vec));
40856 else
40857 emit_insn (gen_vec_extract_hi_v4df (tmp, vec));
40858 ix86_expand_vector_extract (false, target, tmp, elt & 1);
40859 return;
40860 }
40861 break;
40862
40863 case V32QImode:
40864 if (TARGET_AVX)
40865 {
40866 tmp = gen_reg_rtx (V16QImode);
40867 if (elt < 16)
40868 emit_insn (gen_vec_extract_lo_v32qi (tmp, vec));
40869 else
40870 emit_insn (gen_vec_extract_hi_v32qi (tmp, vec));
40871 ix86_expand_vector_extract (false, target, tmp, elt & 15);
40872 return;
40873 }
40874 break;
40875
40876 case V16HImode:
40877 if (TARGET_AVX)
40878 {
40879 tmp = gen_reg_rtx (V8HImode);
40880 if (elt < 8)
40881 emit_insn (gen_vec_extract_lo_v16hi (tmp, vec));
40882 else
40883 emit_insn (gen_vec_extract_hi_v16hi (tmp, vec));
40884 ix86_expand_vector_extract (false, target, tmp, elt & 7);
40885 return;
40886 }
40887 break;
40888
40889 case V8SImode:
40890 if (TARGET_AVX)
40891 {
40892 tmp = gen_reg_rtx (V4SImode);
40893 if (elt < 4)
40894 emit_insn (gen_vec_extract_lo_v8si (tmp, vec));
40895 else
40896 emit_insn (gen_vec_extract_hi_v8si (tmp, vec));
40897 ix86_expand_vector_extract (false, target, tmp, elt & 3);
40898 return;
40899 }
40900 break;
40901
40902 case V4DImode:
40903 if (TARGET_AVX)
40904 {
40905 tmp = gen_reg_rtx (V2DImode);
40906 if (elt < 2)
40907 emit_insn (gen_vec_extract_lo_v4di (tmp, vec));
40908 else
40909 emit_insn (gen_vec_extract_hi_v4di (tmp, vec));
40910 ix86_expand_vector_extract (false, target, tmp, elt & 1);
40911 return;
40912 }
40913 break;
40914
40915 case V16SFmode:
40916 tmp = gen_reg_rtx (V8SFmode);
40917 if (elt < 8)
40918 emit_insn (gen_vec_extract_lo_v16sf (tmp, vec));
40919 else
40920 emit_insn (gen_vec_extract_hi_v16sf (tmp, vec));
40921 ix86_expand_vector_extract (false, target, tmp, elt & 7);
40922 return;
40923
40924 case V8DFmode:
40925 tmp = gen_reg_rtx (V4DFmode);
40926 if (elt < 4)
40927 emit_insn (gen_vec_extract_lo_v8df (tmp, vec));
40928 else
40929 emit_insn (gen_vec_extract_hi_v8df (tmp, vec));
40930 ix86_expand_vector_extract (false, target, tmp, elt & 3);
40931 return;
40932
40933 case V16SImode:
40934 tmp = gen_reg_rtx (V8SImode);
40935 if (elt < 8)
40936 emit_insn (gen_vec_extract_lo_v16si (tmp, vec));
40937 else
40938 emit_insn (gen_vec_extract_hi_v16si (tmp, vec));
40939 ix86_expand_vector_extract (false, target, tmp, elt & 7);
40940 return;
40941
40942 case V8DImode:
40943 tmp = gen_reg_rtx (V4DImode);
40944 if (elt < 4)
40945 emit_insn (gen_vec_extract_lo_v8di (tmp, vec));
40946 else
40947 emit_insn (gen_vec_extract_hi_v8di (tmp, vec));
40948 ix86_expand_vector_extract (false, target, tmp, elt & 3);
40949 return;
40950
40951 case V8QImode:
40952 /* ??? Could extract the appropriate HImode element and shift. */
40953 default:
40954 break;
40955 }
40956
40957 if (use_vec_extr)
40958 {
40959 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
40960 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
40961
40962 /* Let the rtl optimizers know about the zero extension performed. */
40963 if (inner_mode == QImode || inner_mode == HImode)
40964 {
40965 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
40966 target = gen_lowpart (SImode, target);
40967 }
40968
40969 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
40970 }
40971 else
40972 {
40973 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
40974
40975 emit_move_insn (mem, vec);
40976
40977 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
40978 emit_move_insn (target, tmp);
40979 }
40980 }
40981
40982 /* Generate code to copy vector bits i / 2 ... i - 1 from vector SRC
40983 to bits 0 ... i / 2 - 1 of vector DEST, which has the same mode.
40984 The upper bits of DEST are undefined, though they shouldn't cause
40985 exceptions (some bits from src or all zeros are ok). */
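/* For example, for V4SFmode and i == 128 this is a single movhlps moving
   the upper two elements into the lower half, while i == 64 uses a shufps
   that copies element 1 into element 0.  The 128-bit integer modes instead
   shift the whole register right by i / 2 bits.  */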
40986
40987 static void
40988 emit_reduc_half (rtx dest, rtx src, int i)
40989 {
40990 rtx tem, d = dest;
40991 switch (GET_MODE (src))
40992 {
40993 case V4SFmode:
40994 if (i == 128)
40995 tem = gen_sse_movhlps (dest, src, src);
40996 else
40997 tem = gen_sse_shufps_v4sf (dest, src, src, const1_rtx, const1_rtx,
40998 GEN_INT (1 + 4), GEN_INT (1 + 4));
40999 break;
41000 case V2DFmode:
41001 tem = gen_vec_interleave_highv2df (dest, src, src);
41002 break;
41003 case V16QImode:
41004 case V8HImode:
41005 case V4SImode:
41006 case V2DImode:
41007 d = gen_reg_rtx (V1TImode);
41008 tem = gen_sse2_lshrv1ti3 (d, gen_lowpart (V1TImode, src),
41009 GEN_INT (i / 2));
41010 break;
41011 case V8SFmode:
41012 if (i == 256)
41013 tem = gen_avx_vperm2f128v8sf3 (dest, src, src, const1_rtx);
41014 else
41015 tem = gen_avx_shufps256 (dest, src, src,
41016 GEN_INT (i == 128 ? 2 + (3 << 2) : 1));
41017 break;
41018 case V4DFmode:
41019 if (i == 256)
41020 tem = gen_avx_vperm2f128v4df3 (dest, src, src, const1_rtx);
41021 else
41022 tem = gen_avx_shufpd256 (dest, src, src, const1_rtx);
41023 break;
41024 case V32QImode:
41025 case V16HImode:
41026 case V8SImode:
41027 case V4DImode:
41028 if (i == 256)
41029 {
41030 if (GET_MODE (dest) != V4DImode)
41031 d = gen_reg_rtx (V4DImode);
41032 tem = gen_avx2_permv2ti (d, gen_lowpart (V4DImode, src),
41033 gen_lowpart (V4DImode, src),
41034 const1_rtx);
41035 }
41036 else
41037 {
41038 d = gen_reg_rtx (V2TImode);
41039 tem = gen_avx2_lshrv2ti3 (d, gen_lowpart (V2TImode, src),
41040 GEN_INT (i / 2));
41041 }
41042 break;
41043 case V16SImode:
41044 case V16SFmode:
41045 case V8DImode:
41046 case V8DFmode:
41047 if (i > 128)
41048 tem = gen_avx512f_shuf_i32x4_1 (gen_lowpart (V16SImode, dest),
41049 gen_lowpart (V16SImode, src),
41050 gen_lowpart (V16SImode, src),
41051 GEN_INT (0x4 + (i == 512 ? 4 : 0)),
41052 GEN_INT (0x5 + (i == 512 ? 4 : 0)),
41053 GEN_INT (0x6 + (i == 512 ? 4 : 0)),
41054 GEN_INT (0x7 + (i == 512 ? 4 : 0)),
41055 GEN_INT (0xC), GEN_INT (0xD),
41056 GEN_INT (0xE), GEN_INT (0xF),
41057 GEN_INT (0x10), GEN_INT (0x11),
41058 GEN_INT (0x12), GEN_INT (0x13),
41059 GEN_INT (0x14), GEN_INT (0x15),
41060 GEN_INT (0x16), GEN_INT (0x17));
41061 else
41062 tem = gen_avx512f_pshufd_1 (gen_lowpart (V16SImode, dest),
41063 gen_lowpart (V16SImode, src),
41064 GEN_INT (i == 128 ? 0x2 : 0x1),
41065 GEN_INT (0x3),
41066 GEN_INT (0x3),
41067 GEN_INT (0x3),
41068 GEN_INT (i == 128 ? 0x6 : 0x5),
41069 GEN_INT (0x7),
41070 GEN_INT (0x7),
41071 GEN_INT (0x7),
41072 GEN_INT (i == 128 ? 0xA : 0x9),
41073 GEN_INT (0xB),
41074 GEN_INT (0xB),
41075 GEN_INT (0xB),
41076 GEN_INT (i == 128 ? 0xE : 0xD),
41077 GEN_INT (0xF),
41078 GEN_INT (0xF),
41079 GEN_INT (0xF));
41080 break;
41081 default:
41082 gcc_unreachable ();
41083 }
41084 emit_insn (tem);
41085 if (d != dest)
41086 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
41087 }
41088
41089 /* Expand a vector reduction. FN is the binary pattern to reduce;
41090 DEST is the destination; IN is the input vector. */
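/* This works by repeatedly folding the upper half of the vector onto the
   lower half with emit_reduc_half and combining the two halves with FN,
   halving the number of live elements each time until the result is left
   in element 0 of DEST.  */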
41091
41092 void
41093 ix86_expand_reduc (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
41094 {
41095 rtx half, dst, vec = in;
41096 enum machine_mode mode = GET_MODE (in);
41097 int i;
41098
41101 /* SSE4.1 has a special instruction for V8HImode UMIN reduction. */
41100 if (TARGET_SSE4_1
41101 && mode == V8HImode
41102 && fn == gen_uminv8hi3)
41103 {
41104 emit_insn (gen_sse4_1_phminposuw (dest, in));
41105 return;
41106 }
41107
41108 for (i = GET_MODE_BITSIZE (mode);
41109 i > GET_MODE_BITSIZE (GET_MODE_INNER (mode));
41110 i >>= 1)
41111 {
41112 half = gen_reg_rtx (mode);
41113 emit_reduc_half (half, vec, i);
41114 if (i == GET_MODE_BITSIZE (GET_MODE_INNER (mode)) * 2)
41115 dst = dest;
41116 else
41117 dst = gen_reg_rtx (mode);
41118 emit_insn (fn (dst, half, vec));
41119 vec = dst;
41120 }
41121 }
41122 \f
41123 /* Target hook for scalar_mode_supported_p. */
41124 static bool
41125 ix86_scalar_mode_supported_p (enum machine_mode mode)
41126 {
41127 if (DECIMAL_FLOAT_MODE_P (mode))
41128 return default_decimal_float_supported_p ();
41129 else if (mode == TFmode)
41130 return true;
41131 else
41132 return default_scalar_mode_supported_p (mode);
41133 }
41134
41135 /* Implements target hook vector_mode_supported_p. */
41136 static bool
41137 ix86_vector_mode_supported_p (enum machine_mode mode)
41138 {
41139 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
41140 return true;
41141 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
41142 return true;
41143 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
41144 return true;
41145 if (TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
41146 return true;
41147 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
41148 return true;
41149 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
41150 return true;
41151 return false;
41152 }
41153
41154 /* Target hook for c_mode_for_suffix. */
41155 static enum machine_mode
41156 ix86_c_mode_for_suffix (char suffix)
41157 {
41158 if (suffix == 'q')
41159 return TFmode;
41160 if (suffix == 'w')
41161 return XFmode;
41162
41163 return VOIDmode;
41164 }
41165
41166 /* Worker function for TARGET_MD_ASM_CLOBBERS.
41167
41168 We do this in the new i386 backend to maintain source compatibility
41169 with the old cc0-based compiler. */
41170
41171 static tree
41172 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
41173 tree inputs ATTRIBUTE_UNUSED,
41174 tree clobbers)
41175 {
41176 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
41177 clobbers);
41178 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
41179 clobbers);
41180 return clobbers;
41181 }
41182
41183 /* Implements target vector targetm.asm.encode_section_info. */
41184
41185 static void ATTRIBUTE_UNUSED
41186 ix86_encode_section_info (tree decl, rtx rtl, int first)
41187 {
41188 default_encode_section_info (decl, rtl, first);
41189
41190 if (TREE_CODE (decl) == VAR_DECL
41191 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
41192 && ix86_in_large_data_p (decl))
41193 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
41194 }
41195
41196 /* Worker function for REVERSE_CONDITION. */
41197
41198 enum rtx_code
41199 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
41200 {
41201 return (mode != CCFPmode && mode != CCFPUmode
41202 ? reverse_condition (code)
41203 : reverse_condition_maybe_unordered (code));
41204 }
41205
41206 /* Output code to perform an x87 FP register move, from OPERANDS[1]
41207 to OPERANDS[0]. */
41208
41209 const char *
41210 output_387_reg_move (rtx insn, rtx *operands)
41211 {
41212 if (REG_P (operands[0]))
41213 {
41214 if (REG_P (operands[1])
41215 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
41216 {
41217 if (REGNO (operands[0]) == FIRST_STACK_REG)
41218 return output_387_ffreep (operands, 0);
41219 return "fstp\t%y0";
41220 }
41221 if (STACK_TOP_P (operands[0]))
41222 return "fld%Z1\t%y1";
41223 return "fst\t%y0";
41224 }
41225 else if (MEM_P (operands[0]))
41226 {
41227 gcc_assert (REG_P (operands[1]));
41228 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
41229 return "fstp%Z0\t%y0";
41230 else
41231 {
41232 /* There is no non-popping store to memory for XFmode.
41233 So if we need one, follow the store with a load. */
41234 if (GET_MODE (operands[0]) == XFmode)
41235 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
41236 else
41237 return "fst%Z0\t%y0";
41238 }
41239 }
41240 else
41243 gcc_unreachable ();
41242 }
41243
41246 /* Output code to perform a conditional jump to LABEL, if the C2 flag in
41247 the FP status register is set. */
41246
41247 void
41248 ix86_emit_fp_unordered_jump (rtx label)
41249 {
41250 rtx reg = gen_reg_rtx (HImode);
41251 rtx temp;
41252
41253 emit_insn (gen_x86_fnstsw_1 (reg));
41254
41255 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
41256 {
41257 emit_insn (gen_x86_sahf_1 (reg));
41258
41259 temp = gen_rtx_REG (CCmode, FLAGS_REG);
41260 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
41261 }
41262 else
41263 {
41264 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
41265
41266 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
41267 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
41268 }
41269
41270 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
41271 gen_rtx_LABEL_REF (VOIDmode, label),
41272 pc_rtx);
41273 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
41274
41275 emit_jump_insn (temp);
41276 predict_jump (REG_BR_PROB_BASE * 10 / 100);
41277 }
41278
41279 /* Output code to perform a log1p XFmode calculation. */
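/* fyl2xp1 is only specified for |op1| < 1 - sqrt(2)/2 (about 0.2928932...),
   which is what the comparison against the constant below checks.  Outside
   that range, 1 + op1 is formed explicitly and fyl2x is used instead; in
   both cases the fldln2 constant turns the base-2 logarithm into a natural
   logarithm.  */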
41280
41281 void ix86_emit_i387_log1p (rtx op0, rtx op1)
41282 {
41283 rtx label1 = gen_label_rtx ();
41284 rtx label2 = gen_label_rtx ();
41285
41286 rtx tmp = gen_reg_rtx (XFmode);
41287 rtx tmp2 = gen_reg_rtx (XFmode);
41288 rtx test;
41289
41290 emit_insn (gen_absxf2 (tmp, op1));
41291 test = gen_rtx_GE (VOIDmode, tmp,
41292 CONST_DOUBLE_FROM_REAL_VALUE (
41293 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
41294 XFmode));
41295 emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1));
41296
41297 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
41298 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
41299 emit_jump (label2);
41300
41301 emit_label (label1);
41302 emit_move_insn (tmp, CONST1_RTX (XFmode));
41303 emit_insn (gen_addxf3 (tmp, op1, tmp));
41304 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
41305 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
41306
41307 emit_label (label2);
41308 }
41309
41310 /* Emit code for round calculation. */
41311 void ix86_emit_i387_round (rtx op0, rtx op1)
41312 {
41313 enum machine_mode inmode = GET_MODE (op1);
41314 enum machine_mode outmode = GET_MODE (op0);
41315 rtx e1, e2, res, tmp, tmp1, half;
41316 rtx scratch = gen_reg_rtx (HImode);
41317 rtx flags = gen_rtx_REG (CCNOmode, FLAGS_REG);
41318 rtx jump_label = gen_label_rtx ();
41319 rtx insn;
41320 rtx (*gen_abs) (rtx, rtx);
41321 rtx (*gen_neg) (rtx, rtx);
41322
41323 switch (inmode)
41324 {
41325 case SFmode:
41326 gen_abs = gen_abssf2;
41327 break;
41328 case DFmode:
41329 gen_abs = gen_absdf2;
41330 break;
41331 case XFmode:
41332 gen_abs = gen_absxf2;
41333 break;
41334 default:
41335 gcc_unreachable ();
41336 }
41337
41338 switch (outmode)
41339 {
41340 case SFmode:
41341 gen_neg = gen_negsf2;
41342 break;
41343 case DFmode:
41344 gen_neg = gen_negdf2;
41345 break;
41346 case XFmode:
41347 gen_neg = gen_negxf2;
41348 break;
41349 case HImode:
41350 gen_neg = gen_neghi2;
41351 break;
41352 case SImode:
41353 gen_neg = gen_negsi2;
41354 break;
41355 case DImode:
41356 gen_neg = gen_negdi2;
41357 break;
41358 default:
41359 gcc_unreachable ();
41360 }
41361
41362 e1 = gen_reg_rtx (inmode);
41363 e2 = gen_reg_rtx (inmode);
41364 res = gen_reg_rtx (outmode);
41365
41366 half = CONST_DOUBLE_FROM_REAL_VALUE (dconsthalf, inmode);
41367
41368 /* round(a) = sgn(a) * floor(fabs(a) + 0.5) */
41369
41370 /* scratch = fxam(op1) */
41371 emit_insn (gen_rtx_SET (VOIDmode, scratch,
41372 gen_rtx_UNSPEC (HImode, gen_rtvec (1, op1),
41373 UNSPEC_FXAM)));
41374 /* e1 = fabs(op1) */
41375 emit_insn (gen_abs (e1, op1));
41376
41377 /* e2 = e1 + 0.5 */
41378 half = force_reg (inmode, half);
41379 emit_insn (gen_rtx_SET (VOIDmode, e2,
41380 gen_rtx_PLUS (inmode, e1, half)));
41381
41382 /* res = floor(e2) */
41383 if (inmode != XFmode)
41384 {
41385 tmp1 = gen_reg_rtx (XFmode);
41386
41387 emit_insn (gen_rtx_SET (VOIDmode, tmp1,
41388 gen_rtx_FLOAT_EXTEND (XFmode, e2)));
41389 }
41390 else
41391 tmp1 = e2;
41392
41393 switch (outmode)
41394 {
41395 case SFmode:
41396 case DFmode:
41397 {
41398 rtx tmp0 = gen_reg_rtx (XFmode);
41399
41400 emit_insn (gen_frndintxf2_floor (tmp0, tmp1));
41401
41402 emit_insn (gen_rtx_SET (VOIDmode, res,
41403 gen_rtx_UNSPEC (outmode, gen_rtvec (1, tmp0),
41404 UNSPEC_TRUNC_NOOP)));
41405 }
41406 break;
41407 case XFmode:
41408 emit_insn (gen_frndintxf2_floor (res, tmp1));
41409 break;
41410 case HImode:
41411 emit_insn (gen_lfloorxfhi2 (res, tmp1));
41412 break;
41413 case SImode:
41414 emit_insn (gen_lfloorxfsi2 (res, tmp1));
41415 break;
41416 case DImode:
41417 emit_insn (gen_lfloorxfdi2 (res, tmp1));
41418 break;
41419 default:
41420 gcc_unreachable ();
41421 }
41422
41423 /* flags = signbit(a) */
41424 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x02)));
41425
41426 /* if (flags) then res = -res */
41427 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode,
41428 gen_rtx_EQ (VOIDmode, flags, const0_rtx),
41429 gen_rtx_LABEL_REF (VOIDmode, jump_label),
41430 pc_rtx);
41431 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
41432 predict_jump (REG_BR_PROB_BASE * 50 / 100);
41433 JUMP_LABEL (insn) = jump_label;
41434
41435 emit_insn (gen_neg (res, res));
41436
41437 emit_label (jump_label);
41438 LABEL_NUSES (jump_label) = 1;
41439
41440 emit_move_insn (op0, res);
41441 }
41442
41445 /* Output code to perform a Newton-Raphson approximation of a single precision
41444 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
41445
41446 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
41447 {
41448 rtx x0, x1, e0, e1;
41449
41450 x0 = gen_reg_rtx (mode);
41451 e0 = gen_reg_rtx (mode);
41452 e1 = gen_reg_rtx (mode);
41453 x1 = gen_reg_rtx (mode);
41454
41455 /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */
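  /* That is, one Newton-Raphson step x1 = x0 * (2 - b * x0) refining the
     hardware reciprocal estimate x0 = rcp(b), followed by res = a * x1.  */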
41456
41457 b = force_reg (mode, b);
41458
41459 /* x0 = rcp(b) estimate */
41460 if (mode == V16SFmode || mode == V8DFmode)
41461 emit_insn (gen_rtx_SET (VOIDmode, x0,
41462 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
41463 UNSPEC_RCP14)));
41464 else
41465 emit_insn (gen_rtx_SET (VOIDmode, x0,
41466 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
41467 UNSPEC_RCP)));
41468
41469 /* e0 = x0 * b */
41470 emit_insn (gen_rtx_SET (VOIDmode, e0,
41471 gen_rtx_MULT (mode, x0, b)));
41472
41473 /* e0 = x0 * e0 */
41474 emit_insn (gen_rtx_SET (VOIDmode, e0,
41475 gen_rtx_MULT (mode, x0, e0)));
41476
41477 /* e1 = x0 + x0 */
41478 emit_insn (gen_rtx_SET (VOIDmode, e1,
41479 gen_rtx_PLUS (mode, x0, x0)));
41480
41481 /* x1 = e1 - e0 */
41482 emit_insn (gen_rtx_SET (VOIDmode, x1,
41483 gen_rtx_MINUS (mode, e1, e0)));
41484
41485 /* res = a * x1 */
41486 emit_insn (gen_rtx_SET (VOIDmode, res,
41487 gen_rtx_MULT (mode, a, x1)));
41488 }
41489
41492 /* Output code to perform a Newton-Raphson approximation of a
41491 single precision floating point [reciprocal] square root. */
41492
41493 void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
41494 bool recip)
41495 {
41496 rtx x0, e0, e1, e2, e3, mthree, mhalf;
41497 REAL_VALUE_TYPE r;
41498 int unspec;
41499
41500 x0 = gen_reg_rtx (mode);
41501 e0 = gen_reg_rtx (mode);
41502 e1 = gen_reg_rtx (mode);
41503 e2 = gen_reg_rtx (mode);
41504 e3 = gen_reg_rtx (mode);
41505
41506 real_from_integer (&r, VOIDmode, -3, SIGNED);
41507 mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
41508
41509 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
41510 mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
41511 unspec = UNSPEC_RSQRT;
41512
41513 if (VECTOR_MODE_P (mode))
41514 {
41515 mthree = ix86_build_const_vector (mode, true, mthree);
41516 mhalf = ix86_build_const_vector (mode, true, mhalf);
41519 /* There is no 512-bit rsqrt, but there is rsqrt14. */
41518 if (GET_MODE_SIZE (mode) == 64)
41519 unspec = UNSPEC_RSQRT14;
41520 }
41521
41522 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
41523 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
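  /* Both are a single Newton-Raphson step applied to the hardware estimate
     r = rsqrtss(a): rsqrt(a) ~ 0.5 * r * (3 - a * r * r), with sqrt(a)
     obtained by an extra multiplication by a.  */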
41524
41525 a = force_reg (mode, a);
41526
41527 /* x0 = rsqrt(a) estimate */
41528 emit_insn (gen_rtx_SET (VOIDmode, x0,
41529 gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
41530 unspec)));
41531
41534 /* If a == 0.0, filter out the infinite rsqrt estimate so that sqrt (0.0) does not produce a NaN. */
41533 if (!recip)
41534 {
41535 rtx zero, mask;
41536
41537 zero = gen_reg_rtx (mode);
41538 mask = gen_reg_rtx (mode);
41539
41540 zero = force_reg (mode, CONST0_RTX(mode));
41541
41542 /* Handle masked compare. */
41543 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
41544 {
41545 mask = gen_reg_rtx (HImode);
41546 /* Imm value 0x4 corresponds to not-equal comparison. */
41547 emit_insn (gen_avx512f_cmpv16sf3 (mask, zero, a, GEN_INT (0x4)));
41548 emit_insn (gen_avx512f_blendmv16sf (x0, zero, x0, mask));
41549 }
41550 else
41551 {
41552 emit_insn (gen_rtx_SET (VOIDmode, mask,
41553 gen_rtx_NE (mode, zero, a)));
41554
41555 emit_insn (gen_rtx_SET (VOIDmode, x0,
41556 gen_rtx_AND (mode, x0, mask)));
41557 }
41558 }
41559
41560 /* e0 = x0 * a */
41561 emit_insn (gen_rtx_SET (VOIDmode, e0,
41562 gen_rtx_MULT (mode, x0, a)));
41563 /* e1 = e0 * x0 */
41564 emit_insn (gen_rtx_SET (VOIDmode, e1,
41565 gen_rtx_MULT (mode, e0, x0)));
41566
41567 /* e2 = e1 - 3. */
41568 mthree = force_reg (mode, mthree);
41569 emit_insn (gen_rtx_SET (VOIDmode, e2,
41570 gen_rtx_PLUS (mode, e1, mthree)));
41571
41572 mhalf = force_reg (mode, mhalf);
41573 if (recip)
41574 /* e3 = -.5 * x0 */
41575 emit_insn (gen_rtx_SET (VOIDmode, e3,
41576 gen_rtx_MULT (mode, x0, mhalf)));
41577 else
41578 /* e3 = -.5 * e0 */
41579 emit_insn (gen_rtx_SET (VOIDmode, e3,
41580 gen_rtx_MULT (mode, e0, mhalf)));
41581 /* ret = e2 * e3 */
41582 emit_insn (gen_rtx_SET (VOIDmode, res,
41583 gen_rtx_MULT (mode, e2, e3)));
41584 }
41585
41586 #ifdef TARGET_SOLARIS
41587 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
41588
41589 static void
41590 i386_solaris_elf_named_section (const char *name, unsigned int flags,
41591 tree decl)
41592 {
41593 /* With Binutils 2.15, the "@unwind" marker must be specified on
41594 every occurrence of the ".eh_frame" section, not just the first
41595 one. */
41596 if (TARGET_64BIT
41597 && strcmp (name, ".eh_frame") == 0)
41598 {
41599 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
41600 flags & SECTION_WRITE ? "aw" : "a");
41601 return;
41602 }
41603
41604 #ifndef USE_GAS
41605 if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
41606 {
41607 solaris_elf_asm_comdat_section (name, flags, decl);
41608 return;
41609 }
41610 #endif
41611
41612 default_elf_asm_named_section (name, flags, decl);
41613 }
41614 #endif /* TARGET_SOLARIS */
41615
41616 /* Return the mangling of TYPE if it is an extended fundamental type. */
41617
41618 static const char *
41619 ix86_mangle_type (const_tree type)
41620 {
41621 type = TYPE_MAIN_VARIANT (type);
41622
41623 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
41624 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
41625 return NULL;
41626
41627 switch (TYPE_MODE (type))
41628 {
41629 case TFmode:
41630 /* __float128 is "g". */
41631 return "g";
41632 case XFmode:
41633 /* "long double" or __float80 is "e". */
41634 return "e";
41635 default:
41636 return NULL;
41637 }
41638 }
41639
41640 /* For 32-bit code we can save PIC register setup by using
41641 __stack_chk_fail_local hidden function instead of calling
41644 __stack_chk_fail directly. 64-bit code doesn't need to set up any PIC
41643 register, so it is better to call __stack_chk_fail directly. */
41644
41645 static tree ATTRIBUTE_UNUSED
41646 ix86_stack_protect_fail (void)
41647 {
41648 return TARGET_64BIT
41649 ? default_external_stack_protect_fail ()
41650 : default_hidden_stack_protect_fail ();
41651 }
41652
41653 /* Select a format to encode pointers in exception handling data. CODE
41654 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
41655 true if the symbol may be affected by dynamic relocations.
41656
41657 ??? All x86 object file formats are capable of representing this.
41658 After all, the relocation needed is the same as for the call insn.
41659 Whether or not a particular assembler allows us to enter such, I
41660 guess we'll have to see. */
41661 int
41662 asm_preferred_eh_data_format (int code, int global)
41663 {
41664 if (flag_pic)
41665 {
41666 int type = DW_EH_PE_sdata8;
41667 if (!TARGET_64BIT
41668 || ix86_cmodel == CM_SMALL_PIC
41669 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
41670 type = DW_EH_PE_sdata4;
41671 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
41672 }
41673 if (ix86_cmodel == CM_SMALL
41674 || (ix86_cmodel == CM_MEDIUM && code))
41675 return DW_EH_PE_udata4;
41676 return DW_EH_PE_absptr;
41677 }
41678 \f
41679 /* Expand copysign from SIGN to the positive value ABS_VALUE
41680 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
41681 the sign-bit. */
41682 static void
41683 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
41684 {
41685 enum machine_mode mode = GET_MODE (sign);
41686 rtx sgn = gen_reg_rtx (mode);
41687 if (mask == NULL_RTX)
41688 {
41689 enum machine_mode vmode;
41690
41691 if (mode == SFmode)
41692 vmode = V4SFmode;
41693 else if (mode == DFmode)
41694 vmode = V2DFmode;
41695 else
41696 vmode = mode;
41697
41698 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), false);
41699 if (!VECTOR_MODE_P (mode))
41700 {
41701 /* We need to generate a scalar mode mask in this case. */
41702 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
41703 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
41704 mask = gen_reg_rtx (mode);
41705 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
41706 }
41707 }
41708 else
41709 mask = gen_rtx_NOT (mode, mask);
41710 emit_insn (gen_rtx_SET (VOIDmode, sgn,
41711 gen_rtx_AND (mode, mask, sign)));
41712 emit_insn (gen_rtx_SET (VOIDmode, result,
41713 gen_rtx_IOR (mode, abs_value, sgn)));
41714 }
41715
41716 /* Expand fabs (OP0) and return a new rtx that holds the result. The
41717 mask for masking out the sign-bit is stored in *SMASK, if that is
41718 non-null. */
41719 static rtx
41720 ix86_expand_sse_fabs (rtx op0, rtx *smask)
41721 {
41722 enum machine_mode vmode, mode = GET_MODE (op0);
41723 rtx xa, mask;
41724
41725 xa = gen_reg_rtx (mode);
41726 if (mode == SFmode)
41727 vmode = V4SFmode;
41728 else if (mode == DFmode)
41729 vmode = V2DFmode;
41730 else
41731 vmode = mode;
41732 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), true);
41733 if (!VECTOR_MODE_P (mode))
41734 {
41735 /* We need to generate a scalar mode mask in this case. */
41736 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
41737 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
41738 mask = gen_reg_rtx (mode);
41739 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
41740 }
41741 emit_insn (gen_rtx_SET (VOIDmode, xa,
41742 gen_rtx_AND (mode, op0, mask)));
41743
41744 if (smask)
41745 *smask = mask;
41746
41747 return xa;
41748 }
41749
41750 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
41751 swapping the operands if SWAP_OPERANDS is true. The expanded
41752 code is a forward jump to a newly created label in case the
41753 comparison is true. The generated label rtx is returned. */
41754 static rtx
41755 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
41756 bool swap_operands)
41757 {
41758 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
41759 rtx label, tmp;
41760
41761 if (swap_operands)
41762 {
41763 tmp = op0;
41764 op0 = op1;
41765 op1 = tmp;
41766 }
41767
41768 label = gen_label_rtx ();
41769 tmp = gen_rtx_REG (fpcmp_mode, FLAGS_REG);
41770 emit_insn (gen_rtx_SET (VOIDmode, tmp,
41771 gen_rtx_COMPARE (fpcmp_mode, op0, op1)));
41772 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
41773 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
41774 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
41775 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
41776 JUMP_LABEL (tmp) = label;
41777
41778 return label;
41779 }
41780
41781 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
41782 using comparison code CODE. Operands are swapped for the comparison if
41783 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
41784 static rtx
41785 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
41786 bool swap_operands)
41787 {
41788 rtx (*insn)(rtx, rtx, rtx, rtx);
41789 enum machine_mode mode = GET_MODE (op0);
41790 rtx mask = gen_reg_rtx (mode);
41791
41792 if (swap_operands)
41793 {
41794 rtx tmp = op0;
41795 op0 = op1;
41796 op1 = tmp;
41797 }
41798
41799 insn = mode == DFmode ? gen_setcc_df_sse : gen_setcc_sf_sse;
41800
41801 emit_insn (insn (mask, op0, op1,
41802 gen_rtx_fmt_ee (code, mode, op0, op1)));
41803 return mask;
41804 }
41805
41806 /* Generate and return a rtx of mode MODE for 2**n where n is the number
41807 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
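/* 2**52 (2**23 for SFmode) is the smallest power of two at which the
   format can no longer represent a fractional part, so adding and then
   subtracting it rounds a nonnegative value to an integer in the current
   rounding mode; the rounding expanders below rely on this.  */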
41808 static rtx
41809 ix86_gen_TWO52 (enum machine_mode mode)
41810 {
41811 REAL_VALUE_TYPE TWO52r;
41812 rtx TWO52;
41813
41814 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
41815 TWO52 = const_double_from_real_value (TWO52r, mode);
41816 TWO52 = force_reg (mode, TWO52);
41817
41818 return TWO52;
41819 }
41820
41821 /* Expand SSE sequence for computing lround from OP1 storing
41822 into OP0. */
41823 void
41824 ix86_expand_lround (rtx op0, rtx op1)
41825 {
41826 /* C code for the stuff we're doing below:
41827 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
41828 return (long)tmp;
41829 */
41830 enum machine_mode mode = GET_MODE (op1);
41831 const struct real_format *fmt;
41832 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
41833 rtx adj;
41834
41835 /* load nextafter (0.5, 0.0) */
41836 fmt = REAL_MODE_FORMAT (mode);
41837 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
41838 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
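  /* pred_half is the largest value strictly below 0.5.  Adding exactly 0.5
     could round up in the addition (e.g. for the largest double below 0.5,
     op1 + 0.5 rounds to 1.0), making the truncation below off by one.  */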
41839
41840 /* adj = copysign (0.5, op1) */
41841 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
41842 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
41843
41844 /* adj = op1 + adj */
41845 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
41846
41847 /* op0 = (imode)adj */
41848 expand_fix (op0, adj, 0);
41849 }
41850
41851 /* Expand SSE2 sequence for computing lfloor or lceil from OP1 storing
41852    into OP0.  */
41853 void
41854 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
41855 {
41856 /* C code for the stuff we're doing below (for do_floor):
41857 xi = (long)op1;
41858 xi -= (double)xi > op1 ? 1 : 0;
41859 return xi;
41860 */
41861 enum machine_mode fmode = GET_MODE (op1);
41862 enum machine_mode imode = GET_MODE (op0);
41863 rtx ireg, freg, label, tmp;
41864
41865 /* reg = (long)op1 */
41866 ireg = gen_reg_rtx (imode);
41867 expand_fix (ireg, op1, 0);
41868
41869 /* freg = (double)reg */
41870 freg = gen_reg_rtx (fmode);
41871 expand_float (freg, ireg, 0);
41872
41873 /* ireg = (freg > op1) ? ireg - 1 : ireg */
41874 label = ix86_expand_sse_compare_and_jump (UNLE,
41875 freg, op1, !do_floor);
41876 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
41877 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
41878 emit_move_insn (ireg, tmp);
41879
41880 emit_label (label);
41881 LABEL_NUSES (label) = 1;
41882
41883 emit_move_insn (op0, ireg);
41884 }
41885
41886 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
41887 result in OPERAND0. */
41888 void
41889 ix86_expand_rint (rtx operand0, rtx operand1)
41890 {
41891 /* C code for the stuff we're doing below:
41892 xa = fabs (operand1);
41893 if (!isless (xa, 2**52))
41894 return operand1;
41895 xa = xa + 2**52 - 2**52;
41896 return copysign (xa, operand1);
41897 */
41898 enum machine_mode mode = GET_MODE (operand0);
41899 rtx res, xa, label, TWO52, mask;
41900
41901 res = gen_reg_rtx (mode);
41902 emit_move_insn (res, operand1);
41903
41904 /* xa = abs (operand1) */
41905 xa = ix86_expand_sse_fabs (res, &mask);
41906
41907 /* if (!isless (xa, TWO52)) goto label; */
41908 TWO52 = ix86_gen_TWO52 (mode);
41909 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
41910
41911 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
41912 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
41913
41914 ix86_sse_copysign_to_positive (res, xa, res, mask);
41915
41916 emit_label (label);
41917 LABEL_NUSES (label) = 1;
41918
41919 emit_move_insn (operand0, res);
41920 }
41921
41922 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
41923 into OPERAND0. */
41924 void
41925 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
41926 {
41927 /* C code for the stuff we expand below.
41928 double xa = fabs (x), x2;
41929 if (!isless (xa, TWO52))
41930 return x;
41931 xa = xa + TWO52 - TWO52;
41932 x2 = copysign (xa, x);
41933 Compensate. Floor:
41934 if (x2 > x)
41935 x2 -= 1;
41936 Compensate. Ceil:
41937 if (x2 < x)
41938 x2 -= -1;
41939 return x2;
41940 */
41941 enum machine_mode mode = GET_MODE (operand0);
41942 rtx xa, TWO52, tmp, label, one, res, mask;
41943
41944 TWO52 = ix86_gen_TWO52 (mode);
41945
41946 /* Temporary for holding the result, initialized to the input
41947 operand to ease control flow. */
41948 res = gen_reg_rtx (mode);
41949 emit_move_insn (res, operand1);
41950
41951 /* xa = abs (operand1) */
41952 xa = ix86_expand_sse_fabs (res, &mask);
41953
41954 /* if (!isless (xa, TWO52)) goto label; */
41955 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
41956
41957 /* xa = xa + TWO52 - TWO52; */
41958 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
41959 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
41960
41961 /* xa = copysign (xa, operand1) */
41962 ix86_sse_copysign_to_positive (xa, xa, res, mask);
41963
41964 /* generate 1.0 or -1.0 */
41965 one = force_reg (mode,
41966 const_double_from_real_value (do_floor
41967 ? dconst1 : dconstm1, mode));
41968
41969 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
41970 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
41971 emit_insn (gen_rtx_SET (VOIDmode, tmp,
41972 gen_rtx_AND (mode, one, tmp)));
41973 /* We always need to subtract here to preserve signed zero. */
41974 tmp = expand_simple_binop (mode, MINUS,
41975 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
41976 emit_move_insn (res, tmp);
41977
41978 emit_label (label);
41979 LABEL_NUSES (label) = 1;
41980
41981 emit_move_insn (operand0, res);
41982 }
41983
41984 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
41985 into OPERAND0. */
41986 void
41987 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
41988 {
41989 /* C code for the stuff we expand below.
41990 double xa = fabs (x), x2;
41991 if (!isless (xa, TWO52))
41992 return x;
41993 x2 = (double)(long)x;
41994 Compensate. Floor:
41995 if (x2 > x)
41996 x2 -= 1;
41997 Compensate. Ceil:
41998 if (x2 < x)
41999 x2 += 1;
42000 if (HONOR_SIGNED_ZEROS (mode))
42001 return copysign (x2, x);
42002 return x2;
42003 */
42004 enum machine_mode mode = GET_MODE (operand0);
42005 rtx xa, xi, TWO52, tmp, label, one, res, mask;
42006
42007 TWO52 = ix86_gen_TWO52 (mode);
42008
42009 /* Temporary for holding the result, initialized to the input
42010 operand to ease control flow. */
42011 res = gen_reg_rtx (mode);
42012 emit_move_insn (res, operand1);
42013
42014 /* xa = abs (operand1) */
42015 xa = ix86_expand_sse_fabs (res, &mask);
42016
42017 /* if (!isless (xa, TWO52)) goto label; */
42018 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
42019
42020 /* xa = (double)(long)x */
42021 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
42022 expand_fix (xi, res, 0);
42023 expand_float (xa, xi, 0);
42024
42025 /* generate 1.0 */
42026 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
42027
42028 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
42029 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
42030 emit_insn (gen_rtx_SET (VOIDmode, tmp,
42031 gen_rtx_AND (mode, one, tmp)));
42032 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
42033 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
42034 emit_move_insn (res, tmp);
42035
42036 if (HONOR_SIGNED_ZEROS (mode))
42037 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
42038
42039 emit_label (label);
42040 LABEL_NUSES (label) = 1;
42041
42042 emit_move_insn (operand0, res);
42043 }
42044
42045 /* Expand SSE sequence for computing round from OPERAND1, storing
42046    into OPERAND0.  This sequence works without relying on the DImode
42047    truncation via cvttsd2siq, which is only available on 64-bit targets.  */
42048 void
42049 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
42050 {
42051 /* C code for the stuff we expand below.
42052 double xa = fabs (x), xa2, x2;
42053 if (!isless (xa, TWO52))
42054 return x;
42055 Using the absolute value and copying back sign makes
42056 -0.0 -> -0.0 correct.
42057 xa2 = xa + TWO52 - TWO52;
42058 Compensate.
42059 dxa = xa2 - xa;
42060 if (dxa <= -0.5)
42061 xa2 += 1;
42062 else if (dxa > 0.5)
42063 xa2 -= 1;
42064 x2 = copysign (xa2, x);
42065 return x2;
42066 */
42067 enum machine_mode mode = GET_MODE (operand0);
42068 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
42069
42070 TWO52 = ix86_gen_TWO52 (mode);
42071
42072 /* Temporary for holding the result, initialized to the input
42073 operand to ease control flow. */
42074 res = gen_reg_rtx (mode);
42075 emit_move_insn (res, operand1);
42076
42077 /* xa = abs (operand1) */
42078 xa = ix86_expand_sse_fabs (res, &mask);
42079
42080 /* if (!isless (xa, TWO52)) goto label; */
42081 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
42082
42083 /* xa2 = xa + TWO52 - TWO52; */
42084 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
42085 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
42086
42087 /* dxa = xa2 - xa; */
42088 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
42089
42090 /* generate 0.5, 1.0 and -0.5 */
42091 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
42092 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
42093 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
42094 0, OPTAB_DIRECT);
42095
42096 /* Compensate. */
42097 tmp = gen_reg_rtx (mode);
42098 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
42099 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
42100 emit_insn (gen_rtx_SET (VOIDmode, tmp,
42101 gen_rtx_AND (mode, one, tmp)));
42102 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
42103 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
42104 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
42105 emit_insn (gen_rtx_SET (VOIDmode, tmp,
42106 gen_rtx_AND (mode, one, tmp)));
42107 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
42108
42109 /* res = copysign (xa2, operand1) */
42110 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
42111
42112 emit_label (label);
42113 LABEL_NUSES (label) = 1;
42114
42115 emit_move_insn (operand0, res);
42116 }
42117
42118 /* Expand SSE sequence for computing trunc from OPERAND1 storing
42119 into OPERAND0. */
42120 void
42121 ix86_expand_trunc (rtx operand0, rtx operand1)
42122 {
42123 /* C code for SSE variant we expand below.
42124 double xa = fabs (x), x2;
42125 if (!isless (xa, TWO52))
42126 return x;
42127 x2 = (double)(long)x;
42128 if (HONOR_SIGNED_ZEROS (mode))
42129 return copysign (x2, x);
42130 return x2;
42131 */
42132 enum machine_mode mode = GET_MODE (operand0);
42133 rtx xa, xi, TWO52, label, res, mask;
42134
42135 TWO52 = ix86_gen_TWO52 (mode);
42136
42137 /* Temporary for holding the result, initialized to the input
42138 operand to ease control flow. */
42139 res = gen_reg_rtx (mode);
42140 emit_move_insn (res, operand1);
42141
42142 /* xa = abs (operand1) */
42143 xa = ix86_expand_sse_fabs (res, &mask);
42144
42145 /* if (!isless (xa, TWO52)) goto label; */
42146 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
42147
42148 /* x = (double)(long)x */
42149 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
42150 expand_fix (xi, res, 0);
42151 expand_float (res, xi, 0);
42152
42153 if (HONOR_SIGNED_ZEROS (mode))
42154 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
42155
42156 emit_label (label);
42157 LABEL_NUSES (label) = 1;
42158
42159 emit_move_insn (operand0, res);
42160 }
42161
42162 /* Expand SSE sequence for computing trunc from OPERAND1 storing
42163 into OPERAND0. */
42164 void
42165 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
42166 {
42167 enum machine_mode mode = GET_MODE (operand0);
42168 rtx xa, mask, TWO52, label, one, res, smask, tmp;
42169
42170 /* C code for SSE variant we expand below.
42171 double xa = fabs (x), x2;
42172 if (!isless (xa, TWO52))
42173 return x;
42174 xa2 = xa + TWO52 - TWO52;
42175 Compensate:
42176 if (xa2 > xa)
42177 xa2 -= 1.0;
42178 x2 = copysign (xa2, x);
42179 return x2;
42180 */
42181
42182 TWO52 = ix86_gen_TWO52 (mode);
42183
42184 /* Temporary for holding the result, initialized to the input
42185 operand to ease control flow. */
42186 res = gen_reg_rtx (mode);
42187 emit_move_insn (res, operand1);
42188
42189 /* xa = abs (operand1) */
42190 xa = ix86_expand_sse_fabs (res, &smask);
42191
42192 /* if (!isless (xa, TWO52)) goto label; */
42193 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
42194
42195 /* res = xa + TWO52 - TWO52; */
42196 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
42197 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
42198 emit_move_insn (res, tmp);
42199
42200 /* generate 1.0 */
42201 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
42202
42203 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
42204 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
42205 emit_insn (gen_rtx_SET (VOIDmode, mask,
42206 gen_rtx_AND (mode, mask, one)));
42207 tmp = expand_simple_binop (mode, MINUS,
42208 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
42209 emit_move_insn (res, tmp);
42210
42211 /* res = copysign (res, operand1) */
42212 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
42213
42214 emit_label (label);
42215 LABEL_NUSES (label) = 1;
42216
42217 emit_move_insn (operand0, res);
42218 }
42219
42220 /* Expand SSE sequence for computing round from OPERAND1 storing
42221 into OPERAND0. */
42222 void
42223 ix86_expand_round (rtx operand0, rtx operand1)
42224 {
42225 /* C code for the stuff we're doing below:
42226 double xa = fabs (x);
42227 if (!isless (xa, TWO52))
42228 return x;
42229 xa = (double)(long)(xa + nextafter (0.5, 0.0));
42230 return copysign (xa, x);
42231 */
42232 enum machine_mode mode = GET_MODE (operand0);
42233 rtx res, TWO52, xa, label, xi, half, mask;
42234 const struct real_format *fmt;
42235 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
42236
42237 /* Temporary for holding the result, initialized to the input
42238 operand to ease control flow. */
42239 res = gen_reg_rtx (mode);
42240 emit_move_insn (res, operand1);
42241
42242 TWO52 = ix86_gen_TWO52 (mode);
42243 xa = ix86_expand_sse_fabs (res, &mask);
42244 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
42245
42246 /* load nextafter (0.5, 0.0) */
42247 fmt = REAL_MODE_FORMAT (mode);
42248 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
42249 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
42250
42251 /* xa = xa + 0.5 */
42252 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
42253 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
42254
42255 /* xa = (double)(int64_t)xa */
42256 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
42257 expand_fix (xi, xa, 0);
42258 expand_float (xa, xi, 0);
42259
42260 /* res = copysign (xa, operand1) */
42261 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
42262
42263 emit_label (label);
42264 LABEL_NUSES (label) = 1;
42265
42266 emit_move_insn (operand0, res);
42267 }
42268
42269 /* Expand SSE sequence for computing round
42270 from OP1 storing into OP0 using sse4 round insn. */
42271 void
42272 ix86_expand_round_sse4 (rtx op0, rtx op1)
42273 {
42274 enum machine_mode mode = GET_MODE (op0);
42275 rtx e1, e2, res, half;
42276 const struct real_format *fmt;
42277 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
42278 rtx (*gen_copysign) (rtx, rtx, rtx);
42279 rtx (*gen_round) (rtx, rtx, rtx);
42280
42281 switch (mode)
42282 {
42283 case SFmode:
42284 gen_copysign = gen_copysignsf3;
42285 gen_round = gen_sse4_1_roundsf2;
42286 break;
42287 case DFmode:
42288 gen_copysign = gen_copysigndf3;
42289 gen_round = gen_sse4_1_rounddf2;
42290 break;
42291 default:
42292 gcc_unreachable ();
42293 }
42294
42295 /* round (a) = trunc (a + copysign (0.5, a)) */
42296
42297 /* load nextafter (0.5, 0.0) */
42298 fmt = REAL_MODE_FORMAT (mode);
42299 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
42300 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
42301 half = const_double_from_real_value (pred_half, mode);
42302
42303 /* e1 = copysign (0.5, op1) */
42304 e1 = gen_reg_rtx (mode);
42305 emit_insn (gen_copysign (e1, half, op1));
42306
42307 /* e2 = op1 + e1 */
42308 e2 = expand_simple_binop (mode, PLUS, op1, e1, NULL_RTX, 0, OPTAB_DIRECT);
42309
42310 /* res = trunc (e2) */
42311 res = gen_reg_rtx (mode);
42312 emit_insn (gen_round (res, e2, GEN_INT (ROUND_TRUNC)));
42313
42314 emit_move_insn (op0, res);
42315 }
42316 \f
42317
42318 /* Table of valid machine attributes. */
42319 static const struct attribute_spec ix86_attribute_table[] =
42320 {
42321 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
42322 affects_type_identity } */
42323 /* Stdcall attribute says callee is responsible for popping arguments
42324 if they are not variable. */
42325 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
42326 true },
42327 /* Fastcall attribute says callee is responsible for popping arguments
42328 if they are not variable. */
42329 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
42330 true },
42331 /* Thiscall attribute says callee is responsible for popping arguments
42332 if they are not variable. */
42333 { "thiscall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
42334 true },
42335 /* Cdecl attribute says the callee is a normal C declaration */
42336 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute,
42337 true },
42338 /* Regparm attribute specifies how many integer arguments are to be
42339 passed in registers. */
42340 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute,
42341 true },
42342 /* Sseregparm attribute says we are using x86_64 calling conventions
42343 for FP arguments. */
42344 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute,
42345 true },
42346 /* The transactional memory builtins are implicitly regparm or fastcall
42347 depending on the ABI. Override the generic do-nothing attribute that
42348 these builtins were declared with. */
42349 { "*tm regparm", 0, 0, false, true, true, ix86_handle_tm_regparm_attribute,
42350 true },
42351 /* force_align_arg_pointer says this function realigns the stack at entry. */
42352 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
42353 false, true, true, ix86_handle_cconv_attribute, false },
42354 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
42355 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
42356 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
42357 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute,
42358 false },
42359 #endif
42360 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
42361 false },
42362 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
42363 false },
42364 #ifdef SUBTARGET_ATTRIBUTE_TABLE
42365 SUBTARGET_ATTRIBUTE_TABLE,
42366 #endif
42367 /* ms_abi and sysv_abi calling convention function attributes. */
42368 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
42369 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
42370 { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute,
42371 false },
42372 { "callee_pop_aggregate_return", 1, 1, false, true, true,
42373 ix86_handle_callee_pop_aggregate_return, true },
42374 /* End element. */
42375 { NULL, 0, 0, false, false, false, NULL, false }
42376 };
42377
42378 /* Implement targetm.vectorize.builtin_vectorization_cost. */
42379 static int
42380 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
42381 tree vectype,
42382 int misalign ATTRIBUTE_UNUSED)
42383 {
42384 unsigned elements;
42385
42386 switch (type_of_cost)
42387 {
42388 case scalar_stmt:
42389 return ix86_cost->scalar_stmt_cost;
42390
42391 case scalar_load:
42392 return ix86_cost->scalar_load_cost;
42393
42394 case scalar_store:
42395 return ix86_cost->scalar_store_cost;
42396
42397 case vector_stmt:
42398 return ix86_cost->vec_stmt_cost;
42399
42400 case vector_load:
42401 return ix86_cost->vec_align_load_cost;
42402
42403 case vector_store:
42404 return ix86_cost->vec_store_cost;
42405
42406 case vec_to_scalar:
42407 return ix86_cost->vec_to_scalar_cost;
42408
42409 case scalar_to_vec:
42410 return ix86_cost->scalar_to_vec_cost;
42411
42412 case unaligned_load:
42413 case unaligned_store:
42414 return ix86_cost->vec_unalign_load_cost;
42415
42416 case cond_branch_taken:
42417 return ix86_cost->cond_taken_branch_cost;
42418
42419 case cond_branch_not_taken:
42420 return ix86_cost->cond_not_taken_branch_cost;
42421
42422 case vec_perm:
42423 case vec_promote_demote:
42424 return ix86_cost->vec_stmt_cost;
42425
42426 case vec_construct:
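      /* Rough model: an N-element vector is built from scalars with about
         N/2 insert/interleave steps plus one final instruction.  */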
42427 elements = TYPE_VECTOR_SUBPARTS (vectype);
42428 return elements / 2 + 1;
42429
42430 default:
42431 gcc_unreachable ();
42432 }
42433 }
42434
42435 /* A cached (set (nil) (vselect (vconcat (nil) (nil)) (parallel [])))
42436 insn, so that expand_vselect{,_vconcat} doesn't have to create a fresh
42437 insn every time. */
42438
42439 static GTY(()) rtx vselect_insn;
42440
42441 /* Initialize vselect_insn. */
42442
42443 static void
42444 init_vselect_insn (void)
42445 {
42446 unsigned i;
42447 rtx x;
42448
42449 x = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (MAX_VECT_LEN));
42450 for (i = 0; i < MAX_VECT_LEN; ++i)
42451 XVECEXP (x, 0, i) = const0_rtx;
42452 x = gen_rtx_VEC_SELECT (V2DFmode, gen_rtx_VEC_CONCAT (V4DFmode, const0_rtx,
42453 const0_rtx), x);
42454 x = gen_rtx_SET (VOIDmode, const0_rtx, x);
42455 start_sequence ();
42456 vselect_insn = emit_insn (x);
42457 end_sequence ();
42458 }
42459
42460 /* Construct (set target (vec_select op0 (parallel perm))) and
42461 return true if that's a valid instruction in the active ISA. */
42462
42463 static bool
42464 expand_vselect (rtx target, rtx op0, const unsigned char *perm,
42465 unsigned nelt, bool testing_p)
42466 {
42467 unsigned int i;
42468 rtx x, save_vconcat;
42469 int icode;
42470
42471 if (vselect_insn == NULL_RTX)
42472 init_vselect_insn ();
42473
42474 x = XEXP (SET_SRC (PATTERN (vselect_insn)), 1);
42475 PUT_NUM_ELEM (XVEC (x, 0), nelt);
42476 for (i = 0; i < nelt; ++i)
42477 XVECEXP (x, 0, i) = GEN_INT (perm[i]);
42478 save_vconcat = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
42479 XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = op0;
42480 PUT_MODE (SET_SRC (PATTERN (vselect_insn)), GET_MODE (target));
42481 SET_DEST (PATTERN (vselect_insn)) = target;
42482 icode = recog_memoized (vselect_insn);
42483
42484 if (icode >= 0 && !testing_p)
42485 emit_insn (copy_rtx (PATTERN (vselect_insn)));
42486
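  /* Put the scratch insn back into its pristine state and force it to be
     re-recognized on its next use.  */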
42487 SET_DEST (PATTERN (vselect_insn)) = const0_rtx;
42488 XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = save_vconcat;
42489 INSN_CODE (vselect_insn) = -1;
42490
42491 return icode >= 0;
42492 }
42493
42494 /* Similar, but generate a vec_concat from op0 and op1 as well. */
42495
42496 static bool
42497 expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
42498 const unsigned char *perm, unsigned nelt,
42499 bool testing_p)
42500 {
42501 enum machine_mode v2mode;
42502 rtx x;
42503 bool ok;
42504
42505 if (vselect_insn == NULL_RTX)
42506 init_vselect_insn ();
42507
42508 v2mode = GET_MODE_2XWIDER_MODE (GET_MODE (op0));
42509 x = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
42510 PUT_MODE (x, v2mode);
42511 XEXP (x, 0) = op0;
42512 XEXP (x, 1) = op1;
42513 ok = expand_vselect (target, x, perm, nelt, testing_p);
42514 XEXP (x, 0) = const0_rtx;
42515 XEXP (x, 1) = const0_rtx;
42516 return ok;
42517 }
42518
42519 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
42520 in terms of blendp[sd] / pblendw / pblendvb / vpblendd. */
42521
42522 static bool
42523 expand_vec_perm_blend (struct expand_vec_perm_d *d)
42524 {
42525 enum machine_mode vmode = d->vmode;
42526 unsigned i, mask, nelt = d->nelt;
42527 rtx target, op0, op1, x;
42528 rtx rperm[32], vperm;
42529
42530 if (d->one_operand_p)
42531 return false;
42532 if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
42533 ;
42534 else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
42535 ;
42536 else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
42537 ;
42538 else
42539 return false;
42540
42541 /* This is a blend, not a permute. Elements must stay in their
42542 respective lanes. */
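  /* E.g. for V4SFmode, { 0, 5, 6, 3 } is a valid blend (elements 1 and 2
     are taken from the second operand); { 1, 5, 6, 3 } is not.  */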
42543 for (i = 0; i < nelt; ++i)
42544 {
42545 unsigned e = d->perm[i];
42546 if (!(e == i || e == i + nelt))
42547 return false;
42548 }
42549
42550 if (d->testing_p)
42551 return true;
42552
42553 /* ??? Without SSE4.1, we could implement this with and/andn/or. This
42554 decision should be extracted elsewhere, so that we only try that
42555 sequence once all budget==3 options have been tried. */
42556 target = d->target;
42557 op0 = d->op0;
42558 op1 = d->op1;
42559 mask = 0;
42560
42561 switch (vmode)
42562 {
42563 case V4DFmode:
42564 case V8SFmode:
42565 case V2DFmode:
42566 case V4SFmode:
42567 case V8HImode:
42568 case V8SImode:
42569 for (i = 0; i < nelt; ++i)
42570 mask |= (d->perm[i] >= nelt) << i;
42571 break;
42572
42573 case V2DImode:
42574 for (i = 0; i < 2; ++i)
42575 mask |= (d->perm[i] >= 2 ? 15 : 0) << (i * 4);
42576 vmode = V8HImode;
42577 goto do_subreg;
42578
42579 case V4SImode:
42580 for (i = 0; i < 4; ++i)
42581 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
42582 vmode = V8HImode;
42583 goto do_subreg;
42584
42585 case V16QImode:
42586 /* See if bytes move in pairs so we can use pblendw with
42587 an immediate argument, rather than pblendvb with a vector
42588 argument. */
42589 for (i = 0; i < 16; i += 2)
42590 if (d->perm[i] + 1 != d->perm[i + 1])
42591 {
42592 use_pblendvb:
42593 for (i = 0; i < nelt; ++i)
42594 rperm[i] = (d->perm[i] < nelt ? const0_rtx : constm1_rtx);
42595
42596 finish_pblendvb:
42597 vperm = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
42598 vperm = force_reg (vmode, vperm);
42599
42600 if (GET_MODE_SIZE (vmode) == 16)
42601 emit_insn (gen_sse4_1_pblendvb (target, op0, op1, vperm));
42602 else
42603 emit_insn (gen_avx2_pblendvb (target, op0, op1, vperm));
42604 if (target != d->target)
42605 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
42606 return true;
42607 }
42608
42609 for (i = 0; i < 8; ++i)
42610 mask |= (d->perm[i * 2] >= 16) << i;
42611 vmode = V8HImode;
42612 /* FALLTHRU */
42613
42614 do_subreg:
42615 target = gen_reg_rtx (vmode);
42616 op0 = gen_lowpart (vmode, op0);
42617 op1 = gen_lowpart (vmode, op1);
42618 break;
42619
42620 case V32QImode:
42621 /* See if bytes move in pairs. If not, vpblendvb must be used. */
42622 for (i = 0; i < 32; i += 2)
42623 if (d->perm[i] + 1 != d->perm[i + 1])
42624 goto use_pblendvb;
42625 /* See if bytes move in quadruplets. If yes, vpblendd
42626 with immediate can be used. */
42627 for (i = 0; i < 32; i += 4)
42628 if (d->perm[i] + 2 != d->perm[i + 2])
42629 break;
42630 if (i < 32)
42631 {
42632 /* See if bytes move the same in both lanes. If yes,
42633 vpblendw with immediate can be used. */
42634 for (i = 0; i < 16; i += 2)
42635 if (d->perm[i] + 16 != d->perm[i + 16])
42636 goto use_pblendvb;
42637
42638 /* Use vpblendw. */
42639 for (i = 0; i < 16; ++i)
42640 mask |= (d->perm[i * 2] >= 32) << i;
42641 vmode = V16HImode;
42642 goto do_subreg;
42643 }
42644
42645 /* Use vpblendd. */
42646 for (i = 0; i < 8; ++i)
42647 mask |= (d->perm[i * 4] >= 32) << i;
42648 vmode = V8SImode;
42649 goto do_subreg;
42650
42651 case V16HImode:
42652 /* See if words move in pairs. If yes, vpblendd can be used. */
42653 for (i = 0; i < 16; i += 2)
42654 if (d->perm[i] + 1 != d->perm[i + 1])
42655 break;
42656 if (i < 16)
42657 {
42658 /* See if words move the same in both lanes. If not,
42659 vpblendvb must be used. */
42660 for (i = 0; i < 8; i++)
42661 if (d->perm[i] + 8 != d->perm[i + 8])
42662 {
42663 /* Use vpblendvb. */
42664 for (i = 0; i < 32; ++i)
42665 rperm[i] = (d->perm[i / 2] < 16 ? const0_rtx : constm1_rtx);
42666
42667 vmode = V32QImode;
42668 nelt = 32;
42669 target = gen_reg_rtx (vmode);
42670 op0 = gen_lowpart (vmode, op0);
42671 op1 = gen_lowpart (vmode, op1);
42672 goto finish_pblendvb;
42673 }
42674
42675 /* Use vpblendw. */
42676 for (i = 0; i < 16; ++i)
42677 mask |= (d->perm[i] >= 16) << i;
42678 break;
42679 }
42680
42681 /* Use vpblendd. */
42682 for (i = 0; i < 8; ++i)
42683 mask |= (d->perm[i * 2] >= 16) << i;
42684 vmode = V8SImode;
42685 goto do_subreg;
42686
42687 case V4DImode:
42688 /* Use vpblendd. */
42689 for (i = 0; i < 4; ++i)
42690 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
42691 vmode = V8SImode;
42692 goto do_subreg;
42693
42694 default:
42695 gcc_unreachable ();
42696 }
42697
42698 /* This matches five different patterns, depending on the mode.  */
42699 x = gen_rtx_VEC_MERGE (vmode, op1, op0, GEN_INT (mask));
42700 x = gen_rtx_SET (VOIDmode, target, x);
42701 emit_insn (x);
42702 if (target != d->target)
42703 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
42704
42705 return true;
42706 }
42707
42708 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
42709 in terms of the variable form of vpermilps.
42710
42711 Note that we will have already failed the immediate input vpermilps,
42712 which requires that the high and low part shuffle be identical; the
42713 variable form doesn't require that. */
42714
42715 static bool
42716 expand_vec_perm_vpermil (struct expand_vec_perm_d *d)
42717 {
42718 rtx rperm[8], vperm;
42719 unsigned i;
42720
42721 if (!TARGET_AVX || d->vmode != V8SFmode || !d->one_operand_p)
42722 return false;
42723
42724 /* We can only permute within the 128-bit lane. */
42725 for (i = 0; i < 8; ++i)
42726 {
42727 unsigned e = d->perm[i];
42728 if (i < 4 ? e >= 4 : e < 4)
42729 return false;
42730 }
42731
42732 if (d->testing_p)
42733 return true;
42734
42735 for (i = 0; i < 8; ++i)
42736 {
42737 unsigned e = d->perm[i];
42738
42739 /* Within each 128-bit lane, the elements of op0 are numbered
42740 from 0 and the elements of op1 are numbered from 4. */
42741 if (e >= 8 + 4)
42742 e -= 8;
42743 else if (e >= 4)
42744 e -= 4;
42745
42746 rperm[i] = GEN_INT (e);
42747 }
42748
42749 vperm = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, rperm));
42750 vperm = force_reg (V8SImode, vperm);
42751 emit_insn (gen_avx_vpermilvarv8sf3 (d->target, d->op0, vperm));
42752
42753 return true;
42754 }
42755
42756 /* Return true if permutation D can be performed as VMODE permutation
42757 instead. */
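/* For example, a V16QImode permutation whose indices move aligned groups
   of four consecutive bytes as units can instead be carried out as a
   V4SImode permutation.  */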
42758
42759 static bool
42760 valid_perm_using_mode_p (enum machine_mode vmode, struct expand_vec_perm_d *d)
42761 {
42762 unsigned int i, j, chunk;
42763
42764 if (GET_MODE_CLASS (vmode) != MODE_VECTOR_INT
42765 || GET_MODE_CLASS (d->vmode) != MODE_VECTOR_INT
42766 || GET_MODE_SIZE (vmode) != GET_MODE_SIZE (d->vmode))
42767 return false;
42768
42769 if (GET_MODE_NUNITS (vmode) >= d->nelt)
42770 return true;
42771
42772 chunk = d->nelt / GET_MODE_NUNITS (vmode);
42773 for (i = 0; i < d->nelt; i += chunk)
42774 if (d->perm[i] & (chunk - 1))
42775 return false;
42776 else
42777 for (j = 1; j < chunk; ++j)
42778 if (d->perm[i] + j != d->perm[i + j])
42779 return false;
42780
42781 return true;
42782 }
42783
42784 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
42785 in terms of pshufb, vpperm, vpermq, vpermd, vpermps or vperm2i128. */
42786
42787 static bool
42788 expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
42789 {
42790 unsigned i, nelt, eltsz, mask;
42791 unsigned char perm[32];
42792 enum machine_mode vmode = V16QImode;
42793 rtx rperm[32], vperm, target, op0, op1;
42794
42795 nelt = d->nelt;
42796
42797 if (!d->one_operand_p)
42798 {
42799 if (!TARGET_XOP || GET_MODE_SIZE (d->vmode) != 16)
42800 {
42801 if (TARGET_AVX2
42802 && valid_perm_using_mode_p (V2TImode, d))
42803 {
42804 if (d->testing_p)
42805 return true;
42806
42807 /* Use vperm2i128 insn. The pattern uses
42808 V4DImode instead of V2TImode. */
42809 target = d->target;
42810 if (d->vmode != V4DImode)
42811 target = gen_reg_rtx (V4DImode);
42812 op0 = gen_lowpart (V4DImode, d->op0);
42813 op1 = gen_lowpart (V4DImode, d->op1);
42814 rperm[0]
42815 = GEN_INT ((d->perm[0] / (nelt / 2))
42816 | ((d->perm[nelt / 2] / (nelt / 2)) * 16));
42817 emit_insn (gen_avx2_permv2ti (target, op0, op1, rperm[0]));
42818 if (target != d->target)
42819 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
42820 return true;
42821 }
42822 return false;
42823 }
42824 }
42825 else
42826 {
42827 if (GET_MODE_SIZE (d->vmode) == 16)
42828 {
42829 if (!TARGET_SSSE3)
42830 return false;
42831 }
42832 else if (GET_MODE_SIZE (d->vmode) == 32)
42833 {
42834 if (!TARGET_AVX2)
42835 return false;
42836
42837 /* V4DImode should already have been handled through
42838 expand_vselect by the vpermq instruction.  */
42839 gcc_assert (d->vmode != V4DImode);
42840
42841 vmode = V32QImode;
42842 if (d->vmode == V8SImode
42843 || d->vmode == V16HImode
42844 || d->vmode == V32QImode)
42845 {
42846 /* First see if vpermq can be used for
42847 V8SImode/V16HImode/V32QImode. */
42848 if (valid_perm_using_mode_p (V4DImode, d))
42849 {
42850 for (i = 0; i < 4; i++)
42851 perm[i] = (d->perm[i * nelt / 4] * 4 / nelt) & 3;
42852 if (d->testing_p)
42853 return true;
42854 target = gen_reg_rtx (V4DImode);
42855 if (expand_vselect (target, gen_lowpart (V4DImode, d->op0),
42856 perm, 4, false))
42857 {
42858 emit_move_insn (d->target,
42859 gen_lowpart (d->vmode, target));
42860 return true;
42861 }
42862 return false;
42863 }
42864
42865 /* Next see if vpermd can be used. */
42866 if (valid_perm_using_mode_p (V8SImode, d))
42867 vmode = V8SImode;
42868 }
42869 /* Or if vpermps can be used. */
42870 else if (d->vmode == V8SFmode)
42871 vmode = V8SImode;
42872
42873 if (vmode == V32QImode)
42874 {
42875 /* vpshufb only works within 128-bit lanes; it cannot
42876 move bytes between the lanes.  */
42877 for (i = 0; i < nelt; ++i)
42878 if ((d->perm[i] ^ i) & (nelt / 2))
42879 return false;
42880 }
42881 }
42882 else
42883 return false;
42884 }
42885
42886 if (d->testing_p)
42887 return true;
42888
42889 if (vmode == V8SImode)
42890 for (i = 0; i < 8; ++i)
42891 rperm[i] = GEN_INT ((d->perm[i * nelt / 8] * 8 / nelt) & 7);
42892 else
42893 {
42894 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
42895 if (!d->one_operand_p)
42896 mask = 2 * nelt - 1;
42897 else if (vmode == V16QImode)
42898 mask = nelt - 1;
42899 else
42900 mask = nelt / 2 - 1;
42901
42902 for (i = 0; i < nelt; ++i)
42903 {
42904 unsigned j, e = d->perm[i] & mask;
42905 for (j = 0; j < eltsz; ++j)
42906 rperm[i * eltsz + j] = GEN_INT (e * eltsz + j);
42907 }
42908 }
42909
42910 vperm = gen_rtx_CONST_VECTOR (vmode,
42911 gen_rtvec_v (GET_MODE_NUNITS (vmode), rperm));
42912 vperm = force_reg (vmode, vperm);
42913
42914 target = d->target;
42915 if (d->vmode != vmode)
42916 target = gen_reg_rtx (vmode);
42917 op0 = gen_lowpart (vmode, d->op0);
42918 if (d->one_operand_p)
42919 {
42920 if (vmode == V16QImode)
42921 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
42922 else if (vmode == V32QImode)
42923 emit_insn (gen_avx2_pshufbv32qi3 (target, op0, vperm));
42924 else if (vmode == V8SFmode)
42925 emit_insn (gen_avx2_permvarv8sf (target, op0, vperm));
42926 else
42927 emit_insn (gen_avx2_permvarv8si (target, op0, vperm));
42928 }
42929 else
42930 {
42931 op1 = gen_lowpart (vmode, d->op1);
42932 emit_insn (gen_xop_pperm (target, op0, op1, vperm));
42933 }
42934 if (target != d->target)
42935 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
42936
42937 return true;
42938 }
42939
42940 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to instantiate D
42941 in a single instruction. */
42942
42943 static bool
42944 expand_vec_perm_1 (struct expand_vec_perm_d *d)
42945 {
42946 unsigned i, nelt = d->nelt;
42947 unsigned char perm2[MAX_VECT_LEN];
42948
42949 /* Check plain VEC_SELECT first, because AVX has instructions that could
42950 match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
42951 input where SEL+CONCAT may not. */
42952 if (d->one_operand_p)
42953 {
42954 int mask = nelt - 1;
42955 bool identity_perm = true;
42956 bool broadcast_perm = true;
42957
42958 for (i = 0; i < nelt; i++)
42959 {
42960 perm2[i] = d->perm[i] & mask;
42961 if (perm2[i] != i)
42962 identity_perm = false;
42963 if (perm2[i])
42964 broadcast_perm = false;
42965 }
42966
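      /* identity_perm: the masked permutation is 0, 1, ..., nelt-1, i.e. a
         plain copy of op0.  broadcast_perm: every index is 0, i.e. a splat
         of element 0.  */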
42967 if (identity_perm)
42968 {
42969 if (!d->testing_p)
42970 emit_move_insn (d->target, d->op0);
42971 return true;
42972 }
42973 else if (broadcast_perm && TARGET_AVX2)
42974 {
42975 /* Use vpbroadcast{b,w,d}. */
42976 rtx (*gen) (rtx, rtx) = NULL;
42977 switch (d->vmode)
42978 {
42979 case V32QImode:
42980 gen = gen_avx2_pbroadcastv32qi_1;
42981 break;
42982 case V16HImode:
42983 gen = gen_avx2_pbroadcastv16hi_1;
42984 break;
42985 case V8SImode:
42986 gen = gen_avx2_pbroadcastv8si_1;
42987 break;
42988 case V16QImode:
42989 gen = gen_avx2_pbroadcastv16qi;
42990 break;
42991 case V8HImode:
42992 gen = gen_avx2_pbroadcastv8hi;
42993 break;
42994 case V8SFmode:
42995 gen = gen_avx2_vec_dupv8sf_1;
42996 break;
42997 /* For other modes prefer other shuffles this function creates. */
42998 default: break;
42999 }
43000 if (gen != NULL)
43001 {
43002 if (!d->testing_p)
43003 emit_insn (gen (d->target, d->op0));
43004 return true;
43005 }
43006 }
43007
43008 if (expand_vselect (d->target, d->op0, perm2, nelt, d->testing_p))
43009 return true;
43010
43011 /* There are plenty of patterns in sse.md that are written for
43012 SEL+CONCAT and are not replicated for a single op. Perhaps
43013 that should be changed, to avoid the nastiness here. */
43014
43015 /* Recognize interleave style patterns, which means offsetting
43016 every other permutation index by nelt.  */
43017 for (i = 0; i < nelt; i += 2)
43018 {
43019 perm2[i] = d->perm[i] & mask;
43020 perm2[i + 1] = (d->perm[i + 1] & mask) + nelt;
43021 }
43022 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt,
43023 d->testing_p))
43024 return true;
43025
43026 /* Recognize shufps, which means adding {0, 0, nelt, nelt}. */
43027 if (nelt >= 4)
43028 {
43029 for (i = 0; i < nelt; i += 4)
43030 {
43031 perm2[i + 0] = d->perm[i + 0] & mask;
43032 perm2[i + 1] = d->perm[i + 1] & mask;
43033 perm2[i + 2] = (d->perm[i + 2] & mask) + nelt;
43034 perm2[i + 3] = (d->perm[i + 3] & mask) + nelt;
43035 }
43036
43037 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt,
43038 d->testing_p))
43039 return true;
43040 }
43041 }
43042
43043 /* Finally, try the fully general two operand permute. */
43044 if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt,
43045 d->testing_p))
43046 return true;
43047
43048 /* Recognize interleave style patterns with reversed operands. */
43049 if (!d->one_operand_p)
43050 {
43051 for (i = 0; i < nelt; ++i)
43052 {
43053 unsigned e = d->perm[i];
43054 if (e >= nelt)
43055 e -= nelt;
43056 else
43057 e += nelt;
43058 perm2[i] = e;
43059 }
43060
43061 if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt,
43062 d->testing_p))
43063 return true;
43064 }
43065
43066 /* Try the SSE4.1 blend variable merge instructions. */
43067 if (expand_vec_perm_blend (d))
43068 return true;
43069
43070 /* Try one of the AVX vpermil variable permutations. */
43071 if (expand_vec_perm_vpermil (d))
43072 return true;
43073
43074 /* Try the SSSE3 pshufb or XOP vpperm or AVX2 vperm2i128,
43075 vpshufb, vpermd, vpermps or vpermq variable permutation. */
43076 if (expand_vec_perm_pshufb (d))
43077 return true;
43078
43079 /* Try the AVX512F vpermi2 instructions. */
43080 rtx vec[64];
43081 enum machine_mode mode = d->vmode;
43082 if (mode == V8DFmode)
43083 mode = V8DImode;
43084 else if (mode == V16SFmode)
43085 mode = V16SImode;
43086 for (i = 0; i < nelt; ++i)
43087 vec[i] = GEN_INT (d->perm[i]);
43088 rtx mask = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt, vec));
43089 if (ix86_expand_vec_perm_vpermi2 (d->target, d->op0, mask, d->op1))
43090 return true;
43091
43092 return false;
43093 }
43094
43095 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
43096 in terms of a pair of pshuflw + pshufhw instructions. */
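/* For instance, { 2, 1, 3, 0, 5, 7, 6, 4 } is expanded as a pshuflw that
   reorders the low quadword as { 2, 1, 3, 0 } followed by a pshufhw that
   reorders the high quadword as { 5, 7, 6, 4 }.  */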
43097
43098 static bool
43099 expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d)
43100 {
43101 unsigned char perm2[MAX_VECT_LEN];
43102 unsigned i;
43103 bool ok;
43104
43105 if (d->vmode != V8HImode || !d->one_operand_p)
43106 return false;
43107
43108 /* The two permutations only operate in 64-bit lanes. */
43109 for (i = 0; i < 4; ++i)
43110 if (d->perm[i] >= 4)
43111 return false;
43112 for (i = 4; i < 8; ++i)
43113 if (d->perm[i] < 4)
43114 return false;
43115
43116 if (d->testing_p)
43117 return true;
43118
43119 /* Emit the pshuflw. */
43120 memcpy (perm2, d->perm, 4);
43121 for (i = 4; i < 8; ++i)
43122 perm2[i] = i;
43123 ok = expand_vselect (d->target, d->op0, perm2, 8, d->testing_p);
43124 gcc_assert (ok);
43125
43126 /* Emit the pshufhw. */
43127 memcpy (perm2 + 4, d->perm + 4, 4);
43128 for (i = 0; i < 4; ++i)
43129 perm2[i] = i;
43130 ok = expand_vselect (d->target, d->target, perm2, 8, d->testing_p);
43131 gcc_assert (ok);
43132
43133 return true;
43134 }
43135
43136 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
43137 the permutation using the SSSE3 palignr instruction. This succeeds
43138 when all of the elements in PERM fit within one vector and we merely
43139 need to shift them down so that a single vector permutation has a
43140 chance to succeed. */
43141
43142 static bool
43143 expand_vec_perm_palignr (struct expand_vec_perm_d *d)
43144 {
43145 unsigned i, nelt = d->nelt;
43146 unsigned min, max;
43147 bool in_order, ok;
43148 rtx shift, target;
43149 struct expand_vec_perm_d dcopy;
43150
43151 /* Even with AVX, palignr only operates on 128-bit vectors. */
43152 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
43153 return false;
43154
43155 min = nelt, max = 0;
43156 for (i = 0; i < nelt; ++i)
43157 {
43158 unsigned e = d->perm[i];
43159 if (e < min)
43160 min = e;
43161 if (e > max)
43162 max = e;
43163 }
43164 if (min == 0 || max - min >= nelt)
43165 return false;
43166
43167 /* Given that we have SSSE3, we know we'll be able to implement the
43168 single operand permutation after the palignr with pshufb. */
43169 if (d->testing_p)
43170 return true;
43171
43172 dcopy = *d;
43173 shift = GEN_INT (min * GET_MODE_BITSIZE (GET_MODE_INNER (d->vmode)));
43174 target = gen_reg_rtx (TImode);
43175 emit_insn (gen_ssse3_palignrti (target, gen_lowpart (TImode, d->op1),
43176 gen_lowpart (TImode, d->op0), shift));
43177
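  /* After the palignr every requested element lives in the single vector
     TARGET at index d->perm[i] - min, so a one-operand permutation (or
     nothing at all, if the elements are already in order) finishes the
     job.  */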
43178 dcopy.op0 = dcopy.op1 = gen_lowpart (d->vmode, target);
43179 dcopy.one_operand_p = true;
43180
43181 in_order = true;
43182 for (i = 0; i < nelt; ++i)
43183 {
43184 unsigned e = dcopy.perm[i] - min;
43185 if (e != i)
43186 in_order = false;
43187 dcopy.perm[i] = e;
43188 }
43189
43190 /* Test for the degenerate case where the alignment by itself
43191 produces the desired permutation. */
43192 if (in_order)
43193 {
43194 emit_move_insn (d->target, dcopy.op0);
43195 return true;
43196 }
43197
43198 ok = expand_vec_perm_1 (&dcopy);
43199 gcc_assert (ok);
43200
43201 return ok;
43202 }
43203
43204 /* A subroutine of ix86_expand_vec_perm_const_1. Try to simplify
43205 the permutation using the SSE4_1 pblendv instruction. Potentially
43206 reduces the permutation from 2 pshufb insns plus a por to 1 pshufb and 1 pblendv.  */
43207
43208 static bool
43209 expand_vec_perm_pblendv (struct expand_vec_perm_d *d)
43210 {
43211 unsigned i, which, nelt = d->nelt;
43212 struct expand_vec_perm_d dcopy, dcopy1;
43213 enum machine_mode vmode = d->vmode;
43214 bool ok;
43215
43216 /* Use the same checks as in expand_vec_perm_blend, but skipping
43217 AVX2 as it requires more than 2 instructions for general case. */
43218 if (d->one_operand_p)
43219 return false;
43220 if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
43221 ;
43222 else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
43223 ;
43224 else
43225 return false;
43226
43227 /* Record which elements do not stay in place: bit 0 of WHICH is set if
43228 any such element comes from the first operand, bit 1 if any comes from the second.  */
43229 for (i = 0, which = 0; i < nelt; ++i)
43230 {
43231 unsigned e = d->perm[i];
43232 if (e != i)
43233 which |= (e < nelt ? 1 : 2);
43234 }
43235 /* We can handle the displaced elements with a single pblendv
43236 only when they all come from the same operand, i.e. when
43237 WHICH is exactly 1 or exactly 2.
43238 { 0 1 8 3 4 5 9 7 } is ok: the displaced elements 8 and 9
43239 both come from the second operand (both are >= 8).
43240 { 0 1 8 3 4 5 2 7 } is not ok: 8 comes from the second
43241 operand but 2 comes from the first.  */
43242 if (which != 1 && which != 2)
43243 return false;
43244 if (d->testing_p)
43245 return true;
43246
43247 /* First permute the operand that supplies the displaced elements
43248 so that each of them lands in its final position.  */
43249 dcopy = *d;
43250 if (which == 2)
43251 dcopy.op0 = dcopy.op1 = d->op1;
43252 else
43253 dcopy.op0 = dcopy.op1 = d->op0;
43254 dcopy.one_operand_p = true;
43255
43256 for (i = 0; i < nelt; ++i)
43257 dcopy.perm[i] = d->perm[i] & (nelt - 1);
43258
43259 ok = expand_vec_perm_1 (&dcopy);
43260 gcc_assert (ok);
43261
43262 /* Then blend the permuted elements into place against the other operand.  */
43263 dcopy1 = *d;
43264 if (which == 2)
43265 dcopy1.op1 = dcopy.target;
43266 else
43267 dcopy1.op0 = dcopy.target;
43268
43269 for (i = 0; i < nelt; ++i)
43270 dcopy1.perm[i] = ((d->perm[i] >= nelt) ? (nelt + i) : i);
43271
43272 ok = expand_vec_perm_blend (&dcopy1);
43273 gcc_assert (ok);
43274
43275 return true;
43276 }
43277
43278 static bool expand_vec_perm_interleave3 (struct expand_vec_perm_d *d);
43279
43280 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
43281 a two vector permutation into a single vector permutation by using
43282 an interleave operation to merge the vectors. */
43283
43284 static bool
43285 expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
43286 {
43287 struct expand_vec_perm_d dremap, dfinal;
43288 unsigned i, nelt = d->nelt, nelt2 = nelt / 2;
43289 unsigned HOST_WIDE_INT contents;
43290 unsigned char remap[2 * MAX_VECT_LEN];
43291 rtx seq;
43292 bool ok, same_halves = false;
43293
43294 if (GET_MODE_SIZE (d->vmode) == 16)
43295 {
43296 if (d->one_operand_p)
43297 return false;
43298 }
43299 else if (GET_MODE_SIZE (d->vmode) == 32)
43300 {
43301 if (!TARGET_AVX)
43302 return false;
43303 /* For 32-byte modes this is allowed even for d->one_operand_p:
43304 the lack of cross-lane shuffling in some instructions
43305 might prevent a single-insn shuffle.  */
43306 dfinal = *d;
43307 dfinal.testing_p = true;
43308 /* If expand_vec_perm_interleave3 can expand this into
43309 a 3-insn sequence, give up and let it be expanded that
43310 way instead.  While that is one insn longer, it does not
43311 need a memory operand, and in the common case that both
43312 the interleave-low and interleave-high permutations with
43313 the same operands are adjacent, the pair needs only 4
43314 insns in total after CSE.  */
43315 if (expand_vec_perm_interleave3 (&dfinal))
43316 return false;
43317 }
43318 else
43319 return false;
43320
43321 /* Examine from whence the elements come. */
43322 contents = 0;
43323 for (i = 0; i < nelt; ++i)
43324 contents |= ((unsigned HOST_WIDE_INT) 1) << d->perm[i];
43325
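  /* CONTENTS now has bit J set iff element J of the op0:op1 concatenation
     is referenced by the permutation.  */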
43326 memset (remap, 0xff, sizeof (remap));
43327 dremap = *d;
43328
43329 if (GET_MODE_SIZE (d->vmode) == 16)
43330 {
43331 unsigned HOST_WIDE_INT h1, h2, h3, h4;
43332
43333 /* Split the two input vectors into 4 halves. */
43334 h1 = (((unsigned HOST_WIDE_INT) 1) << nelt2) - 1;
43335 h2 = h1 << nelt2;
43336 h3 = h2 << nelt2;
43337 h4 = h3 << nelt2;
43338
43339 /* If all elements come from the low halves, use interleave low;
43340 likewise interleave high for the high halves.  If the elements come
43341 from mismatched halves, use shufps for V4SF/V4SI or a DImode shuffle.  */
43342 if ((contents & (h1 | h3)) == contents)
43343 {
43344 /* punpckl* */
43345 for (i = 0; i < nelt2; ++i)
43346 {
43347 remap[i] = i * 2;
43348 remap[i + nelt] = i * 2 + 1;
43349 dremap.perm[i * 2] = i;
43350 dremap.perm[i * 2 + 1] = i + nelt;
43351 }
43352 if (!TARGET_SSE2 && d->vmode == V4SImode)
43353 dremap.vmode = V4SFmode;
43354 }
43355 else if ((contents & (h2 | h4)) == contents)
43356 {
43357 /* punpckh* */
43358 for (i = 0; i < nelt2; ++i)
43359 {
43360 remap[i + nelt2] = i * 2;
43361 remap[i + nelt + nelt2] = i * 2 + 1;
43362 dremap.perm[i * 2] = i + nelt2;
43363 dremap.perm[i * 2 + 1] = i + nelt + nelt2;
43364 }
43365 if (!TARGET_SSE2 && d->vmode == V4SImode)
43366 dremap.vmode = V4SFmode;
43367 }
43368 else if ((contents & (h1 | h4)) == contents)
43369 {
43370 /* shufps */
43371 for (i = 0; i < nelt2; ++i)
43372 {
43373 remap[i] = i;
43374 remap[i + nelt + nelt2] = i + nelt2;
43375 dremap.perm[i] = i;
43376 dremap.perm[i + nelt2] = i + nelt + nelt2;
43377 }
43378 if (nelt != 4)
43379 {
43380 /* shufpd */
43381 dremap.vmode = V2DImode;
43382 dremap.nelt = 2;
43383 dremap.perm[0] = 0;
43384 dremap.perm[1] = 3;
43385 }
43386 }
43387 else if ((contents & (h2 | h3)) == contents)
43388 {
43389 /* shufps */
43390 for (i = 0; i < nelt2; ++i)
43391 {
43392 remap[i + nelt2] = i;
43393 remap[i + nelt] = i + nelt2;
43394 dremap.perm[i] = i + nelt2;
43395 dremap.perm[i + nelt2] = i + nelt;
43396 }
43397 if (nelt != 4)
43398 {
43399 /* shufpd */
43400 dremap.vmode = V2DImode;
43401 dremap.nelt = 2;
43402 dremap.perm[0] = 1;
43403 dremap.perm[1] = 2;
43404 }
43405 }
43406 else
43407 return false;
43408 }
43409 else
43410 {
43411 unsigned int nelt4 = nelt / 4, nzcnt = 0;
43412 unsigned HOST_WIDE_INT q[8];
43413 unsigned int nonzero_halves[4];
43414
43415 /* Split the two input vectors into 8 quarters. */
43416 q[0] = (((unsigned HOST_WIDE_INT) 1) << nelt4) - 1;
43417 for (i = 1; i < 8; ++i)
43418 q[i] = q[0] << (nelt4 * i);
43419 for (i = 0; i < 4; ++i)
43420 if (((q[2 * i] | q[2 * i + 1]) & contents) != 0)
43421 {
43422 nonzero_halves[nzcnt] = i;
43423 ++nzcnt;
43424 }
43425
43426 if (nzcnt == 1)
43427 {
43428 gcc_assert (d->one_operand_p);
43429 nonzero_halves[1] = nonzero_halves[0];
43430 same_halves = true;
43431 }
43432 else if (d->one_operand_p)
43433 {
43434 gcc_assert (nonzero_halves[0] == 0);
43435 gcc_assert (nonzero_halves[1] == 1);
43436 }
43437
43438 if (nzcnt <= 2)
43439 {
43440 if (d->perm[0] / nelt2 == nonzero_halves[1])
43441 {
43442 /* Attempt to increase the likelihood that the dfinal
43443 shuffle will be intra-lane.  */
43444 char tmph = nonzero_halves[0];
43445 nonzero_halves[0] = nonzero_halves[1];
43446 nonzero_halves[1] = tmph;
43447 }
43448
43449 /* vperm2f128 or vperm2i128. */
43450 for (i = 0; i < nelt2; ++i)
43451 {
43452 remap[i + nonzero_halves[1] * nelt2] = i + nelt2;
43453 remap[i + nonzero_halves[0] * nelt2] = i;
43454 dremap.perm[i + nelt2] = i + nonzero_halves[1] * nelt2;
43455 dremap.perm[i] = i + nonzero_halves[0] * nelt2;
43456 }
43457
43458 if (d->vmode != V8SFmode
43459 && d->vmode != V4DFmode
43460 && d->vmode != V8SImode)
43461 {
43462 dremap.vmode = V8SImode;
43463 dremap.nelt = 8;
43464 for (i = 0; i < 4; ++i)
43465 {
43466 dremap.perm[i] = i + nonzero_halves[0] * 4;
43467 dremap.perm[i + 4] = i + nonzero_halves[1] * 4;
43468 }
43469 }
43470 }
43471 else if (d->one_operand_p)
43472 return false;
43473 else if (TARGET_AVX2
43474 && (contents & (q[0] | q[2] | q[4] | q[6])) == contents)
43475 {
43476 /* vpunpckl* */
43477 for (i = 0; i < nelt4; ++i)
43478 {
43479 remap[i] = i * 2;
43480 remap[i + nelt] = i * 2 + 1;
43481 remap[i + nelt2] = i * 2 + nelt2;
43482 remap[i + nelt + nelt2] = i * 2 + nelt2 + 1;
43483 dremap.perm[i * 2] = i;
43484 dremap.perm[i * 2 + 1] = i + nelt;
43485 dremap.perm[i * 2 + nelt2] = i + nelt2;
43486 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2;
43487 }
43488 }
43489 else if (TARGET_AVX2
43490 && (contents & (q[1] | q[3] | q[5] | q[7])) == contents)
43491 {
43492 /* vpunpckh* */
43493 for (i = 0; i < nelt4; ++i)
43494 {
43495 remap[i + nelt4] = i * 2;
43496 remap[i + nelt + nelt4] = i * 2 + 1;
43497 remap[i + nelt2 + nelt4] = i * 2 + nelt2;
43498 remap[i + nelt + nelt2 + nelt4] = i * 2 + nelt2 + 1;
43499 dremap.perm[i * 2] = i + nelt4;
43500 dremap.perm[i * 2 + 1] = i + nelt + nelt4;
43501 dremap.perm[i * 2 + nelt2] = i + nelt2 + nelt4;
43502 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2 + nelt4;
43503 }
43504 }
43505 else
43506 return false;
43507 }
43508
43509 /* Use the remapping array set up above to move the elements from their
43510 swizzled locations into their final destinations. */
43511 dfinal = *d;
43512 for (i = 0; i < nelt; ++i)
43513 {
43514 unsigned e = remap[d->perm[i]];
43515 gcc_assert (e < nelt);
43516 /* If same_halves is true, both halves of the remapped vector are the
43517 same. Avoid cross-lane accesses if possible. */
43518 if (same_halves && i >= nelt2)
43519 {
43520 gcc_assert (e < nelt2);
43521 dfinal.perm[i] = e + nelt2;
43522 }
43523 else
43524 dfinal.perm[i] = e;
43525 }
43526 if (!d->testing_p)
43527 {
43528 dremap.target = gen_reg_rtx (dremap.vmode);
43529 dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
43530 }
43531 dfinal.op1 = dfinal.op0;
43532 dfinal.one_operand_p = true;
43533
43534 /* Test if the final remap can be done with a single insn. For V4SFmode or
43535 V4SImode this *will* succeed. For V8HImode or V16QImode it may not. */
43536 start_sequence ();
43537 ok = expand_vec_perm_1 (&dfinal);
43538 seq = get_insns ();
43539 end_sequence ();
43540
43541 if (!ok)
43542 return false;
43543
43544 if (d->testing_p)
43545 return true;
43546
43547 if (dremap.vmode != dfinal.vmode)
43548 {
43549 dremap.op0 = gen_lowpart (dremap.vmode, dremap.op0);
43550 dremap.op1 = gen_lowpart (dremap.vmode, dremap.op1);
43551 }
43552
43553 ok = expand_vec_perm_1 (&dremap);
43554 gcc_assert (ok);
43555
43556 emit_insn (seq);
43557 return true;
43558 }
43559
43560 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
43561 a single vector cross-lane permutation into vpermq followed
43562 by any of the single insn permutations. */
43563
43564 static bool
43565 expand_vec_perm_vpermq_perm_1 (struct expand_vec_perm_d *d)
43566 {
43567 struct expand_vec_perm_d dremap, dfinal;
43568 unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, nelt4 = nelt / 4;
43569 unsigned contents[2];
43570 bool ok;
43571
43572 if (!(TARGET_AVX2
43573 && (d->vmode == V32QImode || d->vmode == V16HImode)
43574 && d->one_operand_p))
43575 return false;
43576
43577 contents[0] = 0;
43578 contents[1] = 0;
43579 for (i = 0; i < nelt2; ++i)
43580 {
43581 contents[0] |= 1u << (d->perm[i] / nelt4);
43582 contents[1] |= 1u << (d->perm[i + nelt2] / nelt4);
43583 }
43584
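/* Each 128-bit half of the result may use at most two of the four 64-bit
   quarters of the input; otherwise a single vpermq cannot gather the
   required quarters into one lane.  */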
43585 for (i = 0; i < 2; ++i)
43586 {
43587 unsigned int cnt = 0;
43588 for (j = 0; j < 4; ++j)
43589 if ((contents[i] & (1u << j)) != 0 && ++cnt > 2)
43590 return false;
43591 }
43592
43593 if (d->testing_p)
43594 return true;
43595
43596 dremap = *d;
43597 dremap.vmode = V4DImode;
43598 dremap.nelt = 4;
43599 dremap.target = gen_reg_rtx (V4DImode);
43600 dremap.op0 = gen_lowpart (V4DImode, d->op0);
43601 dremap.op1 = dremap.op0;
43602 dremap.one_operand_p = true;
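/* Build the V4DImode (vpermq) selector: place the (at most two) quarters
   needed by each half of the result into consecutive positions, padding
   unused slots with quarter 0.  */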
43603 for (i = 0; i < 2; ++i)
43604 {
43605 unsigned int cnt = 0;
43606 for (j = 0; j < 4; ++j)
43607 if ((contents[i] & (1u << j)) != 0)
43608 dremap.perm[2 * i + cnt++] = j;
43609 for (; cnt < 2; ++cnt)
43610 dremap.perm[2 * i + cnt] = 0;
43611 }
43612
43613 dfinal = *d;
43614 dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
43615 dfinal.op1 = dfinal.op0;
43616 dfinal.one_operand_p = true;
43617 for (i = 0, j = 0; i < nelt; ++i)
43618 {
43619 if (i == nelt2)
43620 j = 2;
43621 dfinal.perm[i] = (d->perm[i] & (nelt4 - 1)) | (j ? nelt2 : 0);
43622 if ((d->perm[i] / nelt4) == dremap.perm[j])
43623 ;
43624 else if ((d->perm[i] / nelt4) == dremap.perm[j + 1])
43625 dfinal.perm[i] |= nelt4;
43626 else
43627 gcc_unreachable ();
43628 }
43629
43630 ok = expand_vec_perm_1 (&dremap);
43631 gcc_assert (ok);
43632
43633 ok = expand_vec_perm_1 (&dfinal);
43634 gcc_assert (ok);
43635
43636 return true;
43637 }
43638
43639 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to expand
43640 a vector permutation using two instructions, vperm2f128 resp.
43641 vperm2i128 followed by any single in-lane permutation. */
43642
43643 static bool
43644 expand_vec_perm_vperm2f128 (struct expand_vec_perm_d *d)
43645 {
43646 struct expand_vec_perm_d dfirst, dsecond;
43647 unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, perm;
43648 bool ok;
43649
43650 if (!TARGET_AVX
43651 || GET_MODE_SIZE (d->vmode) != 32
43652 || (d->vmode != V8SFmode && d->vmode != V4DFmode && !TARGET_AVX2))
43653 return false;
43654
43655 dsecond = *d;
43656 dsecond.one_operand_p = false;
43657 dsecond.testing_p = true;
43658
43659 /* ((perm << 2)|perm) & 0x33 is the vperm2[fi]128
43660 immediate. For perm < 16 the second permutation uses
43661 d->op0 as first operand, for perm >= 16 it uses d->op1
43662 as first operand. The second operand is the result of
43663 vperm2[fi]128. */
43664 for (perm = 0; perm < 32; perm++)
43665 {
43666 /* Ignore permutations which do not move anything cross-lane. */
43667 if (perm < 16)
43668 {
43669 /* The second shuffle for e.g. V4DFmode has
43670 0123 and ABCD operands.
43671 Ignore AB23, as 23 is already in the second lane
43672 of the first operand. */
43673 if ((perm & 0xc) == (1 << 2)) continue;
43674 /* And 01CD, as 01 is in the first lane of the first
43675 operand. */
43676 if ((perm & 3) == 0) continue;
43677 /* And 4567, as then the vperm2[fi]128 doesn't change
43678 anything on the original 4567 second operand. */
43679 if ((perm & 0xf) == ((3 << 2) | 2)) continue;
43680 }
43681 else
43682 {
43683 /* The second shuffle for e.g. V4DFmode has
43684 4567 and ABCD operands.
43685 Ignore AB67, as 67 is already in the second lane
43686 of the first operand. */
43687 if ((perm & 0xc) == (3 << 2)) continue;
43688 /* And 45CD, as 45 is in the first lane of the first
43689 operand. */
43690 if ((perm & 3) == 2) continue;
43691 /* And 0123, as then the vperm2[fi]128 doesn't change
43692 anything on the original 0123 first operand. */
43693 if ((perm & 0xf) == (1 << 2)) continue;
43694 }
43695
43696 for (i = 0; i < nelt; i++)
43697 {
43698 j = d->perm[i] / nelt2;
43699 if (j == ((perm >> (2 * (i >= nelt2))) & 3))
43700 dsecond.perm[i] = nelt + (i & nelt2) + (d->perm[i] & (nelt2 - 1));
43701 else if (j == (unsigned) (i >= nelt2) + 2 * (perm >= 16))
43702 dsecond.perm[i] = d->perm[i] & (nelt - 1);
43703 else
43704 break;
43705 }
43706
43707 if (i == nelt)
43708 {
43709 start_sequence ();
43710 ok = expand_vec_perm_1 (&dsecond);
43711 end_sequence ();
43712 }
43713 else
43714 ok = false;
43715
43716 if (ok)
43717 {
43718 if (d->testing_p)
43719 return true;
43720
43721 /* Found a usable second shuffle. dfirst will be
43722 vperm2f128 on d->op0 and d->op1. */
43723 dsecond.testing_p = false;
43724 dfirst = *d;
43725 dfirst.target = gen_reg_rtx (d->vmode);
43726 for (i = 0; i < nelt; i++)
43727 dfirst.perm[i] = (i & (nelt2 - 1))
43728 + ((perm >> (2 * (i >= nelt2))) & 3) * nelt2;
43729
43730 ok = expand_vec_perm_1 (&dfirst);
43731 gcc_assert (ok);
43732
43733 /* And dsecond is some single insn shuffle, taking
43734 d->op0 and result of vperm2f128 (if perm < 16) or
43735 d->op1 and result of vperm2f128 (otherwise). */
43736 dsecond.op1 = dfirst.target;
43737 if (perm >= 16)
43738 dsecond.op0 = dfirst.op1;
43739
43740 ok = expand_vec_perm_1 (&dsecond);
43741 gcc_assert (ok);
43742
43743 return true;
43744 }
43745
43746 /* For one operand, the only useful vperm2f128 permutation is 0x10. */
43747 if (d->one_operand_p)
43748 return false;
43749 }
43750
43751 return false;
43752 }
43753
43754 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
43755 a two vector permutation using 2 intra-lane interleave insns
43756 and cross-lane shuffle for 32-byte vectors. */
43757
43758 static bool
43759 expand_vec_perm_interleave3 (struct expand_vec_perm_d *d)
43760 {
43761 unsigned i, nelt;
43762 rtx (*gen) (rtx, rtx, rtx);
43763
43764 if (d->one_operand_p)
43765 return false;
43766 if (TARGET_AVX2 && GET_MODE_SIZE (d->vmode) == 32)
43767 ;
43768 else if (TARGET_AVX && (d->vmode == V8SFmode || d->vmode == V4DFmode))
43769 ;
43770 else
43771 return false;
43772
43773 nelt = d->nelt;
43774 if (d->perm[0] != 0 && d->perm[0] != nelt / 2)
43775 return false;
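/* Check for the interleave pattern { p, p + nelt, p + 1, p + nelt + 1, ... }
   where p is either 0 (interleave low) or nelt / 2 (interleave high).  */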
43776 for (i = 0; i < nelt; i += 2)
43777 if (d->perm[i] != d->perm[0] + i / 2
43778 || d->perm[i + 1] != d->perm[0] + i / 2 + nelt)
43779 return false;
43780
43781 if (d->testing_p)
43782 return true;
43783
43784 switch (d->vmode)
43785 {
43786 case V32QImode:
43787 if (d->perm[0])
43788 gen = gen_vec_interleave_highv32qi;
43789 else
43790 gen = gen_vec_interleave_lowv32qi;
43791 break;
43792 case V16HImode:
43793 if (d->perm[0])
43794 gen = gen_vec_interleave_highv16hi;
43795 else
43796 gen = gen_vec_interleave_lowv16hi;
43797 break;
43798 case V8SImode:
43799 if (d->perm[0])
43800 gen = gen_vec_interleave_highv8si;
43801 else
43802 gen = gen_vec_interleave_lowv8si;
43803 break;
43804 case V4DImode:
43805 if (d->perm[0])
43806 gen = gen_vec_interleave_highv4di;
43807 else
43808 gen = gen_vec_interleave_lowv4di;
43809 break;
43810 case V8SFmode:
43811 if (d->perm[0])
43812 gen = gen_vec_interleave_highv8sf;
43813 else
43814 gen = gen_vec_interleave_lowv8sf;
43815 break;
43816 case V4DFmode:
43817 if (d->perm[0])
43818 gen = gen_vec_interleave_highv4df;
43819 else
43820 gen = gen_vec_interleave_lowv4df;
43821 break;
43822 default:
43823 gcc_unreachable ();
43824 }
43825
43826 emit_insn (gen (d->target, d->op0, d->op1));
43827 return true;
43828 }
43829
43830 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement
43831 a single vector permutation using a single intra-lane vector
43832 permutation, vperm2f128 swapping the lanes and vblend* insn blending
43833 the non-swapped and swapped vectors together. */
43834
43835 static bool
43836 expand_vec_perm_vperm2f128_vblend (struct expand_vec_perm_d *d)
43837 {
43838 struct expand_vec_perm_d dfirst, dsecond;
43839 unsigned i, j, msk, nelt = d->nelt, nelt2 = nelt / 2;
43840 rtx seq;
43841 bool ok;
43842 rtx (*blend) (rtx, rtx, rtx, rtx) = NULL;
43843
43844 if (!TARGET_AVX
43845 || TARGET_AVX2
43846 || (d->vmode != V8SFmode && d->vmode != V4DFmode)
43847 || !d->one_operand_p)
43848 return false;
43849
43850 dfirst = *d;
43851 for (i = 0; i < nelt; i++)
43852 dfirst.perm[i] = 0xff;
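/* For each requested element, compute the position J inside the lane that
   already contains it.  If J differs from the final position I, the element
   must be taken from the lane-swapped copy, which is recorded in the blend
   mask MSK.  */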
43853 for (i = 0, msk = 0; i < nelt; i++)
43854 {
43855 j = (d->perm[i] & nelt2) ? i | nelt2 : i & ~nelt2;
43856 if (dfirst.perm[j] != 0xff && dfirst.perm[j] != d->perm[i])
43857 return false;
43858 dfirst.perm[j] = d->perm[i];
43859 if (j != i)
43860 msk |= (1 << i);
43861 }
43862 for (i = 0; i < nelt; i++)
43863 if (dfirst.perm[i] == 0xff)
43864 dfirst.perm[i] = i;
43865
43866 if (!d->testing_p)
43867 dfirst.target = gen_reg_rtx (dfirst.vmode);
43868
43869 start_sequence ();
43870 ok = expand_vec_perm_1 (&dfirst);
43871 seq = get_insns ();
43872 end_sequence ();
43873
43874 if (!ok)
43875 return false;
43876
43877 if (d->testing_p)
43878 return true;
43879
43880 emit_insn (seq);
43881
43882 dsecond = *d;
43883 dsecond.op0 = dfirst.target;
43884 dsecond.op1 = dfirst.target;
43885 dsecond.one_operand_p = true;
43886 dsecond.target = gen_reg_rtx (dsecond.vmode);
43887 for (i = 0; i < nelt; i++)
43888 dsecond.perm[i] = i ^ nelt2;
43889
43890 ok = expand_vec_perm_1 (&dsecond);
43891 gcc_assert (ok);
43892
43893 blend = d->vmode == V8SFmode ? gen_avx_blendps256 : gen_avx_blendpd256;
43894 emit_insn (blend (d->target, dfirst.target, dsecond.target, GEN_INT (msk)));
43895 return true;
43896 }
43897
43898 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement a V4DF
43899 permutation using two vperm2f128, followed by a vshufpd insn blending
43900 the two vectors together. */
43901
43902 static bool
43903 expand_vec_perm_2vperm2f128_vshuf (struct expand_vec_perm_d *d)
43904 {
43905 struct expand_vec_perm_d dfirst, dsecond, dthird;
43906 bool ok;
43907
43908 if (!TARGET_AVX || (d->vmode != V4DFmode))
43909 return false;
43910
43911 if (d->testing_p)
43912 return true;
43913
43914 dfirst = *d;
43915 dsecond = *d;
43916 dthird = *d;
43917
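/* DFIRST gathers the 128-bit lanes that hold the sources of result elements
   0 and 2, DSECOND the lanes holding the sources of elements 1 and 3;
   DTHIRD then picks the required double from each lane, which matches a
   vshufpd pattern.  */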
43918 dfirst.perm[0] = (d->perm[0] & ~1);
43919 dfirst.perm[1] = (d->perm[0] & ~1) + 1;
43920 dfirst.perm[2] = (d->perm[2] & ~1);
43921 dfirst.perm[3] = (d->perm[2] & ~1) + 1;
43922 dsecond.perm[0] = (d->perm[1] & ~1);
43923 dsecond.perm[1] = (d->perm[1] & ~1) + 1;
43924 dsecond.perm[2] = (d->perm[3] & ~1);
43925 dsecond.perm[3] = (d->perm[3] & ~1) + 1;
43926 dthird.perm[0] = (d->perm[0] % 2);
43927 dthird.perm[1] = (d->perm[1] % 2) + 4;
43928 dthird.perm[2] = (d->perm[2] % 2) + 2;
43929 dthird.perm[3] = (d->perm[3] % 2) + 6;
43930
43931 dfirst.target = gen_reg_rtx (dfirst.vmode);
43932 dsecond.target = gen_reg_rtx (dsecond.vmode);
43933 dthird.op0 = dfirst.target;
43934 dthird.op1 = dsecond.target;
43935 dthird.one_operand_p = false;
43936
43937 canonicalize_perm (&dfirst);
43938 canonicalize_perm (&dsecond);
43939
43940 ok = expand_vec_perm_1 (&dfirst)
43941 && expand_vec_perm_1 (&dsecond)
43942 && expand_vec_perm_1 (&dthird);
43943
43944 gcc_assert (ok);
43945
43946 return true;
43947 }
43948
43949 /* A subroutine of expand_vec_perm_even_odd_1. Implement the double-word
43950 permutation with two pshufb insns and an ior. We should have already
43951 failed all two instruction sequences. */
43952
43953 static bool
43954 expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
43955 {
43956 rtx rperm[2][16], vperm, l, h, op, m128;
43957 unsigned int i, nelt, eltsz;
43958
43959 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
43960 return false;
43961 gcc_assert (!d->one_operand_p);
43962
43963 if (d->testing_p)
43964 return true;
43965
43966 nelt = d->nelt;
43967 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
43968
43969 /* Generate two permutation masks. If the required element is within
43970 the given vector it is shuffled into the proper lane. If the required
43971 element is in the other vector, force a zero into the lane by setting
43972 bit 7 in the permutation mask. */
43973 m128 = GEN_INT (-128);
43974 for (i = 0; i < nelt; ++i)
43975 {
43976 unsigned j, e = d->perm[i];
43977 unsigned which = (e >= nelt);
43978 if (e >= nelt)
43979 e -= nelt;
43980
43981 for (j = 0; j < eltsz; ++j)
43982 {
43983 rperm[which][i*eltsz + j] = GEN_INT (e*eltsz + j);
43984 rperm[1-which][i*eltsz + j] = m128;
43985 }
43986 }
43987
43988 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[0]));
43989 vperm = force_reg (V16QImode, vperm);
43990
43991 l = gen_reg_rtx (V16QImode);
43992 op = gen_lowpart (V16QImode, d->op0);
43993 emit_insn (gen_ssse3_pshufbv16qi3 (l, op, vperm));
43994
43995 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[1]));
43996 vperm = force_reg (V16QImode, vperm);
43997
43998 h = gen_reg_rtx (V16QImode);
43999 op = gen_lowpart (V16QImode, d->op1);
44000 emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm));
44001
44002 op = d->target;
44003 if (d->vmode != V16QImode)
44004 op = gen_reg_rtx (V16QImode);
44005 emit_insn (gen_iorv16qi3 (op, l, h));
44006 if (op != d->target)
44007 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
44008
44009 return true;
44010 }
44011
44012 /* Implement arbitrary permutation of one V32QImode or V16HImode operand
44013 with two vpshufb insns, vpermq and vpor. We should have already failed
44014 all two or three instruction sequences. */
44015
44016 static bool
44017 expand_vec_perm_vpshufb2_vpermq (struct expand_vec_perm_d *d)
44018 {
44019 rtx rperm[2][32], vperm, l, h, hp, op, m128;
44020 unsigned int i, nelt, eltsz;
44021
44022 if (!TARGET_AVX2
44023 || !d->one_operand_p
44024 || (d->vmode != V32QImode && d->vmode != V16HImode))
44025 return false;
44026
44027 if (d->testing_p)
44028 return true;
44029
44030 nelt = d->nelt;
44031 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
44032
44033 /* Generate two permutation masks. If the required element is within
44034 the same lane, it is shuffled in. If the required element is in the
44035 other lane, force a zero by setting bit 7 in the permutation mask.
44036 The other mask has a non-negative entry wherever an element is
44037 requested from the other lane, but that entry is also moved to the
44038 other lane, so that the result of vpshufb can have its two V2TImode
44039 halves swapped. */
44040 m128 = GEN_INT (-128);
44041 for (i = 0; i < nelt; ++i)
44042 {
44043 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
44044 unsigned which = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
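/* WHICH is 0 when the source element lies in the same 128-bit lane as
   position I, and 16 (the byte offset of the other lane) when the element
   has to cross lanes.  */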
44045
44046 for (j = 0; j < eltsz; ++j)
44047 {
44048 rperm[!!which][(i * eltsz + j) ^ which] = GEN_INT (e * eltsz + j);
44049 rperm[!which][(i * eltsz + j) ^ (which ^ 16)] = m128;
44050 }
44051 }
44052
44053 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
44054 vperm = force_reg (V32QImode, vperm);
44055
44056 h = gen_reg_rtx (V32QImode);
44057 op = gen_lowpart (V32QImode, d->op0);
44058 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
44059
44060 /* Swap the 128-bit lanes of h into hp. */
44061 hp = gen_reg_rtx (V4DImode);
44062 op = gen_lowpart (V4DImode, h);
44063 emit_insn (gen_avx2_permv4di_1 (hp, op, const2_rtx, GEN_INT (3), const0_rtx,
44064 const1_rtx));
44065
44066 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
44067 vperm = force_reg (V32QImode, vperm);
44068
44069 l = gen_reg_rtx (V32QImode);
44070 op = gen_lowpart (V32QImode, d->op0);
44071 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
44072
44073 op = d->target;
44074 if (d->vmode != V32QImode)
44075 op = gen_reg_rtx (V32QImode);
44076 emit_insn (gen_iorv32qi3 (op, l, gen_lowpart (V32QImode, hp)));
44077 if (op != d->target)
44078 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
44079
44080 return true;
44081 }
44082
44083 /* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even
44084 and extract-odd permutations of two V32QImode or V16HImode operands
44085 with two vpshufb insns, vpor and vpermq. We should have already
44086 failed all two or three instruction sequences. */
44087
44088 static bool
44089 expand_vec_perm_vpshufb2_vpermq_even_odd (struct expand_vec_perm_d *d)
44090 {
44091 rtx rperm[2][32], vperm, l, h, ior, op, m128;
44092 unsigned int i, nelt, eltsz;
44093
44094 if (!TARGET_AVX2
44095 || d->one_operand_p
44096 || (d->vmode != V32QImode && d->vmode != V16HImode))
44097 return false;
44098
44099 for (i = 0; i < d->nelt; ++i)
44100 if ((d->perm[i] ^ (i * 2)) & (3 * d->nelt / 2))
44101 return false;
44102
44103 if (d->testing_p)
44104 return true;
44105
44106 nelt = d->nelt;
44107 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
44108
44109 /* Generate two permutation masks. In the first permutation mask
44110 the first quarter will contain indexes for the first half
44111 of the op0, the second quarter will contain bit 7 set, third quarter
44112 will contain indexes for the second half of the op0 and the
44113 last quarter bit 7 set. In the second permutation mask
44114 the first quarter will contain bit 7 set, the second quarter
44115 indexes for the first half of the op1, the third quarter bit 7 set
44116 and last quarter indexes for the second half of the op1.
44117 I.e. the first mask e.g. for V32QImode extract even will be:
44118 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128
44119 (all values masked with 0xf except for -128) and second mask
44120 for extract even will be
44121 -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe. */
44122 m128 = GEN_INT (-128);
44123 for (i = 0; i < nelt; ++i)
44124 {
44125 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
44126 unsigned which = d->perm[i] >= nelt;
44127 unsigned xorv = (i >= nelt / 4 && i < 3 * nelt / 4) ? 24 : 0;
44128
44129 for (j = 0; j < eltsz; ++j)
44130 {
44131 rperm[which][(i * eltsz + j) ^ xorv] = GEN_INT (e * eltsz + j);
44132 rperm[1 - which][(i * eltsz + j) ^ xorv] = m128;
44133 }
44134 }
44135
44136 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
44137 vperm = force_reg (V32QImode, vperm);
44138
44139 l = gen_reg_rtx (V32QImode);
44140 op = gen_lowpart (V32QImode, d->op0);
44141 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
44142
44143 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
44144 vperm = force_reg (V32QImode, vperm);
44145
44146 h = gen_reg_rtx (V32QImode);
44147 op = gen_lowpart (V32QImode, d->op1);
44148 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
44149
44150 ior = gen_reg_rtx (V32QImode);
44151 emit_insn (gen_iorv32qi3 (ior, l, h));
44152
44153 /* Permute the V4DImode quarters using { 0, 2, 1, 3 } permutation. */
44154 op = gen_reg_rtx (V4DImode);
44155 ior = gen_lowpart (V4DImode, ior);
44156 emit_insn (gen_avx2_permv4di_1 (op, ior, const0_rtx, const2_rtx,
44157 const1_rtx, GEN_INT (3)));
44158 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
44159
44160 return true;
44161 }
44162
44163 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement extract-even
44164 and extract-odd permutations. */
44165
44166 static bool
44167 expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
44168 {
44169 rtx t1, t2, t3, t4, t5;
44170
44171 switch (d->vmode)
44172 {
44173 case V4DFmode:
44174 if (d->testing_p)
44175 break;
44176 t1 = gen_reg_rtx (V4DFmode);
44177 t2 = gen_reg_rtx (V4DFmode);
44178
44179 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
44180 emit_insn (gen_avx_vperm2f128v4df3 (t1, d->op0, d->op1, GEN_INT (0x20)));
44181 emit_insn (gen_avx_vperm2f128v4df3 (t2, d->op0, d->op1, GEN_INT (0x31)));
44182
44183 /* Now an unpck[lh]pd will produce the result required. */
44184 if (odd)
44185 t3 = gen_avx_unpckhpd256 (d->target, t1, t2);
44186 else
44187 t3 = gen_avx_unpcklpd256 (d->target, t1, t2);
44188 emit_insn (t3);
44189 break;
44190
44191 case V8SFmode:
44192 {
44193 int mask = odd ? 0xdd : 0x88;
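/* shufps selector 0x88 picks elements { 0, 2 } from each operand within
   every 128-bit lane (extract even); 0xdd picks { 1, 3 } (extract odd).  */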
44194
44195 if (d->testing_p)
44196 break;
44197 t1 = gen_reg_rtx (V8SFmode);
44198 t2 = gen_reg_rtx (V8SFmode);
44199 t3 = gen_reg_rtx (V8SFmode);
44200
44201 /* Shuffle within the 128-bit lanes to produce:
44202 { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }. */
44203 emit_insn (gen_avx_shufps256 (t1, d->op0, d->op1,
44204 GEN_INT (mask)));
44205
44206 /* Shuffle the lanes around to produce:
44207 { 4 6 c e 0 2 8 a } and { 5 7 d f 1 3 9 b }. */
44208 emit_insn (gen_avx_vperm2f128v8sf3 (t2, t1, t1,
44209 GEN_INT (0x3)));
44210
44211 /* Shuffle within the 128-bit lanes to produce:
44212 { 0 2 4 6 4 6 0 2 } | { 1 3 5 7 5 7 1 3 }. */
44213 emit_insn (gen_avx_shufps256 (t3, t1, t2, GEN_INT (0x44)));
44214
44215 /* Shuffle within the 128-bit lanes to produce:
44216 { 8 a c e c e 8 a } | { 9 b d f d f 9 b }. */
44217 emit_insn (gen_avx_shufps256 (t2, t1, t2, GEN_INT (0xee)));
44218
44219 /* Shuffle the lanes around to produce:
44220 { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }. */
44221 emit_insn (gen_avx_vperm2f128v8sf3 (d->target, t3, t2,
44222 GEN_INT (0x20)));
44223 }
44224 break;
44225
44226 case V2DFmode:
44227 case V4SFmode:
44228 case V2DImode:
44229 case V4SImode:
44230 /* These are always directly implementable by expand_vec_perm_1. */
44231 gcc_unreachable ();
44232
44233 case V8HImode:
44234 if (TARGET_SSSE3 && !TARGET_SLOW_PSHUFB)
44235 return expand_vec_perm_pshufb2 (d);
44236 else
44237 {
44238 if (d->testing_p)
44239 break;
44240 /* We need 2*log2(N)-1 operations to achieve odd/even
44241 with interleave. */
44242 t1 = gen_reg_rtx (V8HImode);
44243 t2 = gen_reg_rtx (V8HImode);
44244 emit_insn (gen_vec_interleave_highv8hi (t1, d->op0, d->op1));
44245 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->op0, d->op1));
44246 emit_insn (gen_vec_interleave_highv8hi (t2, d->target, t1));
44247 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->target, t1));
44248 if (odd)
44249 t3 = gen_vec_interleave_highv8hi (d->target, d->target, t2);
44250 else
44251 t3 = gen_vec_interleave_lowv8hi (d->target, d->target, t2);
44252 emit_insn (t3);
44253 }
44254 break;
44255
44256 case V16QImode:
44257 if (TARGET_SSSE3 && !TARGET_SLOW_PSHUFB)
44258 return expand_vec_perm_pshufb2 (d);
44259 else
44260 {
44261 if (d->testing_p)
44262 break;
44263 t1 = gen_reg_rtx (V16QImode);
44264 t2 = gen_reg_rtx (V16QImode);
44265 t3 = gen_reg_rtx (V16QImode);
44266 emit_insn (gen_vec_interleave_highv16qi (t1, d->op0, d->op1));
44267 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->op0, d->op1));
44268 emit_insn (gen_vec_interleave_highv16qi (t2, d->target, t1));
44269 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t1));
44270 emit_insn (gen_vec_interleave_highv16qi (t3, d->target, t2));
44271 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t2));
44272 if (odd)
44273 t3 = gen_vec_interleave_highv16qi (d->target, d->target, t3);
44274 else
44275 t3 = gen_vec_interleave_lowv16qi (d->target, d->target, t3);
44276 emit_insn (t3);
44277 }
44278 break;
44279
44280 case V16HImode:
44281 case V32QImode:
44282 return expand_vec_perm_vpshufb2_vpermq_even_odd (d);
44283
44284 case V4DImode:
44285 if (!TARGET_AVX2)
44286 {
44287 struct expand_vec_perm_d d_copy = *d;
44288 d_copy.vmode = V4DFmode;
44289 if (d->testing_p)
44290 d_copy.target = gen_lowpart (V4DFmode, d->target);
44291 else
44292 d_copy.target = gen_reg_rtx (V4DFmode);
44293 d_copy.op0 = gen_lowpart (V4DFmode, d->op0);
44294 d_copy.op1 = gen_lowpart (V4DFmode, d->op1);
44295 if (expand_vec_perm_even_odd_1 (&d_copy, odd))
44296 {
44297 if (!d->testing_p)
44298 emit_move_insn (d->target,
44299 gen_lowpart (V4DImode, d_copy.target));
44300 return true;
44301 }
44302 return false;
44303 }
44304
44305 if (d->testing_p)
44306 break;
44307
44308 t1 = gen_reg_rtx (V4DImode);
44309 t2 = gen_reg_rtx (V4DImode);
44310
44311 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
44312 emit_insn (gen_avx2_permv2ti (t1, d->op0, d->op1, GEN_INT (0x20)));
44313 emit_insn (gen_avx2_permv2ti (t2, d->op0, d->op1, GEN_INT (0x31)));
44314
44315 /* Now a vpunpck[lh]qdq will produce the result required. */
44316 if (odd)
44317 t3 = gen_avx2_interleave_highv4di (d->target, t1, t2);
44318 else
44319 t3 = gen_avx2_interleave_lowv4di (d->target, t1, t2);
44320 emit_insn (t3);
44321 break;
44322
44323 case V8SImode:
44324 if (!TARGET_AVX2)
44325 {
44326 struct expand_vec_perm_d d_copy = *d;
44327 d_copy.vmode = V8SFmode;
44328 if (d->testing_p)
44329 d_copy.target = gen_lowpart (V8SFmode, d->target);
44330 else
44331 d_copy.target = gen_reg_rtx (V8SFmode);
44332 d_copy.op0 = gen_lowpart (V8SFmode, d->op0);
44333 d_copy.op1 = gen_lowpart (V8SFmode, d->op1);
44334 if (expand_vec_perm_even_odd_1 (&d_copy, odd))
44335 {
44336 if (!d->testing_p)
44337 emit_move_insn (d->target,
44338 gen_lowpart (V8SImode, d_copy.target));
44339 return true;
44340 }
44341 return false;
44342 }
44343
44344 if (d->testing_p)
44345 break;
44346
44347 t1 = gen_reg_rtx (V8SImode);
44348 t2 = gen_reg_rtx (V8SImode);
44349 t3 = gen_reg_rtx (V4DImode);
44350 t4 = gen_reg_rtx (V4DImode);
44351 t5 = gen_reg_rtx (V4DImode);
44352
44353 /* Shuffle the lanes around into
44354 { 0 1 2 3 8 9 a b } and { 4 5 6 7 c d e f }. */
44355 emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, d->op0),
44356 gen_lowpart (V4DImode, d->op1),
44357 GEN_INT (0x20)));
44358 emit_insn (gen_avx2_permv2ti (t4, gen_lowpart (V4DImode, d->op0),
44359 gen_lowpart (V4DImode, d->op1),
44360 GEN_INT (0x31)));
44361
44362 /* Swap the 2nd and 3rd position in each lane into
44363 { 0 2 1 3 8 a 9 b } and { 4 6 5 7 c e d f }. */
44364 emit_insn (gen_avx2_pshufdv3 (t1, gen_lowpart (V8SImode, t3),
44365 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
44366 emit_insn (gen_avx2_pshufdv3 (t2, gen_lowpart (V8SImode, t4),
44367 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
44368
44369 /* Now a vpunpck[lh]qdq will produce
44370 { 0 2 4 6 8 a c e } resp. { 1 3 5 7 9 b d f }. */
44371 if (odd)
44372 t3 = gen_avx2_interleave_highv4di (t5, gen_lowpart (V4DImode, t1),
44373 gen_lowpart (V4DImode, t2));
44374 else
44375 t3 = gen_avx2_interleave_lowv4di (t5, gen_lowpart (V4DImode, t1),
44376 gen_lowpart (V4DImode, t2));
44377 emit_insn (t3);
44378 emit_move_insn (d->target, gen_lowpart (V8SImode, t5));
44379 break;
44380
44381 default:
44382 gcc_unreachable ();
44383 }
44384
44385 return true;
44386 }
44387
44388 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
44389 extract-even and extract-odd permutations. */
44390
44391 static bool
44392 expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
44393 {
44394 unsigned i, odd, nelt = d->nelt;
44395
44396 odd = d->perm[0];
44397 if (odd != 0 && odd != 1)
44398 return false;
44399
44400 for (i = 1; i < nelt; ++i)
44401 if (d->perm[i] != 2 * i + odd)
44402 return false;
44403
44404 return expand_vec_perm_even_odd_1 (d, odd);
44405 }
44406
44407 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement broadcast
44408 permutations. We assume that expand_vec_perm_1 has already failed. */
44409
44410 static bool
44411 expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
44412 {
44413 unsigned elt = d->perm[0], nelt2 = d->nelt / 2;
44414 enum machine_mode vmode = d->vmode;
44415 unsigned char perm2[4];
44416 rtx op0 = d->op0, dest;
44417 bool ok;
44418
44419 switch (vmode)
44420 {
44421 case V4DFmode:
44422 case V8SFmode:
44423 /* These are special-cased in sse.md so that we can optionally
44424 use the vbroadcast instruction. They expand to two insns
44425 if the input happens to be in a register. */
44426 gcc_unreachable ();
44427
44428 case V2DFmode:
44429 case V2DImode:
44430 case V4SFmode:
44431 case V4SImode:
44432 /* These are always implementable using standard shuffle patterns. */
44433 gcc_unreachable ();
44434
44435 case V8HImode:
44436 case V16QImode:
44437 /* These can be implemented via interleave. We save one insn by
44438 stopping once we have promoted to V4SImode and then use pshufd. */
44439 if (d->testing_p)
44440 return true;
44441 do
44442 {
44443 rtx dest;
44444 rtx (*gen) (rtx, rtx, rtx)
44445 = vmode == V16QImode ? gen_vec_interleave_lowv16qi
44446 : gen_vec_interleave_lowv8hi;
44447
44448 if (elt >= nelt2)
44449 {
44450 gen = vmode == V16QImode ? gen_vec_interleave_highv16qi
44451 : gen_vec_interleave_highv8hi;
44452 elt -= nelt2;
44453 }
44454 nelt2 /= 2;
44455
44456 dest = gen_reg_rtx (vmode);
44457 emit_insn (gen (dest, op0, op0));
44458 vmode = get_mode_wider_vector (vmode);
44459 op0 = gen_lowpart (vmode, dest);
44460 }
44461 while (vmode != V4SImode);
44462
44463 memset (perm2, elt, 4);
44464 dest = gen_reg_rtx (V4SImode);
44465 ok = expand_vselect (dest, op0, perm2, 4, d->testing_p);
44466 gcc_assert (ok);
44467 if (!d->testing_p)
44468 emit_move_insn (d->target, gen_lowpart (d->vmode, dest));
44469 return true;
44470
44471 case V32QImode:
44472 case V16HImode:
44473 case V8SImode:
44474 case V4DImode:
44475 /* For AVX2 broadcasts of the first element vpbroadcast* or
44476 vpermq should be used by expand_vec_perm_1. */
44477 gcc_assert (!TARGET_AVX2 || d->perm[0]);
44478 return false;
44479
44480 default:
44481 gcc_unreachable ();
44482 }
44483 }
44484
44485 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
44486 broadcast permutations. */
44487
44488 static bool
44489 expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
44490 {
44491 unsigned i, elt, nelt = d->nelt;
44492
44493 if (!d->one_operand_p)
44494 return false;
44495
44496 elt = d->perm[0];
44497 for (i = 1; i < nelt; ++i)
44498 if (d->perm[i] != elt)
44499 return false;
44500
44501 return expand_vec_perm_broadcast_1 (d);
44502 }
44503
44504 /* Implement arbitrary permutation of two V32QImode or V16HImode operands
44505 with 4 vpshufb insns, 2 vpermq and 3 vpor. We should have already failed
44506 all the shorter instruction sequences. */
44507
44508 static bool
44509 expand_vec_perm_vpshufb4_vpermq2 (struct expand_vec_perm_d *d)
44510 {
44511 rtx rperm[4][32], vperm, l[2], h[2], op, m128;
44512 unsigned int i, nelt, eltsz;
44513 bool used[4];
44514
44515 if (!TARGET_AVX2
44516 || d->one_operand_p
44517 || (d->vmode != V32QImode && d->vmode != V16HImode))
44518 return false;
44519
44520 if (d->testing_p)
44521 return true;
44522
44523 nelt = d->nelt;
44524 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
44525
44526 /* Generate 4 permutation masks. If the required element is within
44527 the same lane, it is shuffled in. If the required element is in the
44528 other lane, force a zero by setting bit 7 in the permutation mask.
44529 The other mask has a non-negative entry wherever an element is
44530 requested from the other lane, but that entry is also moved to the
44531 other lane, so that the result of vpshufb can have its two V2TImode
44532 halves swapped. */
44533 m128 = GEN_INT (-128);
44534 for (i = 0; i < 32; ++i)
44535 {
44536 rperm[0][i] = m128;
44537 rperm[1][i] = m128;
44538 rperm[2][i] = m128;
44539 rperm[3][i] = m128;
44540 }
44541 used[0] = false;
44542 used[1] = false;
44543 used[2] = false;
44544 used[3] = false;
44545 for (i = 0; i < nelt; ++i)
44546 {
44547 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
44548 unsigned xlane = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
44549 unsigned int which = ((d->perm[i] & nelt) ? 2 : 0) + (xlane ? 1 : 0);
44550
44551 for (j = 0; j < eltsz; ++j)
44552 rperm[which][(i * eltsz + j) ^ xlane] = GEN_INT (e * eltsz + j);
44553 used[which] = true;
44554 }
44555
44556 for (i = 0; i < 2; ++i)
44557 {
44558 if (!used[2 * i + 1])
44559 {
44560 h[i] = NULL_RTX;
44561 continue;
44562 }
44563 vperm = gen_rtx_CONST_VECTOR (V32QImode,
44564 gen_rtvec_v (32, rperm[2 * i + 1]));
44565 vperm = force_reg (V32QImode, vperm);
44566 h[i] = gen_reg_rtx (V32QImode);
44567 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
44568 emit_insn (gen_avx2_pshufbv32qi3 (h[i], op, vperm));
44569 }
44570
44571 /* Swap the 128-bit lanes of h[X]. */
44572 for (i = 0; i < 2; ++i)
44573 {
44574 if (h[i] == NULL_RTX)
44575 continue;
44576 op = gen_reg_rtx (V4DImode);
44577 emit_insn (gen_avx2_permv4di_1 (op, gen_lowpart (V4DImode, h[i]),
44578 const2_rtx, GEN_INT (3), const0_rtx,
44579 const1_rtx));
44580 h[i] = gen_lowpart (V32QImode, op);
44581 }
44582
44583 for (i = 0; i < 2; ++i)
44584 {
44585 if (!used[2 * i])
44586 {
44587 l[i] = NULL_RTX;
44588 continue;
44589 }
44590 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[2 * i]));
44591 vperm = force_reg (V32QImode, vperm);
44592 l[i] = gen_reg_rtx (V32QImode);
44593 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
44594 emit_insn (gen_avx2_pshufbv32qi3 (l[i], op, vperm));
44595 }
44596
44597 for (i = 0; i < 2; ++i)
44598 {
44599 if (h[i] && l[i])
44600 {
44601 op = gen_reg_rtx (V32QImode);
44602 emit_insn (gen_iorv32qi3 (op, l[i], h[i]));
44603 l[i] = op;
44604 }
44605 else if (h[i])
44606 l[i] = h[i];
44607 }
44608
44609 gcc_assert (l[0] && l[1]);
44610 op = d->target;
44611 if (d->vmode != V32QImode)
44612 op = gen_reg_rtx (V32QImode);
44613 emit_insn (gen_iorv32qi3 (op, l[0], l[1]));
44614 if (op != d->target)
44615 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
44616 return true;
44617 }
44618
44619 /* The guts of ix86_expand_vec_perm_const, also used by the ok hook.
44620 With all of the interface bits taken care of, perform the expansion
44621 in D and return true on success. */
44622
44623 static bool
44624 ix86_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
44625 {
44626 /* Try a single instruction expansion. */
44627 if (expand_vec_perm_1 (d))
44628 return true;
44629
44630 /* Try sequences of two instructions. */
44631
44632 if (expand_vec_perm_pshuflw_pshufhw (d))
44633 return true;
44634
44635 if (expand_vec_perm_palignr (d))
44636 return true;
44637
44638 if (expand_vec_perm_interleave2 (d))
44639 return true;
44640
44641 if (expand_vec_perm_broadcast (d))
44642 return true;
44643
44644 if (expand_vec_perm_vpermq_perm_1 (d))
44645 return true;
44646
44647 if (expand_vec_perm_vperm2f128 (d))
44648 return true;
44649
44650 if (expand_vec_perm_pblendv (d))
44651 return true;
44652
44653 /* Try sequences of three instructions. */
44654
44655 if (expand_vec_perm_2vperm2f128_vshuf (d))
44656 return true;
44657
44658 if (expand_vec_perm_pshufb2 (d))
44659 return true;
44660
44661 if (expand_vec_perm_interleave3 (d))
44662 return true;
44663
44664 if (expand_vec_perm_vperm2f128_vblend (d))
44665 return true;
44666
44667 /* Try sequences of four instructions. */
44668
44669 if (expand_vec_perm_vpshufb2_vpermq (d))
44670 return true;
44671
44672 if (expand_vec_perm_vpshufb2_vpermq_even_odd (d))
44673 return true;
44674
44675 /* ??? Look for narrow permutations whose element orderings would
44676 allow the promotion to a wider mode. */
44677
44678 /* ??? Look for sequences of interleave or a wider permute that place
44679 the data into the correct lanes for a half-vector shuffle like
44680 pshuf[lh]w or vpermilps. */
44681
44682 /* ??? Look for sequences of interleave that produce the desired results.
44683 The combinatorics of punpck[lh] get pretty ugly... */
44684
44685 if (expand_vec_perm_even_odd (d))
44686 return true;
44687
44688 /* Even longer sequences. */
44689 if (expand_vec_perm_vpshufb4_vpermq2 (d))
44690 return true;
44691
44692 return false;
44693 }
44694
44695 /* If a permutation only uses one operand, make it clear. Returns true
44696 if the permutation references both operands. */
44697
44698 static bool
44699 canonicalize_perm (struct expand_vec_perm_d *d)
44700 {
44701 int i, which, nelt = d->nelt;
44702
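/* Bit 0 of WHICH records that some element is taken from the first operand,
   bit 1 that some element is taken from the second.  */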
44703 for (i = which = 0; i < nelt; ++i)
44704 which |= (d->perm[i] < nelt ? 1 : 2);
44705
44706 d->one_operand_p = true;
44707 switch (which)
44708 {
44709 default:
44710 gcc_unreachable ();
44711
44712 case 3:
44713 if (!rtx_equal_p (d->op0, d->op1))
44714 {
44715 d->one_operand_p = false;
44716 break;
44717 }
44718 /* The elements of PERM do not suggest that only the first operand
44719 is used, but both operands are identical. Allow easier matching
44720 of the permutation by folding the permutation into the single
44721 input vector. */
44722 /* FALLTHRU */
44723
44724 case 2:
44725 for (i = 0; i < nelt; ++i)
44726 d->perm[i] &= nelt - 1;
44727 d->op0 = d->op1;
44728 break;
44729
44730 case 1:
44731 d->op1 = d->op0;
44732 break;
44733 }
44734
44735 return (which == 3);
44736 }
44737
44738 bool
44739 ix86_expand_vec_perm_const (rtx operands[4])
44740 {
44741 struct expand_vec_perm_d d;
44742 unsigned char perm[MAX_VECT_LEN];
44743 int i, nelt;
44744 bool two_args;
44745 rtx sel;
44746
44747 d.target = operands[0];
44748 d.op0 = operands[1];
44749 d.op1 = operands[2];
44750 sel = operands[3];
44751
44752 d.vmode = GET_MODE (d.target);
44753 gcc_assert (VECTOR_MODE_P (d.vmode));
44754 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
44755 d.testing_p = false;
44756
44757 gcc_assert (GET_CODE (sel) == CONST_VECTOR);
44758 gcc_assert (XVECLEN (sel, 0) == nelt);
44759 gcc_checking_assert (sizeof (d.perm) == sizeof (perm));
44760
44761 for (i = 0; i < nelt; ++i)
44762 {
44763 rtx e = XVECEXP (sel, 0, i);
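/* Mask each selector element to the valid range 0 .. 2 * nelt - 1.  */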
44764 int ei = INTVAL (e) & (2 * nelt - 1);
44765 d.perm[i] = ei;
44766 perm[i] = ei;
44767 }
44768
44769 two_args = canonicalize_perm (&d);
44770
44771 if (ix86_expand_vec_perm_const_1 (&d))
44772 return true;
44773
44774 /* If the selector says both arguments are needed, but the operands are the
44775 same, the above tried to expand with one_operand_p and flattened selector.
44776 If that didn't work, retry without one_operand_p; we succeeded with that
44777 during testing. */
44778 if (two_args && d.one_operand_p)
44779 {
44780 d.one_operand_p = false;
44781 memcpy (d.perm, perm, sizeof (perm));
44782 return ix86_expand_vec_perm_const_1 (&d);
44783 }
44784
44785 return false;
44786 }
44787
44788 /* Implement targetm.vectorize.vec_perm_const_ok. */
44789
44790 static bool
44791 ix86_vectorize_vec_perm_const_ok (enum machine_mode vmode,
44792 const unsigned char *sel)
44793 {
44794 struct expand_vec_perm_d d;
44795 unsigned int i, nelt, which;
44796 bool ret;
44797
44798 d.vmode = vmode;
44799 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
44800 d.testing_p = true;
44801
44802 /* Given sufficient ISA support we can just return true here
44803 for selected vector modes. */
44804 if (d.vmode == V16SImode || d.vmode == V16SFmode
44805 || d.vmode == V8DFmode || d.vmode == V8DImode)
44806 /* All implementable with a single vpermi2 insn. */
44807 return true;
44808 if (GET_MODE_SIZE (d.vmode) == 16)
44809 {
44810 /* All implementable with a single vpperm insn. */
44811 if (TARGET_XOP)
44812 return true;
44813 /* All implementable with 2 pshufb + 1 ior. */
44814 if (TARGET_SSSE3)
44815 return true;
44816 /* All implementable with shufpd or unpck[lh]pd. */
44817 if (d.nelt == 2)
44818 return true;
44819 }
44820
44821 /* Extract the values from the vector CST into the permutation
44822 array in D. */
44823 memcpy (d.perm, sel, nelt);
44824 for (i = which = 0; i < nelt; ++i)
44825 {
44826 unsigned char e = d.perm[i];
44827 gcc_assert (e < 2 * nelt);
44828 which |= (e < nelt ? 1 : 2);
44829 }
44830
44831 /* For all elements from second vector, fold the elements to first. */
44832 if (which == 2)
44833 for (i = 0; i < nelt; ++i)
44834 d.perm[i] -= nelt;
44835
44836 /* Check whether the mask can be applied to the vector type. */
44837 d.one_operand_p = (which != 3);
44838
44839 /* Implementable with shufps or pshufd. */
44840 if (d.one_operand_p && (d.vmode == V4SFmode || d.vmode == V4SImode))
44841 return true;
44842
44843 /* Otherwise we have to go through the motions and see if we can
44844 figure out how to generate the requested permutation. */
44845 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
44846 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
44847 if (!d.one_operand_p)
44848 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
44849
44850 start_sequence ();
44851 ret = ix86_expand_vec_perm_const_1 (&d);
44852 end_sequence ();
44853
44854 return ret;
44855 }
44856
44857 void
44858 ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd)
44859 {
44860 struct expand_vec_perm_d d;
44861 unsigned i, nelt;
44862
44863 d.target = targ;
44864 d.op0 = op0;
44865 d.op1 = op1;
44866 d.vmode = GET_MODE (targ);
44867 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
44868 d.one_operand_p = false;
44869 d.testing_p = false;
44870
44871 for (i = 0; i < nelt; ++i)
44872 d.perm[i] = i * 2 + odd;
44873
44874 /* We'll either be able to implement the permutation directly... */
44875 if (expand_vec_perm_1 (&d))
44876 return;
44877
44878 /* ... or we use the special-case patterns. */
44879 expand_vec_perm_even_odd_1 (&d, odd);
44880 }
44881
44882 static void
44883 ix86_expand_vec_interleave (rtx targ, rtx op0, rtx op1, bool high_p)
44884 {
44885 struct expand_vec_perm_d d;
44886 unsigned i, nelt, base;
44887 bool ok;
44888
44889 d.target = targ;
44890 d.op0 = op0;
44891 d.op1 = op1;
44892 d.vmode = GET_MODE (targ);
44893 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
44894 d.one_operand_p = false;
44895 d.testing_p = false;
44896
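/* Build the interleave-low/high selector
   { base, base + nelt, base + 1, base + nelt + 1, ... }.  */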
44897 base = high_p ? nelt / 2 : 0;
44898 for (i = 0; i < nelt / 2; ++i)
44899 {
44900 d.perm[i * 2] = i + base;
44901 d.perm[i * 2 + 1] = i + base + nelt;
44902 }
44903
44904 /* Note that for AVX this isn't one instruction. */
44905 ok = ix86_expand_vec_perm_const_1 (&d);
44906 gcc_assert (ok);
44907 }
44908
44909
44910 /* Expand a vector operation CODE for a V*QImode in terms of the
44911 same operation on V*HImode. */
44912
44913 void
44914 ix86_expand_vecop_qihi (enum rtx_code code, rtx dest, rtx op1, rtx op2)
44915 {
44916 enum machine_mode qimode = GET_MODE (dest);
44917 enum machine_mode himode;
44918 rtx (*gen_il) (rtx, rtx, rtx);
44919 rtx (*gen_ih) (rtx, rtx, rtx);
44920 rtx op1_l, op1_h, op2_l, op2_h, res_l, res_h;
44921 struct expand_vec_perm_d d;
44922 bool ok, full_interleave;
44923 bool uns_p = false;
44924 int i;
44925
44926 switch (qimode)
44927 {
44928 case V16QImode:
44929 himode = V8HImode;
44930 gen_il = gen_vec_interleave_lowv16qi;
44931 gen_ih = gen_vec_interleave_highv16qi;
44932 break;
44933 case V32QImode:
44934 himode = V16HImode;
44935 gen_il = gen_avx2_interleave_lowv32qi;
44936 gen_ih = gen_avx2_interleave_highv32qi;
44937 break;
44938 default:
44939 gcc_unreachable ();
44940 }
44941
44942 op2_l = op2_h = op2;
44943 switch (code)
44944 {
44945 case MULT:
44946 /* Unpack data such that we've got a source byte in each low byte of
44947 each word. We don't care what goes into the high byte of each word.
44948 Rather than trying to get zero in there, it is most convenient to let
44949 it be a copy of the low byte. */
44950 op2_l = gen_reg_rtx (qimode);
44951 op2_h = gen_reg_rtx (qimode);
44952 emit_insn (gen_il (op2_l, op2, op2));
44953 emit_insn (gen_ih (op2_h, op2, op2));
44954 /* FALLTHRU */
44955
44956 op1_l = gen_reg_rtx (qimode);
44957 op1_h = gen_reg_rtx (qimode);
44958 emit_insn (gen_il (op1_l, op1, op1));
44959 emit_insn (gen_ih (op1_h, op1, op1));
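/* The SSE2 V16QImode interleave spans the whole vector, while the AVX2
   V32QImode interleave stays within 128-bit lanes; the merge step at the
   end compensates for the difference.  */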
44960 full_interleave = qimode == V16QImode;
44961 break;
44962
44963 case ASHIFT:
44964 case LSHIFTRT:
44965 uns_p = true;
44966 /* FALLTHRU */
44967 case ASHIFTRT:
44968 op1_l = gen_reg_rtx (himode);
44969 op1_h = gen_reg_rtx (himode);
44970 ix86_expand_sse_unpack (op1_l, op1, uns_p, false);
44971 ix86_expand_sse_unpack (op1_h, op1, uns_p, true);
44972 full_interleave = true;
44973 break;
44974 default:
44975 gcc_unreachable ();
44976 }
44977
44978 /* Perform the operation. */
44979 res_l = expand_simple_binop (himode, code, op1_l, op2_l, NULL_RTX,
44980 1, OPTAB_DIRECT);
44981 res_h = expand_simple_binop (himode, code, op1_h, op2_h, NULL_RTX,
44982 1, OPTAB_DIRECT);
44983 gcc_assert (res_l && res_h);
44984
44985 /* Merge the data back into the right place. */
44986 d.target = dest;
44987 d.op0 = gen_lowpart (qimode, res_l);
44988 d.op1 = gen_lowpart (qimode, res_h);
44989 d.vmode = qimode;
44990 d.nelt = GET_MODE_NUNITS (qimode);
44991 d.one_operand_p = false;
44992 d.testing_p = false;
44993
44994 if (full_interleave)
44995 {
44996 /* For SSE2, we used a full interleave, so the desired
44997 results are in the even elements. */
44998 for (i = 0; i < 32; ++i)
44999 d.perm[i] = i * 2;
45000 }
45001 else
45002 {
45003 /* For AVX, the interleave used above was not cross-lane, so we extract
45004 the even elements but with the second and third quarters swapped.
45005 Happily, that is even one insn shorter than plain even extraction. */
45006 for (i = 0; i < 32; ++i)
45007 d.perm[i] = i * 2 + ((i & 24) == 8 ? 16 : (i & 24) == 16 ? -16 : 0);
45008 }
45009
45010 ok = ix86_expand_vec_perm_const_1 (&d);
45011 gcc_assert (ok);
45012
45013 set_unique_reg_note (get_last_insn (), REG_EQUAL,
45014 gen_rtx_fmt_ee (code, qimode, op1, op2));
45015 }
45016
45017 /* Helper function of ix86_expand_mul_widen_evenodd. Return true
45018 if op is CONST_VECTOR with all odd elements equal to their
45019 preceding element. */
45020
45021 static bool
45022 const_vector_equal_evenodd_p (rtx op)
45023 {
45024 enum machine_mode mode = GET_MODE (op);
45025 int i, nunits = GET_MODE_NUNITS (mode);
45026 if (GET_CODE (op) != CONST_VECTOR
45027 || nunits != CONST_VECTOR_NUNITS (op))
45028 return false;
45029 for (i = 0; i < nunits; i += 2)
45030 if (CONST_VECTOR_ELT (op, i) != CONST_VECTOR_ELT (op, i + 1))
45031 return false;
45032 return true;
45033 }
45034
45035 void
45036 ix86_expand_mul_widen_evenodd (rtx dest, rtx op1, rtx op2,
45037 bool uns_p, bool odd_p)
45038 {
45039 enum machine_mode mode = GET_MODE (op1);
45040 enum machine_mode wmode = GET_MODE (dest);
45041 rtx x;
45042 rtx orig_op1 = op1, orig_op2 = op2;
45043
45044 if (!nonimmediate_operand (op1, mode))
45045 op1 = force_reg (mode, op1);
45046 if (!nonimmediate_operand (op2, mode))
45047 op2 = force_reg (mode, op2);
45048
45049 /* We only play even/odd games with vectors of SImode. */
45050 gcc_assert (mode == V4SImode || mode == V8SImode || mode == V16SImode);
45051
45052 /* If we're looking for the odd results, shift those members down to
45053 the even slots. For some cpus this is faster than a PSHUFD. */
45054 if (odd_p)
45055 {
45056 /* For XOP use vpmacsdqh, but only for smult, as it is only
45057 signed. */
45058 if (TARGET_XOP && mode == V4SImode && !uns_p)
45059 {
45060 x = force_reg (wmode, CONST0_RTX (wmode));
45061 emit_insn (gen_xop_pmacsdqh (dest, op1, op2, x));
45062 return;
45063 }
45064
45065 x = GEN_INT (GET_MODE_UNIT_BITSIZE (mode));
45066 if (!const_vector_equal_evenodd_p (orig_op1))
45067 op1 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op1),
45068 x, NULL, 1, OPTAB_DIRECT);
45069 if (!const_vector_equal_evenodd_p (orig_op2))
45070 op2 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op2),
45071 x, NULL, 1, OPTAB_DIRECT);
45072 op1 = gen_lowpart (mode, op1);
45073 op2 = gen_lowpart (mode, op2);
45074 }
45075
45076 if (mode == V16SImode)
45077 {
45078 if (uns_p)
45079 x = gen_vec_widen_umult_even_v16si (dest, op1, op2);
45080 else
45081 x = gen_vec_widen_smult_even_v16si (dest, op1, op2);
45082 }
45083 else if (mode == V8SImode)
45084 {
45085 if (uns_p)
45086 x = gen_vec_widen_umult_even_v8si (dest, op1, op2);
45087 else
45088 x = gen_vec_widen_smult_even_v8si (dest, op1, op2);
45089 }
45090 else if (uns_p)
45091 x = gen_vec_widen_umult_even_v4si (dest, op1, op2);
45092 else if (TARGET_SSE4_1)
45093 x = gen_sse4_1_mulv2siv2di3 (dest, op1, op2);
45094 else
45095 {
45096 rtx s1, s2, t0, t1, t2;
45097
45098 /* The easiest way to implement this without PMULDQ is to go through
45099 the motions as if we are performing a full 64-bit multiply, except
45100 that we need to do less shuffling of the elements. */
45101
45102 /* Compute the sign-extension, aka highparts, of the two operands. */
45103 s1 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
45104 op1, pc_rtx, pc_rtx);
45105 s2 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
45106 op2, pc_rtx, pc_rtx);
45107
45108 /* Multiply LO(A) * HI(B), and vice-versa. */
45109 t1 = gen_reg_rtx (wmode);
45110 t2 = gen_reg_rtx (wmode);
45111 emit_insn (gen_vec_widen_umult_even_v4si (t1, s1, op2));
45112 emit_insn (gen_vec_widen_umult_even_v4si (t2, s2, op1));
45113
45114 /* Multiply LO(A) * LO(B). */
45115 t0 = gen_reg_rtx (wmode);
45116 emit_insn (gen_vec_widen_umult_even_v4si (t0, op1, op2));
45117
45118 /* Combine and shift the highparts into place. */
45119 t1 = expand_binop (wmode, add_optab, t1, t2, t1, 1, OPTAB_DIRECT);
45120 t1 = expand_binop (wmode, ashl_optab, t1, GEN_INT (32), t1,
45121 1, OPTAB_DIRECT);
45122
45123 /* Combine high and low parts. */
45124 force_expand_binop (wmode, add_optab, t0, t1, dest, 1, OPTAB_DIRECT);
45125 return;
45126 }
45127 emit_insn (x);
45128 }
45129
45130 void
45131 ix86_expand_mul_widen_hilo (rtx dest, rtx op1, rtx op2,
45132 bool uns_p, bool high_p)
45133 {
45134 enum machine_mode wmode = GET_MODE (dest);
45135 enum machine_mode mode = GET_MODE (op1);
45136 rtx t1, t2, t3, t4, mask;
45137
45138 switch (mode)
45139 {
45140 case V4SImode:
45141 t1 = gen_reg_rtx (mode);
45142 t2 = gen_reg_rtx (mode);
45143 if (TARGET_XOP && !uns_p)
45144 {
45145 /* With XOP, we have pmacsdqh, aka mul_widen_odd. In this case,
45146 shuffle the elements once so that all elements are in the right
45147 place for immediate use: { A C B D }. */
45148 emit_insn (gen_sse2_pshufd_1 (t1, op1, const0_rtx, const2_rtx,
45149 const1_rtx, GEN_INT (3)));
45150 emit_insn (gen_sse2_pshufd_1 (t2, op2, const0_rtx, const2_rtx,
45151 const1_rtx, GEN_INT (3)));
45152 }
45153 else
45154 {
45155 /* Put the elements into place for the multiply. */
45156 ix86_expand_vec_interleave (t1, op1, op1, high_p);
45157 ix86_expand_vec_interleave (t2, op2, op2, high_p);
45158 high_p = false;
45159 }
45160 ix86_expand_mul_widen_evenodd (dest, t1, t2, uns_p, high_p);
45161 break;
45162
45163 case V8SImode:
45164 /* Shuffle the elements between the lanes. After this we
45165 have { A B E F | C D G H } for each operand. */
45166 t1 = gen_reg_rtx (V4DImode);
45167 t2 = gen_reg_rtx (V4DImode);
45168 emit_insn (gen_avx2_permv4di_1 (t1, gen_lowpart (V4DImode, op1),
45169 const0_rtx, const2_rtx,
45170 const1_rtx, GEN_INT (3)));
45171 emit_insn (gen_avx2_permv4di_1 (t2, gen_lowpart (V4DImode, op2),
45172 const0_rtx, const2_rtx,
45173 const1_rtx, GEN_INT (3)));
45174
45175 /* Shuffle the elements within the lanes. After this we
45176 have { A A B B | C C D D } or { E E F F | G G H H }. */
45177 t3 = gen_reg_rtx (V8SImode);
45178 t4 = gen_reg_rtx (V8SImode);
45179 mask = GEN_INT (high_p
45180 ? 2 + (2 << 2) + (3 << 4) + (3 << 6)
45181 : 0 + (0 << 2) + (1 << 4) + (1 << 6));
45182 emit_insn (gen_avx2_pshufdv3 (t3, gen_lowpart (V8SImode, t1), mask));
45183 emit_insn (gen_avx2_pshufdv3 (t4, gen_lowpart (V8SImode, t2), mask));
45184
45185 ix86_expand_mul_widen_evenodd (dest, t3, t4, uns_p, false);
45186 break;
45187
45188 case V8HImode:
45189 case V16HImode:
45190 t1 = expand_binop (mode, smul_optab, op1, op2, NULL_RTX,
45191 uns_p, OPTAB_DIRECT);
45192 t2 = expand_binop (mode,
45193 uns_p ? umul_highpart_optab : smul_highpart_optab,
45194 op1, op2, NULL_RTX, uns_p, OPTAB_DIRECT);
45195 gcc_assert (t1 && t2);
45196
45197 t3 = gen_reg_rtx (mode);
45198 ix86_expand_vec_interleave (t3, t1, t2, high_p);
45199 emit_move_insn (dest, gen_lowpart (wmode, t3));
45200 break;
45201
45202 case V16QImode:
45203 case V32QImode:
45204 t1 = gen_reg_rtx (wmode);
45205 t2 = gen_reg_rtx (wmode);
45206 ix86_expand_sse_unpack (t1, op1, uns_p, high_p);
45207 ix86_expand_sse_unpack (t2, op2, uns_p, high_p);
45208
45209 emit_insn (gen_rtx_SET (VOIDmode, dest, gen_rtx_MULT (wmode, t1, t2)));
45210 break;
45211
45212 default:
45213 gcc_unreachable ();
45214 }
45215 }
45216
45217 void
45218 ix86_expand_sse2_mulv4si3 (rtx op0, rtx op1, rtx op2)
45219 {
45220 rtx res_1, res_2, res_3, res_4;
45221
45222 res_1 = gen_reg_rtx (V4SImode);
45223 res_2 = gen_reg_rtx (V4SImode);
45224 res_3 = gen_reg_rtx (V2DImode);
45225 res_4 = gen_reg_rtx (V2DImode);
45226 ix86_expand_mul_widen_evenodd (res_3, op1, op2, true, false);
45227 ix86_expand_mul_widen_evenodd (res_4, op1, op2, true, true);
45228
45229 /* Move the results in element 2 down to element 1; we don't care
45230 what goes in elements 2 and 3. Then we can merge the parts
45231 back together with an interleave.
45232
45233 Note that two other sequences were tried:
45234 (1) Use interleaves at the start instead of psrldq, which allows
45235 us to use a single shufps to merge things back at the end.
45236 (2) Use shufps here to combine the two vectors, then pshufd to
45237 put the elements in the correct order.
45238 In both cases the cost of the reformatting stall was too high
45239 and the overall sequence slower. */
45240
45241 emit_insn (gen_sse2_pshufd_1 (res_1, gen_lowpart (V4SImode, res_3),
45242 const0_rtx, const2_rtx,
45243 const0_rtx, const0_rtx));
45244 emit_insn (gen_sse2_pshufd_1 (res_2, gen_lowpart (V4SImode, res_4),
45245 const0_rtx, const2_rtx,
45246 const0_rtx, const0_rtx));
45247 res_1 = emit_insn (gen_vec_interleave_lowv4si (op0, res_1, res_2));
45248
45249 set_unique_reg_note (res_1, REG_EQUAL, gen_rtx_MULT (V4SImode, op1, op2));
45250 }
45251
45252 void
45253 ix86_expand_sse2_mulvxdi3 (rtx op0, rtx op1, rtx op2)
45254 {
45255 enum machine_mode mode = GET_MODE (op0);
45256 rtx t1, t2, t3, t4, t5, t6;
45257
45258 if (TARGET_XOP && mode == V2DImode)
45259 {
45260 /* op1: A,B,C,D, op2: E,F,G,H */
45261 op1 = gen_lowpart (V4SImode, op1);
45262 op2 = gen_lowpart (V4SImode, op2);
45263
45264 t1 = gen_reg_rtx (V4SImode);
45265 t2 = gen_reg_rtx (V4SImode);
45266 t3 = gen_reg_rtx (V2DImode);
45267 t4 = gen_reg_rtx (V2DImode);
45268
45269 /* t1: B,A,D,C */
45270 emit_insn (gen_sse2_pshufd_1 (t1, op1,
45271 GEN_INT (1),
45272 GEN_INT (0),
45273 GEN_INT (3),
45274 GEN_INT (2)));
45275
45276 /* t2: (B*E),(A*F),(D*G),(C*H) */
45277 emit_insn (gen_mulv4si3 (t2, t1, op2));
45278
45279 /* t3: (B*E)+(A*F), (D*G)+(C*H) */
45280 emit_insn (gen_xop_phadddq (t3, t2));
45281
45282 /* t4: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
45283 emit_insn (gen_ashlv2di3 (t4, t3, GEN_INT (32)));
45284
45285 /* Multiply lower parts and add all. */
45286 t5 = gen_reg_rtx (V2DImode);
45287 emit_insn (gen_vec_widen_umult_even_v4si (t5,
45288 gen_lowpart (V4SImode, op1),
45289 gen_lowpart (V4SImode, op2)));
45290 op0 = expand_binop (mode, add_optab, t5, t4, op0, 1, OPTAB_DIRECT);
45291
45292 }
45293 else
45294 {
45295 enum machine_mode nmode;
45296 rtx (*umul) (rtx, rtx, rtx);
45297
45298 if (mode == V2DImode)
45299 {
45300 umul = gen_vec_widen_umult_even_v4si;
45301 nmode = V4SImode;
45302 }
45303 else if (mode == V4DImode)
45304 {
45305 umul = gen_vec_widen_umult_even_v8si;
45306 nmode = V8SImode;
45307 }
45308 else if (mode == V8DImode)
45309 {
45310 umul = gen_vec_widen_umult_even_v16si;
45311 nmode = V16SImode;
45312 }
45313 else
45314 gcc_unreachable ();
45315
45316
45317 /* Multiply low parts. */
45318 t1 = gen_reg_rtx (mode);
45319 emit_insn (umul (t1, gen_lowpart (nmode, op1), gen_lowpart (nmode, op2)));
45320
45321 /* Shift input vectors right 32 bits so we can multiply high parts. */
45322 t6 = GEN_INT (32);
45323 t2 = expand_binop (mode, lshr_optab, op1, t6, NULL, 1, OPTAB_DIRECT);
45324 t3 = expand_binop (mode, lshr_optab, op2, t6, NULL, 1, OPTAB_DIRECT);
45325
45326 /* Multiply high parts by low parts. */
45327 t4 = gen_reg_rtx (mode);
45328 t5 = gen_reg_rtx (mode);
45329 emit_insn (umul (t4, gen_lowpart (nmode, t2), gen_lowpart (nmode, op2)));
45330 emit_insn (umul (t5, gen_lowpart (nmode, t3), gen_lowpart (nmode, op1)));
45331
45332 /* Combine and shift the highparts back. */
45333 t4 = expand_binop (mode, add_optab, t4, t5, t4, 1, OPTAB_DIRECT);
45334 t4 = expand_binop (mode, ashl_optab, t4, t6, t4, 1, OPTAB_DIRECT);
45335
45336 /* Combine high and low parts. */
45337 force_expand_binop (mode, add_optab, t1, t4, op0, 1, OPTAB_DIRECT);
45338 }
45339
45340 set_unique_reg_note (get_last_insn (), REG_EQUAL,
45341 gen_rtx_MULT (mode, op1, op2));
45342 }
45343
45344 /* Calculate integer abs() using only SSE2 instructions. */
45345
45346 void
45347 ix86_expand_sse2_abs (rtx target, rtx input)
45348 {
45349 enum machine_mode mode = GET_MODE (target);
45350 rtx tmp0, tmp1, x;
45351
45352 switch (mode)
45353 {
45354 /* For 32-bit signed integer X, the best way to calculate the absolute
45355 value of X is (((signed) X >> (W-1)) ^ X) - ((signed) X >> (W-1)). */
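/* For example, X = -5: X >> 31 = -1, (-1 ^ -5) = 4 and 4 - (-1) = 5.  */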
45356 case V4SImode:
45357 tmp0 = expand_simple_binop (mode, ASHIFTRT, input,
45358 GEN_INT (GET_MODE_BITSIZE
45359 (GET_MODE_INNER (mode)) - 1),
45360 NULL, 0, OPTAB_DIRECT);
45361 tmp1 = expand_simple_binop (mode, XOR, tmp0, input,
45362 NULL, 0, OPTAB_DIRECT);
45363 x = expand_simple_binop (mode, MINUS, tmp1, tmp0,
45364 target, 0, OPTAB_DIRECT);
45365 break;
45366
45367 /* For 16-bit signed integer X, the best way to calculate the absolute
45368 value of X is max (X, -X), as SSE2 provides the PMAXSW insn. */
45369 case V8HImode:
45370 tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
45371
45372 x = expand_simple_binop (mode, SMAX, tmp0, input,
45373 target, 0, OPTAB_DIRECT);
45374 break;
45375
45376 /* For 8-bit signed integer X, the best way to calculate the absolute
45377 value of X is min ((unsigned char) X, (unsigned char) (-X)),
45378 as SSE2 provides the PMINUB insn. */
45379 case V16QImode:
45380 tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
45381
45382 x = expand_simple_binop (V16QImode, UMIN, tmp0, input,
45383 target, 0, OPTAB_DIRECT);
45384 break;
45385
45386 default:
45387 gcc_unreachable ();
45388 }
45389
45390 if (x != target)
45391 emit_move_insn (target, x);
45392 }
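
/* Purely for illustration (not code the compiler uses), the scalar
   equivalents of the three strategies above, assuming two's complement
   values, an arithmetic right shift and <stdint.h> types:

     int32_t abs32 (int32_t x) { int32_t s = x >> 31; return (x ^ s) - s; }
     int16_t abs16 (int16_t x) { return x > -x ? x : -x; }                 (PMAXSW)
     uint8_t abs8  (int8_t x)  { uint8_t a = x, b = -x; return a < b ? a : b; }  (PMINUB)
*/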
45393
45394 /* Expand an insert into a vector register through pinsr insn.
45395 Return true if successful. */
45396
45397 bool
45398 ix86_expand_pinsr (rtx *operands)
45399 {
45400 rtx dst = operands[0];
45401 rtx src = operands[3];
45402
45403 unsigned int size = INTVAL (operands[1]);
45404 unsigned int pos = INTVAL (operands[2]);
45405
45406 if (GET_CODE (dst) == SUBREG)
45407 {
45408 pos += SUBREG_BYTE (dst) * BITS_PER_UNIT;
45409 dst = SUBREG_REG (dst);
45410 }
45411
45412 if (GET_CODE (src) == SUBREG)
45413 src = SUBREG_REG (src);
45414
45415 switch (GET_MODE (dst))
45416 {
45417 case V16QImode:
45418 case V8HImode:
45419 case V4SImode:
45420 case V2DImode:
45421 {
45422 enum machine_mode srcmode, dstmode;
45423 rtx (*pinsr)(rtx, rtx, rtx, rtx);
45424
45425 srcmode = mode_for_size (size, MODE_INT, 0);
45426
45427 switch (srcmode)
45428 {
45429 case QImode:
45430 if (!TARGET_SSE4_1)
45431 return false;
45432 dstmode = V16QImode;
45433 pinsr = gen_sse4_1_pinsrb;
45434 break;
45435
45436 case HImode:
45437 if (!TARGET_SSE2)
45438 return false;
45439 dstmode = V8HImode;
45440 pinsr = gen_sse2_pinsrw;
45441 break;
45442
45443 case SImode:
45444 if (!TARGET_SSE4_1)
45445 return false;
45446 dstmode = V4SImode;
45447 pinsr = gen_sse4_1_pinsrd;
45448 break;
45449
45450 case DImode:
45451 gcc_assert (TARGET_64BIT);
45452 if (!TARGET_SSE4_1)
45453 return false;
45454 dstmode = V2DImode;
45455 pinsr = gen_sse4_1_pinsrq;
45456 break;
45457
45458 default:
45459 return false;
45460 }
45461
45462 rtx d = dst;
45463 if (GET_MODE (dst) != dstmode)
45464 d = gen_reg_rtx (dstmode);
45465 src = gen_lowpart (srcmode, src);
45466
45467 pos /= size;
45468
45469 emit_insn (pinsr (d, gen_lowpart (dstmode, dst), src,
45470 GEN_INT (1 << pos)));
45471 if (d != dst)
45472 emit_move_insn (dst, gen_lowpart (GET_MODE (dst), d));
45473 return true;
45474 }
45475
45476 default:
45477 return false;
45478 }
45479 }
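
/* Example (illustrative): inserting a HImode value at bit position 32 of a
   V8HImode destination computes srcmode = HImode, lane = 32 / 16 = 2 and
   emits a single pinsrw; byte, dword and qword element sizes map to
   pinsrb/pinsrd/pinsrq instead and additionally require SSE4.1 (and
   64-bit mode for pinsrq), as checked above. */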
45480 \f
45481 /* This function returns the calling abi specific va_list type node.
45482 It returns the FNDECL specific va_list type. */
45483
45484 static tree
45485 ix86_fn_abi_va_list (tree fndecl)
45486 {
45487 if (!TARGET_64BIT)
45488 return va_list_type_node;
45489 gcc_assert (fndecl != NULL_TREE);
45490
45491 if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
45492 return ms_va_list_type_node;
45493 else
45494 return sysv_va_list_type_node;
45495 }
45496
45497 /* Returns the canonical va_list type specified by TYPE. If there
45498 is no valid TYPE provided, it returns NULL_TREE. */
45499
45500 static tree
45501 ix86_canonical_va_list_type (tree type)
45502 {
45503 tree wtype, htype;
45504
45505 /* Resolve references and pointers to va_list type. */
45506 if (TREE_CODE (type) == MEM_REF)
45507 type = TREE_TYPE (type);
45508 else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE(type)))
45509 type = TREE_TYPE (type);
45510 else if (POINTER_TYPE_P (type) && TREE_CODE (TREE_TYPE (type)) == ARRAY_TYPE)
45511 type = TREE_TYPE (type);
45512
45513 if (TARGET_64BIT && va_list_type_node != NULL_TREE)
45514 {
45515 wtype = va_list_type_node;
45516 gcc_assert (wtype != NULL_TREE);
45517 htype = type;
45518 if (TREE_CODE (wtype) == ARRAY_TYPE)
45519 {
45520 /* If va_list is an array type, the argument may have decayed
45521 to a pointer type, e.g. by being passed to another function.
45522 In that case, unwrap both types so that we can compare the
45523 underlying records. */
45524 if (TREE_CODE (htype) == ARRAY_TYPE
45525 || POINTER_TYPE_P (htype))
45526 {
45527 wtype = TREE_TYPE (wtype);
45528 htype = TREE_TYPE (htype);
45529 }
45530 }
45531 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
45532 return va_list_type_node;
45533 wtype = sysv_va_list_type_node;
45534 gcc_assert (wtype != NULL_TREE);
45535 htype = type;
45536 if (TREE_CODE (wtype) == ARRAY_TYPE)
45537 {
45538 /* If va_list is an array type, the argument may have decayed
45539 to a pointer type, e.g. by being passed to another function.
45540 In that case, unwrap both types so that we can compare the
45541 underlying records. */
45542 if (TREE_CODE (htype) == ARRAY_TYPE
45543 || POINTER_TYPE_P (htype))
45544 {
45545 wtype = TREE_TYPE (wtype);
45546 htype = TREE_TYPE (htype);
45547 }
45548 }
45549 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
45550 return sysv_va_list_type_node;
45551 wtype = ms_va_list_type_node;
45552 gcc_assert (wtype != NULL_TREE);
45553 htype = type;
45554 if (TREE_CODE (wtype) == ARRAY_TYPE)
45555 {
45556 /* If va_list is an array type, the argument may have decayed
45557 to a pointer type, e.g. by being passed to another function.
45558 In that case, unwrap both types so that we can compare the
45559 underlying records. */
45560 if (TREE_CODE (htype) == ARRAY_TYPE
45561 || POINTER_TYPE_P (htype))
45562 {
45563 wtype = TREE_TYPE (wtype);
45564 htype = TREE_TYPE (htype);
45565 }
45566 }
45567 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
45568 return ms_va_list_type_node;
45569 return NULL_TREE;
45570 }
45571 return std_canonical_va_list_type (type);
45572 }
45573
45574 /* Iterate through the target-specific builtin types for va_list.
45575 IDX denotes the iterator, *PTREE is set to the result type of
45576 the va_list builtin, and *PNAME to its internal name.
45577 Returns zero if there is no element for this index, otherwise
45578 IDX should be increased upon the next call.
45579 Note, do not iterate a base builtin's name like __builtin_va_list.
45580 Used from c_common_nodes_and_builtins. */
45581
45582 static int
45583 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
45584 {
45585 if (TARGET_64BIT)
45586 {
45587 switch (idx)
45588 {
45589 default:
45590 break;
45591
45592 case 0:
45593 *ptree = ms_va_list_type_node;
45594 *pname = "__builtin_ms_va_list";
45595 return 1;
45596
45597 case 1:
45598 *ptree = sysv_va_list_type_node;
45599 *pname = "__builtin_sysv_va_list";
45600 return 1;
45601 }
45602 }
45603
45604 return 0;
45605 }
45606
45607 #undef TARGET_SCHED_DISPATCH
45608 #define TARGET_SCHED_DISPATCH has_dispatch
45609 #undef TARGET_SCHED_DISPATCH_DO
45610 #define TARGET_SCHED_DISPATCH_DO do_dispatch
45611 #undef TARGET_SCHED_REASSOCIATION_WIDTH
45612 #define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width
45613 #undef TARGET_SCHED_REORDER
45614 #define TARGET_SCHED_REORDER ix86_sched_reorder
45615 #undef TARGET_SCHED_ADJUST_PRIORITY
45616 #define TARGET_SCHED_ADJUST_PRIORITY ix86_adjust_priority
45617 #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
45618 #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK \
45619 ix86_dependencies_evaluation_hook
45620
45621 /* The size of the dispatch window is the total number of bytes of
45622 object code allowed in a window. */
45623 #define DISPATCH_WINDOW_SIZE 16
45624
45625 /* Number of dispatch windows considered for scheduling. */
45626 #define MAX_DISPATCH_WINDOWS 3
45627
45628 /* Maximum number of instructions in a window. */
45629 #define MAX_INSN 4
45630
45631 /* Maximum number of immediate operands in a window. */
45632 #define MAX_IMM 4
45633
45634 /* Maximum number of immediate bits allowed in a window. */
45635 #define MAX_IMM_SIZE 128
45636
45637 /* Maximum number of 32 bit immediates allowed in a window. */
45638 #define MAX_IMM_32 4
45639
45640 /* Maximum number of 64 bit immediates allowed in a window. */
45641 #define MAX_IMM_64 2
45642
45643 /* Maximum total of loads or prefetches allowed in a window. */
45644 #define MAX_LOAD 2
45645
45646 /* Maximum total of stores allowed in a window. */
45647 #define MAX_STORE 1
45648
45649 #undef BIG
45650 #define BIG 100
45651
45652
45653 /* Dispatch groups. Instructions that affect the mix in a dispatch window. */
45654 enum dispatch_group {
45655 disp_no_group = 0,
45656 disp_load,
45657 disp_store,
45658 disp_load_store,
45659 disp_prefetch,
45660 disp_imm,
45661 disp_imm_32,
45662 disp_imm_64,
45663 disp_branch,
45664 disp_cmp,
45665 disp_jcc,
45666 disp_last
45667 };
45668
45669 /* Number of allowable groups in a dispatch window. It is an array
45670 indexed by dispatch_group enum. 100 is used as a big number,
45671 because the number of these kinds of operations does not have any
45672 effect in the dispatch window, but we need them for other reasons in
45673 the table. */
45674 static unsigned int num_allowable_groups[disp_last] = {
45675 0, 2, 1, 1, 2, 4, 4, 2, 1, BIG, BIG
45676 };
45677
45678 char group_name[disp_last + 1][16] = {
45679 "disp_no_group", "disp_load", "disp_store", "disp_load_store",
45680 "disp_prefetch", "disp_imm", "disp_imm_32", "disp_imm_64",
45681 "disp_branch", "disp_cmp", "disp_jcc", "disp_last"
45682 };
45683
45684 /* Instruction path. */
45685 enum insn_path {
45686 no_path = 0,
45687 path_single, /* Single micro op. */
45688 path_double, /* Double micro op. */
45689 path_multi, /* Instructions with more than 2 micro ops. */
45690 last_path
45691 };
45692
45693 /* sched_insn_info describes an instruction in the window of instructions
45694 scheduled in the basic block. It records the scheduled instruction
45695 together with its dispatch group, decode path and byte lengths.
45696
45697 Windows are allocated for each basic block and are linked
45698 together. */
45699 typedef struct sched_insn_info_s {
45700 rtx insn;
45701 enum dispatch_group group;
45702 enum insn_path path;
45703 int byte_len;
45704 int imm_bytes;
45705 } sched_insn_info;
45706
45707 /* Linked list of dispatch windows. This is a doubly linked list of
45708 dispatch windows of a basic block. It contains information about
45709 the number of uops in the window and the total number of
45710 instructions and of bytes in the object code for this dispatch
45711 window. */
45712 typedef struct dispatch_windows_s {
45713 int num_insn; /* Number of insn in the window. */
45714 int num_uops; /* Number of uops in the window. */
45715 int window_size; /* Number of bytes in the window. */
45716 int window_num; /* Window number, either 0 or 1. */
45717 int num_imm; /* Number of immediates in the window. */
45718 int num_imm_32; /* Number of 32 bit immediates in the window. */
45719 int num_imm_64; /* Number of 64 bit immediates in the window. */
45720 int imm_size; /* Total size in bytes of immediates in the window. */
45721 int num_loads; /* Total memory loads in the window. */
45722 int num_stores; /* Total memory stores in the window. */
45723 int violation; /* Violation exists in window. */
45724 sched_insn_info *window; /* Pointer to the window. */
45725 struct dispatch_windows_s *next;
45726 struct dispatch_windows_s *prev;
45727 } dispatch_windows;
45728
45729 /* Immediate values used in an insn. */
45730 typedef struct imm_info_s
45731 {
45732 int imm;
45733 int imm32;
45734 int imm64;
45735 } imm_info;
45736
45737 static dispatch_windows *dispatch_window_list;
45738 static dispatch_windows *dispatch_window_list1;
45739
45740 /* Get dispatch group of insn. */
45741
45742 static enum dispatch_group
45743 get_mem_group (rtx insn)
45744 {
45745 enum attr_memory memory;
45746
45747 if (INSN_CODE (insn) < 0)
45748 return disp_no_group;
45749 memory = get_attr_memory (insn);
45750 if (memory == MEMORY_STORE)
45751 return disp_store;
45752
45753 if (memory == MEMORY_LOAD)
45754 return disp_load;
45755
45756 if (memory == MEMORY_BOTH)
45757 return disp_load_store;
45758
45759 return disp_no_group;
45760 }
45761
45762 /* Return true if insn is a compare instruction. */
45763
45764 static bool
45765 is_cmp (rtx insn)
45766 {
45767 enum attr_type type;
45768
45769 type = get_attr_type (insn);
45770 return (type == TYPE_TEST
45771 || type == TYPE_ICMP
45772 || type == TYPE_FCMP
45773 || GET_CODE (PATTERN (insn)) == COMPARE);
45774 }
45775
45776 /* Return true if a dispatch violation was encountered. */
45777
45778 static bool
45779 dispatch_violation (void)
45780 {
45781 if (dispatch_window_list->next)
45782 return dispatch_window_list->next->violation;
45783 return dispatch_window_list->violation;
45784 }
45785
45786 /* Return true if insn is a branch instruction. */
45787
45788 static bool
45789 is_branch (rtx insn)
45790 {
45791 return (CALL_P (insn) || JUMP_P (insn));
45792 }
45793
45794 /* Return true if insn is a prefetch instruction. */
45795
45796 static bool
45797 is_prefetch (rtx insn)
45798 {
45799 return NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == PREFETCH;
45800 }
45801
45802 /* This function initializes a dispatch window and the list container holding a
45803 pointer to the window. */
45804
45805 static void
45806 init_window (int window_num)
45807 {
45808 int i;
45809 dispatch_windows *new_list;
45810
45811 if (window_num == 0)
45812 new_list = dispatch_window_list;
45813 else
45814 new_list = dispatch_window_list1;
45815
45816 new_list->num_insn = 0;
45817 new_list->num_uops = 0;
45818 new_list->window_size = 0;
45819 new_list->next = NULL;
45820 new_list->prev = NULL;
45821 new_list->window_num = window_num;
45822 new_list->num_imm = 0;
45823 new_list->num_imm_32 = 0;
45824 new_list->num_imm_64 = 0;
45825 new_list->imm_size = 0;
45826 new_list->num_loads = 0;
45827 new_list->num_stores = 0;
45828 new_list->violation = false;
45829
45830 for (i = 0; i < MAX_INSN; i++)
45831 {
45832 new_list->window[i].insn = NULL;
45833 new_list->window[i].group = disp_no_group;
45834 new_list->window[i].path = no_path;
45835 new_list->window[i].byte_len = 0;
45836 new_list->window[i].imm_bytes = 0;
45837 }
45838 return;
45839 }
45840
45841 /* This function allocates and initializes a dispatch window and the
45842 list container holding a pointer to the window. */
45843
45844 static dispatch_windows *
45845 allocate_window (void)
45846 {
45847 dispatch_windows *new_list = XNEW (struct dispatch_windows_s);
45848 new_list->window = XNEWVEC (struct sched_insn_info_s, MAX_INSN + 1);
45849
45850 return new_list;
45851 }
45852
45853 /* This routine initializes the dispatch scheduling information. It
45854 initiates building dispatch scheduler tables and constructs the
45855 first dispatch window. */
45856
45857 static void
45858 init_dispatch_sched (void)
45859 {
45860 /* Allocate a dispatch list and a window. */
45861 dispatch_window_list = allocate_window ();
45862 dispatch_window_list1 = allocate_window ();
45863 init_window (0);
45864 init_window (1);
45865 }
45866
45867 /* This function returns true if a branch is detected. End of a basic block
45868 does not have to be a branch, but here we assume only branches end a
45869 window. */
45870
45871 static bool
45872 is_end_basic_block (enum dispatch_group group)
45873 {
45874 return group == disp_branch;
45875 }
45876
45877 /* This function is called when the end of a window processing is reached. */
45878
45879 static void
45880 process_end_window (void)
45881 {
45882 gcc_assert (dispatch_window_list->num_insn <= MAX_INSN);
45883 if (dispatch_window_list->next)
45884 {
45885 gcc_assert (dispatch_window_list1->num_insn <= MAX_INSN);
45886 gcc_assert (dispatch_window_list->window_size
45887 + dispatch_window_list1->window_size <= 48);
45888 init_window (1);
45889 }
45890 init_window (0);
45891 }
45892
45893 /* Allocates a new dispatch window and adds it to WINDOW_LIST.
45894 WINDOW_NUM is either 0 or 1. A maximum of two windows are generated
45895 for 48 bytes of instructions. Note that these windows are not dispatch
45896 windows whose size is DISPATCH_WINDOW_SIZE. */
45897
45898 static dispatch_windows *
45899 allocate_next_window (int window_num)
45900 {
45901 if (window_num == 0)
45902 {
45903 if (dispatch_window_list->next)
45904 init_window (1);
45905 init_window (0);
45906 return dispatch_window_list;
45907 }
45908
45909 dispatch_window_list->next = dispatch_window_list1;
45910 dispatch_window_list1->prev = dispatch_window_list;
45911
45912 return dispatch_window_list1;
45913 }
45914
45915 /* Increment the number of immediate operands of an instruction. */
45916
45917 static int
45918 find_constant_1 (rtx *in_rtx, imm_info *imm_values)
45919 {
45920 if (*in_rtx == 0)
45921 return 0;
45922
45923 switch ( GET_CODE (*in_rtx))
45924 {
45925 case CONST:
45926 case SYMBOL_REF:
45927 case CONST_INT:
45928 (imm_values->imm)++;
45929 if (x86_64_immediate_operand (*in_rtx, SImode))
45930 (imm_values->imm32)++;
45931 else
45932 (imm_values->imm64)++;
45933 break;
45934
45935 case CONST_DOUBLE:
45936 (imm_values->imm)++;
45937 (imm_values->imm64)++;
45938 break;
45939
45940 case CODE_LABEL:
45941 if (LABEL_KIND (*in_rtx) == LABEL_NORMAL)
45942 {
45943 (imm_values->imm)++;
45944 (imm_values->imm32)++;
45945 }
45946 break;
45947
45948 default:
45949 break;
45950 }
45951
45952 return 0;
45953 }
45954
45955 /* Compute number of immediate operands of an instruction. */
45956
45957 static void
45958 find_constant (rtx in_rtx, imm_info *imm_values)
45959 {
45960 for_each_rtx (INSN_P (in_rtx) ? &PATTERN (in_rtx) : &in_rtx,
45961 (rtx_function) find_constant_1, (void *) imm_values);
45962 }
45963
45964 /* Return total size of immediate operands of an instruction along with the number
45965 of corresponding immediate operands. It initializes its parameters to zero
45966 before calling FIND_CONSTANT.
45967 INSN is the input instruction. IMM is the total of immediates.
45968 IMM32 is the number of 32 bit immediates. IMM64 is the number of 64
45969 bit immediates. */
45970
45971 static int
45972 get_num_immediates (rtx insn, int *imm, int *imm32, int *imm64)
45973 {
45974 imm_info imm_values = {0, 0, 0};
45975
45976 find_constant (insn, &imm_values);
45977 *imm = imm_values.imm;
45978 *imm32 = imm_values.imm32;
45979 *imm64 = imm_values.imm64;
45980 return imm_values.imm32 * 4 + imm_values.imm64 * 8;
45981 }
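
/* Hypothetical example: for an insn such as "movl $5, %eax" the walk above
   finds a single constant that satisfies x86_64_immediate_operand in SImode,
   so *IMM = 1, *IMM32 = 1, *IMM64 = 0 and the return value is 4 bytes. */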
45982
45983 /* This function indicates whether an instruction has any immediate
45984 operands. */
45985
45986 static bool
45987 has_immediate (rtx insn)
45988 {
45989 int num_imm_operand;
45990 int num_imm32_operand;
45991 int num_imm64_operand;
45992
45993 if (insn)
45994 return get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
45995 &num_imm64_operand);
45996 return false;
45997 }
45998
45999 /* Return the decode path (single, double or multi) of an instruction. */
46000
46001 static enum insn_path
46002 get_insn_path (rtx insn)
46003 {
46004 enum attr_amdfam10_decode path = get_attr_amdfam10_decode (insn);
46005
46006 if ((int)path == 0)
46007 return path_single;
46008
46009 if ((int)path == 1)
46010 return path_double;
46011
46012 return path_multi;
46013 }
46014
46015 /* Return insn dispatch group. */
46016
46017 static enum dispatch_group
46018 get_insn_group (rtx insn)
46019 {
46020 enum dispatch_group group = get_mem_group (insn);
46021 if (group)
46022 return group;
46023
46024 if (is_branch (insn))
46025 return disp_branch;
46026
46027 if (is_cmp (insn))
46028 return disp_cmp;
46029
46030 if (has_immediate (insn))
46031 return disp_imm;
46032
46033 if (is_prefetch (insn))
46034 return disp_prefetch;
46035
46036 return disp_no_group;
46037 }
46038
46039 /* Count number of GROUP restricted instructions in a dispatch
46040 window WINDOW_LIST. */
46041
46042 static int
46043 count_num_restricted (rtx insn, dispatch_windows *window_list)
46044 {
46045 enum dispatch_group group = get_insn_group (insn);
46046 int imm_size;
46047 int num_imm_operand;
46048 int num_imm32_operand;
46049 int num_imm64_operand;
46050
46051 if (group == disp_no_group)
46052 return 0;
46053
46054 if (group == disp_imm)
46055 {
46056 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
46057 &num_imm64_operand);
46058 if (window_list->imm_size + imm_size > MAX_IMM_SIZE
46059 || num_imm_operand + window_list->num_imm > MAX_IMM
46060 || (num_imm32_operand > 0
46061 && (window_list->num_imm_32 + num_imm32_operand > MAX_IMM_32
46062 || window_list->num_imm_64 * 2 + num_imm32_operand > MAX_IMM_32))
46063 || (num_imm64_operand > 0
46064 && (window_list->num_imm_64 + num_imm64_operand > MAX_IMM_64
46065 || window_list->num_imm_32 + num_imm64_operand * 2 > MAX_IMM_32))
46066 || (window_list->imm_size + imm_size == MAX_IMM_SIZE
46067 && num_imm64_operand > 0
46068 && ((window_list->num_imm_64 > 0
46069 && window_list->num_insn >= 2)
46070 || window_list->num_insn >= 3)))
46071 return BIG;
46072
46073 return 1;
46074 }
46075
46076 if ((group == disp_load_store
46077 && (window_list->num_loads >= MAX_LOAD
46078 || window_list->num_stores >= MAX_STORE))
46079 || ((group == disp_load
46080 || group == disp_prefetch)
46081 && window_list->num_loads >= MAX_LOAD)
46082 || (group == disp_store
46083 && window_list->num_stores >= MAX_STORE))
46084 return BIG;
46085
46086 return 1;
46087 }
46088
46089 /* This function returns true if insn satisfies dispatch rules on the
46090 last window scheduled. */
46091
46092 static bool
46093 fits_dispatch_window (rtx insn)
46094 {
46095 dispatch_windows *window_list = dispatch_window_list;
46096 dispatch_windows *window_list_next = dispatch_window_list->next;
46097 unsigned int num_restrict;
46098 enum dispatch_group group = get_insn_group (insn);
46099 enum insn_path path = get_insn_path (insn);
46100 int sum;
46101
46102 /* Make disp_cmp and disp_jcc get scheduled at the latest. These
46103 instructions should be given the lowest priority in the
46104 scheduling process in the Haifa scheduler to make sure they will be
46105 scheduled in the same dispatch window as the reference to them. */
46106 if (group == disp_jcc || group == disp_cmp)
46107 return false;
46108
46109 /* Check nonrestricted. */
46110 if (group == disp_no_group || group == disp_branch)
46111 return true;
46112
46113 /* Get last dispatch window. */
46114 if (window_list_next)
46115 window_list = window_list_next;
46116
46117 if (window_list->window_num == 1)
46118 {
46119 sum = window_list->prev->window_size + window_list->window_size;
46120
46121 if (sum == 32
46122 || (min_insn_size (insn) + sum) >= 48)
46123 /* Window 1 is full. Go for next window. */
46124 return true;
46125 }
46126
46127 num_restrict = count_num_restricted (insn, window_list);
46128
46129 if (num_restrict > num_allowable_groups[group])
46130 return false;
46131
46132 /* See if it fits in the first window. */
46133 if (window_list->window_num == 0)
46134 {
46135 /* The first window should have only single and double path
46136 uops. */
46137 if (path == path_double
46138 && (window_list->num_uops + 2) > MAX_INSN)
46139 return false;
46140 else if (path != path_single)
46141 return false;
46142 }
46143 return true;
46144 }
46145
46146 /* Add an instruction INSN with NUM_UOPS micro-operations to the
46147 dispatch window WINDOW_LIST. */
46148
46149 static void
46150 add_insn_window (rtx insn, dispatch_windows *window_list, int num_uops)
46151 {
46152 int byte_len = min_insn_size (insn);
46153 int num_insn = window_list->num_insn;
46154 int imm_size;
46155 sched_insn_info *window = window_list->window;
46156 enum dispatch_group group = get_insn_group (insn);
46157 enum insn_path path = get_insn_path (insn);
46158 int num_imm_operand;
46159 int num_imm32_operand;
46160 int num_imm64_operand;
46161
46162 if (!window_list->violation && group != disp_cmp
46163 && !fits_dispatch_window (insn))
46164 window_list->violation = true;
46165
46166 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
46167 &num_imm64_operand);
46168
46169 /* Initialize window with new instruction. */
46170 window[num_insn].insn = insn;
46171 window[num_insn].byte_len = byte_len;
46172 window[num_insn].group = group;
46173 window[num_insn].path = path;
46174 window[num_insn].imm_bytes = imm_size;
46175
46176 window_list->window_size += byte_len;
46177 window_list->num_insn = num_insn + 1;
46178 window_list->num_uops = window_list->num_uops + num_uops;
46179 window_list->imm_size += imm_size;
46180 window_list->num_imm += num_imm_operand;
46181 window_list->num_imm_32 += num_imm32_operand;
46182 window_list->num_imm_64 += num_imm64_operand;
46183
46184 if (group == disp_store)
46185 window_list->num_stores += 1;
46186 else if (group == disp_load
46187 || group == disp_prefetch)
46188 window_list->num_loads += 1;
46189 else if (group == disp_load_store)
46190 {
46191 window_list->num_stores += 1;
46192 window_list->num_loads += 1;
46193 }
46194 }
46195
46196 /* Adds a scheduled instruction, INSN, to the current dispatch window.
46197 If the total bytes of instructions or the number of instructions in
46198 the window exceed the allowable limits, it allocates a new window. */
46199
46200 static void
46201 add_to_dispatch_window (rtx insn)
46202 {
46203 int byte_len;
46204 dispatch_windows *window_list;
46205 dispatch_windows *next_list;
46206 dispatch_windows *window0_list;
46207 enum insn_path path;
46208 enum dispatch_group insn_group;
46209 bool insn_fits;
46210 int num_insn;
46211 int num_uops;
46212 int window_num;
46213 int insn_num_uops;
46214 int sum;
46215
46216 if (INSN_CODE (insn) < 0)
46217 return;
46218
46219 byte_len = min_insn_size (insn);
46220 window_list = dispatch_window_list;
46221 next_list = window_list->next;
46222 path = get_insn_path (insn);
46223 insn_group = get_insn_group (insn);
46224
46225 /* Get the last dispatch window. */
46226 if (next_list)
46227 window_list = dispatch_window_list->next;
46228
46229 if (path == path_single)
46230 insn_num_uops = 1;
46231 else if (path == path_double)
46232 insn_num_uops = 2;
46233 else
46234 insn_num_uops = (int) path;
46235
46236 /* If the current window is full, get a new window.
46237 Window number zero is full if MAX_INSN uops are scheduled in it.
46238 Window number one is full if window zero's bytes plus window
46239 one's bytes total 32, if adding the bytes of the new instruction
46240 brings the total to 48 or more, or if it already has MAX_INSN
46241 instructions in it. */
46242 num_insn = window_list->num_insn;
46243 num_uops = window_list->num_uops;
46244 window_num = window_list->window_num;
46245 insn_fits = fits_dispatch_window (insn);
46246
46247 if (num_insn >= MAX_INSN
46248 || num_uops + insn_num_uops > MAX_INSN
46249 || !(insn_fits))
46250 {
46251 window_num = ~window_num & 1;
46252 window_list = allocate_next_window (window_num);
46253 }
46254
46255 if (window_num == 0)
46256 {
46257 add_insn_window (insn, window_list, insn_num_uops);
46258 if (window_list->num_insn >= MAX_INSN
46259 && insn_group == disp_branch)
46260 {
46261 process_end_window ();
46262 return;
46263 }
46264 }
46265 else if (window_num == 1)
46266 {
46267 window0_list = window_list->prev;
46268 sum = window0_list->window_size + window_list->window_size;
46269 if (sum == 32
46270 || (byte_len + sum) >= 48)
46271 {
46272 process_end_window ();
46273 window_list = dispatch_window_list;
46274 }
46275
46276 add_insn_window (insn, window_list, insn_num_uops);
46277 }
46278 else
46279 gcc_unreachable ();
46280
46281 if (is_end_basic_block (insn_group))
46282 {
46283 /* End of basic block is reached; do end-basic-block processing. */
46284 process_end_window ();
46285 return;
46286 }
46287 }
46288
46289 /* Print the dispatch window, WINDOW_NUM, to FILE. */
46290
46291 DEBUG_FUNCTION static void
46292 debug_dispatch_window_file (FILE *file, int window_num)
46293 {
46294 dispatch_windows *list;
46295 int i;
46296
46297 if (window_num == 0)
46298 list = dispatch_window_list;
46299 else
46300 list = dispatch_window_list1;
46301
46302 fprintf (file, "Window #%d:\n", list->window_num);
46303 fprintf (file, " num_insn = %d, num_uops = %d, window_size = %d\n",
46304 list->num_insn, list->num_uops, list->window_size);
46305 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
46306 list->num_imm, list->num_imm_32, list->num_imm_64, list->imm_size);
46307
46308 fprintf (file, " num_loads = %d, num_stores = %d\n", list->num_loads,
46309 list->num_stores);
46310 fprintf (file, " insn info:\n");
46311
46312 for (i = 0; i < MAX_INSN; i++)
46313 {
46314 if (!list->window[i].insn)
46315 break;
46316 fprintf (file, " group[%d] = %s, insn[%d] = %p, path[%d] = %d byte_len[%d] = %d, imm_bytes[%d] = %d\n",
46317 i, group_name[list->window[i].group],
46318 i, (void *)list->window[i].insn,
46319 i, list->window[i].path,
46320 i, list->window[i].byte_len,
46321 i, list->window[i].imm_bytes);
46322 }
46323 }
46324
46325 /* Print to stdout a dispatch window. */
46326
46327 DEBUG_FUNCTION void
46328 debug_dispatch_window (int window_num)
46329 {
46330 debug_dispatch_window_file (stdout, window_num);
46331 }
46332
46333 /* Print INSN dispatch information to FILE. */
46334
46335 DEBUG_FUNCTION static void
46336 debug_insn_dispatch_info_file (FILE *file, rtx insn)
46337 {
46338 int byte_len;
46339 enum insn_path path;
46340 enum dispatch_group group;
46341 int imm_size;
46342 int num_imm_operand;
46343 int num_imm32_operand;
46344 int num_imm64_operand;
46345
46346 if (INSN_CODE (insn) < 0)
46347 return;
46348
46349 byte_len = min_insn_size (insn);
46350 path = get_insn_path (insn);
46351 group = get_insn_group (insn);
46352 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
46353 &num_imm64_operand);
46354
46355 fprintf (file, " insn info:\n");
46356 fprintf (file, " group = %s, path = %d, byte_len = %d\n",
46357 group_name[group], path, byte_len);
46358 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
46359 num_imm_operand, num_imm32_operand, num_imm64_operand, imm_size);
46360 }
46361
46362 /* Print to STDOUT the status of the ready list with respect to
46363 dispatch windows. */
46364
46365 DEBUG_FUNCTION void
46366 debug_ready_dispatch (void)
46367 {
46368 int i;
46369 int no_ready = number_in_ready ();
46370
46371 fprintf (stdout, "Number of ready: %d\n", no_ready);
46372
46373 for (i = 0; i < no_ready; i++)
46374 debug_insn_dispatch_info_file (stdout, get_ready_element (i));
46375 }
46376
46377 /* This routine is the driver of the dispatch scheduler. */
46378
46379 static void
46380 do_dispatch (rtx insn, int mode)
46381 {
46382 if (mode == DISPATCH_INIT)
46383 init_dispatch_sched ();
46384 else if (mode == ADD_TO_DISPATCH_WINDOW)
46385 add_to_dispatch_window (insn);
46386 }
46387
46388 /* Return TRUE if Dispatch Scheduling is supported. */
46389
46390 static bool
46391 has_dispatch (rtx insn, int action)
46392 {
46393 if ((TARGET_BDVER1 || TARGET_BDVER2 || TARGET_BDVER3 || TARGET_BDVER4)
46394 && flag_dispatch_scheduler)
46395 switch (action)
46396 {
46397 default:
46398 return false;
46399
46400 case IS_DISPATCH_ON:
46401 return true;
46402 break;
46403
46404 case IS_CMP:
46405 return is_cmp (insn);
46406
46407 case DISPATCH_VIOLATION:
46408 return dispatch_violation ();
46409
46410 case FITS_DISPATCH_WINDOW:
46411 return fits_dispatch_window (insn);
46412 }
46413
46414 return false;
46415 }
46416
46417 /* Implementation of reassociation_width target hook used by
46418 reassoc phase to identify parallelism level in reassociated
46419 tree. The statement's tree code is passed in OPC. The argument type
46420 is passed in MODE.
46421
46422 Currently parallel reassociation is enabled for Atom
46423 processors only and we set reassociation width to be 2
46424 because Atom may issue up to 2 instructions per cycle.
46425
46426 Return value should be fixed if parallel reassociation is
46427 enabled for other processors. */
46428
46429 static int
46430 ix86_reassociation_width (unsigned int opc ATTRIBUTE_UNUSED,
46431 enum machine_mode mode)
46432 {
46433 int res = 1;
46434
46435 /* Vector part. */
46436 if (VECTOR_MODE_P (mode))
46437 {
46438 if (TARGET_VECTOR_PARALLEL_EXECUTION)
46439 return 2;
46440 else
46441 return 1;
46442 }
46443
46444 /* Scalar part. */
46445 if (INTEGRAL_MODE_P (mode) && TARGET_REASSOC_INT_TO_PARALLEL)
46446 res = 2;
46447 else if (FLOAT_MODE_P (mode) && TARGET_REASSOC_FP_TO_PARALLEL)
46448 res = 2;
46449
46450 return res;
46451 }
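
/* Illustrative example (not part of the compiler): with a reassociation
   width of 2 the reassoc pass may rewrite the dependent chain

     s = ((a + b) + c) + d;      (depth 3)

   into two independent partial sums

     s = (a + b) + (c + d);      (depth 2)

   so that two additions can issue in the same cycle on a 2-wide unit. */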
46452
46453 /* ??? No autovectorization into MMX or 3DNOW until we can reliably
46454 place emms and femms instructions. */
46455
46456 static enum machine_mode
46457 ix86_preferred_simd_mode (enum machine_mode mode)
46458 {
46459 if (!TARGET_SSE)
46460 return word_mode;
46461
46462 switch (mode)
46463 {
46464 case QImode:
46465 return (TARGET_AVX && !TARGET_PREFER_AVX128) ? V32QImode : V16QImode;
46466 case HImode:
46467 return (TARGET_AVX && !TARGET_PREFER_AVX128) ? V16HImode : V8HImode;
46468 case SImode:
46469 return TARGET_AVX512F ? V16SImode :
46470 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V8SImode : V4SImode;
46471 case DImode:
46472 return TARGET_AVX512F ? V8DImode :
46473 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V4DImode : V2DImode;
46474
46475 case SFmode:
46476 if (TARGET_AVX512F)
46477 return V16SFmode;
46478 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
46479 return V8SFmode;
46480 else
46481 return V4SFmode;
46482
46483 case DFmode:
46484 if (!TARGET_VECTORIZE_DOUBLE)
46485 return word_mode;
46486 else if (TARGET_AVX512F)
46487 return V8DFmode;
46488 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
46489 return V4DFmode;
46490 else if (TARGET_SSE2)
46491 return V2DFmode;
46492 /* FALLTHRU */
46493
46494 default:
46495 return word_mode;
46496 }
46497 }
46498
46499 /* If AVX is enabled then try vectorizing with both 256bit and 128bit
46500 vectors. If AVX512F is enabled then try vectorizing with 512bit,
46501 256bit and 128bit vectors. */
46502
46503 static unsigned int
46504 ix86_autovectorize_vector_sizes (void)
46505 {
46506 return TARGET_AVX512F ? 64 | 32 | 16 :
46507 (TARGET_AVX && !TARGET_PREFER_AVX128) ? 32 | 16 : 0;
46508 }
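
/* For example (illustrative): with AVX2 enabled but neither AVX-512F nor a
   preference for 128-bit vectors, this returns 32 | 16 = 48, i.e. the
   vectorizer may try both 32-byte and 16-byte vector sizes. */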
46509
46510 \f
46511
46512 /* Return class of registers which could be used for pseudo of MODE
46513 and of class RCLASS for spilling instead of memory. Return NO_REGS
46514 if it is not possible or not profitable. */
46515 static reg_class_t
46516 ix86_spill_class (reg_class_t rclass, enum machine_mode mode)
46517 {
46518 if (TARGET_SSE && TARGET_GENERAL_REGS_SSE_SPILL && ! TARGET_MMX
46519 && (mode == SImode || (TARGET_64BIT && mode == DImode))
46520 && rclass != NO_REGS && INTEGER_CLASS_P (rclass))
46521 return ALL_SSE_REGS;
46522 return NO_REGS;
46523 }
46524
46525 /* Implement targetm.vectorize.init_cost. */
46526
46527 static void *
46528 ix86_init_cost (struct loop *loop_info ATTRIBUTE_UNUSED)
46529 {
46530 unsigned *cost = XNEWVEC (unsigned, 3);
46531 cost[vect_prologue] = cost[vect_body] = cost[vect_epilogue] = 0;
46532 return cost;
46533 }
46534
46535 /* Implement targetm.vectorize.add_stmt_cost. */
46536
46537 static unsigned
46538 ix86_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
46539 struct _stmt_vec_info *stmt_info, int misalign,
46540 enum vect_cost_model_location where)
46541 {
46542 unsigned *cost = (unsigned *) data;
46543 unsigned retval = 0;
46544
46545 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
46546 int stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);
46547
46548 /* Statements in an inner loop relative to the loop being
46549 vectorized are weighted more heavily. The value here is
46550 arbitrary and could potentially be improved with analysis. */
46551 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
46552 count *= 50; /* FIXME. */
46553
46554 retval = (unsigned) (count * stmt_cost);
46555
46556 /* We need to multiply all vector stmt cost by 1.7 (estimated cost)
46557 for Silvermont as it has an out-of-order integer pipeline and can execute
46558 2 scalar instructions per tick, but has an in-order SIMD pipeline. */
46559 if (TARGET_SILVERMONT || TARGET_INTEL)
46560 if (stmt_info && stmt_info->stmt)
46561 {
46562 tree lhs_op = gimple_get_lhs (stmt_info->stmt);
46563 if (lhs_op && TREE_CODE (TREE_TYPE (lhs_op)) == INTEGER_TYPE)
46564 retval = (retval * 17) / 10;
46565 }
46566
46567 cost[where] += retval;
46568
46569 return retval;
46570 }
46571
46572 /* Implement targetm.vectorize.finish_cost. */
46573
46574 static void
46575 ix86_finish_cost (void *data, unsigned *prologue_cost,
46576 unsigned *body_cost, unsigned *epilogue_cost)
46577 {
46578 unsigned *cost = (unsigned *) data;
46579 *prologue_cost = cost[vect_prologue];
46580 *body_cost = cost[vect_body];
46581 *epilogue_cost = cost[vect_epilogue];
46582 }
46583
46584 /* Implement targetm.vectorize.destroy_cost_data. */
46585
46586 static void
46587 ix86_destroy_cost_data (void *data)
46588 {
46589 free (data);
46590 }
46591
46592 /* Validate target specific memory model bits in VAL. */
46593
46594 static unsigned HOST_WIDE_INT
46595 ix86_memmodel_check (unsigned HOST_WIDE_INT val)
46596 {
46597 unsigned HOST_WIDE_INT model = val & MEMMODEL_MASK;
46598 bool strong;
46599
46600 if (val & ~(unsigned HOST_WIDE_INT)(IX86_HLE_ACQUIRE|IX86_HLE_RELEASE
46601 |MEMMODEL_MASK)
46602 || ((val & IX86_HLE_ACQUIRE) && (val & IX86_HLE_RELEASE)))
46603 {
46604 warning (OPT_Winvalid_memory_model,
46605 "Unknown architecture specific memory model");
46606 return MEMMODEL_SEQ_CST;
46607 }
46608 strong = (model == MEMMODEL_ACQ_REL || model == MEMMODEL_SEQ_CST);
46609 if (val & IX86_HLE_ACQUIRE && !(model == MEMMODEL_ACQUIRE || strong))
46610 {
46611 warning (OPT_Winvalid_memory_model,
46612 "HLE_ACQUIRE not used with ACQUIRE or stronger memory model");
46613 return MEMMODEL_SEQ_CST | IX86_HLE_ACQUIRE;
46614 }
46615 if (val & IX86_HLE_RELEASE && !(model == MEMMODEL_RELEASE || strong))
46616 {
46617 warning (OPT_Winvalid_memory_model,
46618 "HLE_RELEASE not used with RELEASE or stronger memory model");
46619 return MEMMODEL_SEQ_CST | IX86_HLE_RELEASE;
46620 }
46621 return val;
46622 }
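
/* Illustrative usage of the extra bits validated above (assuming the
   __ATOMIC_HLE_* macros that GCC predefines when HLE is enabled):

     __atomic_store_n (&lock, 0, __ATOMIC_RELEASE | __ATOMIC_HLE_RELEASE);

   pairs IX86_HLE_RELEASE with a RELEASE base model and is accepted, while
   combining __ATOMIC_HLE_RELEASE with __ATOMIC_ACQUIRE would trigger the
   -Winvalid-memory-model warning above and be rewritten to SEQ_CST plus
   the HLE bit. */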
46623
46624 /* Set CLONEI->vecsize_mangle, CLONEI->vecsize_int,
46625 CLONEI->vecsize_float and if CLONEI->simdlen is 0, also
46626 CLONEI->simdlen. Return 0 if SIMD clones shouldn't be emitted,
46627 or number of vecsize_mangle variants that should be emitted. */
46628
46629 static int
46630 ix86_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
46631 struct cgraph_simd_clone *clonei,
46632 tree base_type, int num)
46633 {
46634 int ret = 1;
46635
46636 if (clonei->simdlen
46637 && (clonei->simdlen < 2
46638 || clonei->simdlen > 16
46639 || (clonei->simdlen & (clonei->simdlen - 1)) != 0))
46640 {
46641 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
46642 "unsupported simdlen %d", clonei->simdlen);
46643 return 0;
46644 }
46645
46646 tree ret_type = TREE_TYPE (TREE_TYPE (node->decl));
46647 if (TREE_CODE (ret_type) != VOID_TYPE)
46648 switch (TYPE_MODE (ret_type))
46649 {
46650 case QImode:
46651 case HImode:
46652 case SImode:
46653 case DImode:
46654 case SFmode:
46655 case DFmode:
46656 /* case SCmode: */
46657 /* case DCmode: */
46658 break;
46659 default:
46660 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
46661 "unsupported return type %qT for simd\n", ret_type);
46662 return 0;
46663 }
46664
46665 tree t;
46666 int i;
46667
46668 for (t = DECL_ARGUMENTS (node->decl), i = 0; t; t = DECL_CHAIN (t), i++)
46669 /* FIXME: Shouldn't we allow such arguments if they are uniform? */
46670 switch (TYPE_MODE (TREE_TYPE (t)))
46671 {
46672 case QImode:
46673 case HImode:
46674 case SImode:
46675 case DImode:
46676 case SFmode:
46677 case DFmode:
46678 /* case SCmode: */
46679 /* case DCmode: */
46680 break;
46681 default:
46682 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
46683 "unsupported argument type %qT for simd\n", TREE_TYPE (t));
46684 return 0;
46685 }
46686
46687 if (clonei->cilk_elemental)
46688 {
46689 /* Parse the processor clause here. If not present, default to 'b'. */
46690 clonei->vecsize_mangle = 'b';
46691 }
46692 else if (!TREE_PUBLIC (node->decl))
46693 {
46694 /* If the function isn't exported, we can pick up just one ISA
46695 for the clones. */
46696 if (TARGET_AVX2)
46697 clonei->vecsize_mangle = 'd';
46698 else if (TARGET_AVX)
46699 clonei->vecsize_mangle = 'c';
46700 else
46701 clonei->vecsize_mangle = 'b';
46702 ret = 1;
46703 }
46704 else
46705 {
46706 clonei->vecsize_mangle = "bcd"[num];
46707 ret = 3;
46708 }
46709 switch (clonei->vecsize_mangle)
46710 {
46711 case 'b':
46712 clonei->vecsize_int = 128;
46713 clonei->vecsize_float = 128;
46714 break;
46715 case 'c':
46716 clonei->vecsize_int = 128;
46717 clonei->vecsize_float = 256;
46718 break;
46719 case 'd':
46720 clonei->vecsize_int = 256;
46721 clonei->vecsize_float = 256;
46722 break;
46723 }
46724 if (clonei->simdlen == 0)
46725 {
46726 if (SCALAR_INT_MODE_P (TYPE_MODE (base_type)))
46727 clonei->simdlen = clonei->vecsize_int;
46728 else
46729 clonei->simdlen = clonei->vecsize_float;
46730 clonei->simdlen /= GET_MODE_BITSIZE (TYPE_MODE (base_type));
46731 if (clonei->simdlen > 16)
46732 clonei->simdlen = 16;
46733 }
46734 return ret;
46735 }
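
/* Worked example (illustrative): for an exported "#pragma omp declare simd"
   function whose base type is float and which has no explicit simdlen,
   three clones are requested ('b', 'c' and 'd'); the 'c' (AVX) variant gets
   vecsize_float = 256, so its simdlen becomes 256 / 32 = 8 lanes. */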
46736
46737 /* Add target attribute to SIMD clone NODE if needed. */
46738
46739 static void
46740 ix86_simd_clone_adjust (struct cgraph_node *node)
46741 {
46742 const char *str = NULL;
46743 gcc_assert (node->decl == cfun->decl);
46744 switch (node->simdclone->vecsize_mangle)
46745 {
46746 case 'b':
46747 if (!TARGET_SSE2)
46748 str = "sse2";
46749 break;
46750 case 'c':
46751 if (!TARGET_AVX)
46752 str = "avx";
46753 break;
46754 case 'd':
46755 if (!TARGET_AVX2)
46756 str = "avx2";
46757 break;
46758 default:
46759 gcc_unreachable ();
46760 }
46761 if (str == NULL)
46762 return;
46763 push_cfun (NULL);
46764 tree args = build_tree_list (NULL_TREE, build_string (strlen (str), str));
46765 bool ok = ix86_valid_target_attribute_p (node->decl, NULL, args, 0);
46766 gcc_assert (ok);
46767 pop_cfun ();
46768 ix86_previous_fndecl = NULL_TREE;
46769 ix86_set_current_function (node->decl);
46770 }
46771
46772 /* If SIMD clone NODE can't be used in a vectorized loop
46773 in current function, return -1, otherwise return a badness of using it
46774 (0 if it is most desirable from vecsize_mangle point of view, 1
46775 slightly less desirable, etc.). */
46776
46777 static int
46778 ix86_simd_clone_usable (struct cgraph_node *node)
46779 {
46780 switch (node->simdclone->vecsize_mangle)
46781 {
46782 case 'b':
46783 if (!TARGET_SSE2)
46784 return -1;
46785 if (!TARGET_AVX)
46786 return 0;
46787 return TARGET_AVX2 ? 2 : 1;
46788 case 'c':
46789 if (!TARGET_AVX)
46790 return -1;
46791 return TARGET_AVX2 ? 1 : 0;
46792 break;
46793 case 'd':
46794 if (!TARGET_AVX2)
46795 return -1;
46796 return 0;
46797 default:
46798 gcc_unreachable ();
46799 }
46800 }
46801
46802 /* This function counts the number of memory references.
46803 This value determines the unrolling factor for
46804 bdver3 and bdver4 architectures. */
46805
46806 static int
46807 ix86_loop_memcount (rtx *x, unsigned *mem_count)
46808 {
46809 if (*x != NULL_RTX && MEM_P (*x))
46810 {
46811 enum machine_mode mode;
46812 unsigned int n_words;
46813
46814 mode = GET_MODE (*x);
46815 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
46816
46817 if (n_words > 4)
46818 (*mem_count)+=2;
46819 else
46820 (*mem_count)+=1;
46821 }
46822 return 0;
46823 }
46824
46825 /* This function adjusts the unroll factor based on
46826 the hardware capabilities. For example, bdver3 has
46827 a loop buffer which makes unrolling of smaller
46828 loops less important. This function decides the
46829 unroll factor using the number of memory references
46830 (the value 32 is used) as a heuristic. */
46831
46832 static unsigned
46833 ix86_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
46834 {
46835 basic_block *bbs;
46836 rtx insn;
46837 unsigned i;
46838 unsigned mem_count = 0;
46839
46840 if (!TARGET_ADJUST_UNROLL)
46841 return nunroll;
46842
46843 /* Count the number of memory references within the loop body. */
46844 bbs = get_loop_body (loop);
46845 for (i = 0; i < loop->num_nodes; i++)
46846 {
46847 for (insn = BB_HEAD (bbs[i]); insn != BB_END (bbs[i]); insn = NEXT_INSN (insn))
46848 if (NONDEBUG_INSN_P (insn))
46849 for_each_rtx (&insn, (rtx_function) ix86_loop_memcount, &mem_count);
46850 }
46851 free (bbs);
46852
46853 if (mem_count && mem_count <= 32)
46854 return 32 / mem_count;
46855
46856 return nunroll;
46857 }
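
/* Worked example (illustrative): on a bdver3/bdver4 tuning with
   TARGET_ADJUST_UNROLL set, a loop body containing 8 word-sized memory
   references has its unroll factor adjusted to 32 / 8 = 4, whereas a loop
   with more than 32 such references keeps the NUNROLL value chosen by the
   generic heuristics. */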
46858
46859
46860 /* Implement TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P. */
46861
46862 static bool
46863 ix86_float_exceptions_rounding_supported_p (void)
46864 {
46865 /* For x87 floating point with standard excess precision handling,
46866 there is no adddf3 pattern (since x87 floating point only has
46867 XFmode operations) so the default hook implementation gets this
46868 wrong. */
46869 return TARGET_80387 || TARGET_SSE_MATH;
46870 }
46871
46872 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV. */
46873
46874 static void
46875 ix86_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
46876 {
46877 if (!TARGET_80387 && !TARGET_SSE_MATH)
46878 return;
46879 tree exceptions_var = create_tmp_var (integer_type_node, NULL);
46880 if (TARGET_80387)
46881 {
46882 tree fenv_index_type = build_index_type (size_int (6));
46883 tree fenv_type = build_array_type (unsigned_type_node, fenv_index_type);
46884 tree fenv_var = create_tmp_var (fenv_type, NULL);
46885 mark_addressable (fenv_var);
46886 tree fenv_ptr = build_pointer_type (fenv_type);
46887 tree fenv_addr = build1 (ADDR_EXPR, fenv_ptr, fenv_var);
46888 fenv_addr = fold_convert (ptr_type_node, fenv_addr);
46889 tree fnstenv = ix86_builtins[IX86_BUILTIN_FNSTENV];
46890 tree fldenv = ix86_builtins[IX86_BUILTIN_FLDENV];
46891 tree fnstsw = ix86_builtins[IX86_BUILTIN_FNSTSW];
46892 tree fnclex = ix86_builtins[IX86_BUILTIN_FNCLEX];
46893 tree hold_fnstenv = build_call_expr (fnstenv, 1, fenv_addr);
46894 tree hold_fnclex = build_call_expr (fnclex, 0);
46895 *hold = build2 (COMPOUND_EXPR, void_type_node, hold_fnstenv,
46896 hold_fnclex);
46897 *clear = build_call_expr (fnclex, 0);
46898 tree sw_var = create_tmp_var (short_unsigned_type_node, NULL);
46899 mark_addressable (sw_var);
46900 tree su_ptr = build_pointer_type (short_unsigned_type_node);
46901 tree sw_addr = build1 (ADDR_EXPR, su_ptr, sw_var);
46902 tree fnstsw_call = build_call_expr (fnstsw, 1, sw_addr);
46903 tree exceptions_x87 = fold_convert (integer_type_node, sw_var);
46904 tree update_mod = build2 (MODIFY_EXPR, integer_type_node,
46905 exceptions_var, exceptions_x87);
46906 *update = build2 (COMPOUND_EXPR, integer_type_node,
46907 fnstsw_call, update_mod);
46908 tree update_fldenv = build_call_expr (fldenv, 1, fenv_addr);
46909 *update = build2 (COMPOUND_EXPR, void_type_node, *update, update_fldenv);
46910 }
46911 if (TARGET_SSE_MATH)
46912 {
46913 tree mxcsr_orig_var = create_tmp_var (unsigned_type_node, NULL);
46914 tree mxcsr_mod_var = create_tmp_var (unsigned_type_node, NULL);
46915 tree stmxcsr = ix86_builtins[IX86_BUILTIN_STMXCSR];
46916 tree ldmxcsr = ix86_builtins[IX86_BUILTIN_LDMXCSR];
46917 tree stmxcsr_hold_call = build_call_expr (stmxcsr, 0);
46918 tree hold_assign_orig = build2 (MODIFY_EXPR, unsigned_type_node,
46919 mxcsr_orig_var, stmxcsr_hold_call);
46920 tree hold_mod_val = build2 (BIT_IOR_EXPR, unsigned_type_node,
46921 mxcsr_orig_var,
46922 build_int_cst (unsigned_type_node, 0x1f80));
46923 hold_mod_val = build2 (BIT_AND_EXPR, unsigned_type_node, hold_mod_val,
46924 build_int_cst (unsigned_type_node, 0xffffffc0));
46925 tree hold_assign_mod = build2 (MODIFY_EXPR, unsigned_type_node,
46926 mxcsr_mod_var, hold_mod_val);
46927 tree ldmxcsr_hold_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
46928 tree hold_all = build2 (COMPOUND_EXPR, unsigned_type_node,
46929 hold_assign_orig, hold_assign_mod);
46930 hold_all = build2 (COMPOUND_EXPR, void_type_node, hold_all,
46931 ldmxcsr_hold_call);
46932 if (*hold)
46933 *hold = build2 (COMPOUND_EXPR, void_type_node, *hold, hold_all);
46934 else
46935 *hold = hold_all;
46936 tree ldmxcsr_clear_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
46937 if (*clear)
46938 *clear = build2 (COMPOUND_EXPR, void_type_node, *clear,
46939 ldmxcsr_clear_call);
46940 else
46941 *clear = ldmxcsr_clear_call;
46942 tree stxmcsr_update_call = build_call_expr (stmxcsr, 0);
46943 tree exceptions_sse = fold_convert (integer_type_node,
46944 stxmcsr_update_call);
46945 if (*update)
46946 {
46947 tree exceptions_mod = build2 (BIT_IOR_EXPR, integer_type_node,
46948 exceptions_var, exceptions_sse);
46949 tree exceptions_assign = build2 (MODIFY_EXPR, integer_type_node,
46950 exceptions_var, exceptions_mod);
46951 *update = build2 (COMPOUND_EXPR, integer_type_node, *update,
46952 exceptions_assign);
46953 }
46954 else
46955 *update = build2 (MODIFY_EXPR, integer_type_node,
46956 exceptions_var, exceptions_sse);
46957 tree ldmxcsr_update_call = build_call_expr (ldmxcsr, 1, mxcsr_orig_var);
46958 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
46959 ldmxcsr_update_call);
46960 }
46961 tree atomic_feraiseexcept
46962 = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
46963 tree atomic_feraiseexcept_call = build_call_expr (atomic_feraiseexcept,
46964 1, exceptions_var);
46965 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
46966 atomic_feraiseexcept_call);
46967 }
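
/* Roughly (an illustrative sketch only, using the builtins referenced
   above), the SSE_MATH part of the expansion behaves like the following,
   where 0x1f80 masks all exceptions and 0xffffffc0 clears the six status
   flags:

     hold:    mxcsr_orig = __builtin_ia32_stmxcsr ();
              mxcsr_mod  = (mxcsr_orig | 0x1f80) & 0xffffffc0;
              __builtin_ia32_ldmxcsr (mxcsr_mod);
     clear:   __builtin_ia32_ldmxcsr (mxcsr_mod);
     update:  exceptions |= __builtin_ia32_stmxcsr ();
              __builtin_ia32_ldmxcsr (mxcsr_orig);
              __atomic_feraiseexcept (exceptions);  */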
46968
46969 /* Initialize the GCC target structure. */
46970 #undef TARGET_RETURN_IN_MEMORY
46971 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
46972
46973 #undef TARGET_LEGITIMIZE_ADDRESS
46974 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
46975
46976 #undef TARGET_ATTRIBUTE_TABLE
46977 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
46978 #undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
46979 #define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true
46980 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
46981 # undef TARGET_MERGE_DECL_ATTRIBUTES
46982 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
46983 #endif
46984
46985 #undef TARGET_COMP_TYPE_ATTRIBUTES
46986 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
46987
46988 #undef TARGET_INIT_BUILTINS
46989 #define TARGET_INIT_BUILTINS ix86_init_builtins
46990 #undef TARGET_BUILTIN_DECL
46991 #define TARGET_BUILTIN_DECL ix86_builtin_decl
46992 #undef TARGET_EXPAND_BUILTIN
46993 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
46994
46995 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
46996 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
46997 ix86_builtin_vectorized_function
46998
46999 #undef TARGET_VECTORIZE_BUILTIN_TM_LOAD
47000 #define TARGET_VECTORIZE_BUILTIN_TM_LOAD ix86_builtin_tm_load
47001
47002 #undef TARGET_VECTORIZE_BUILTIN_TM_STORE
47003 #define TARGET_VECTORIZE_BUILTIN_TM_STORE ix86_builtin_tm_store
47004
47005 #undef TARGET_VECTORIZE_BUILTIN_GATHER
47006 #define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather
47007
47008 #undef TARGET_BUILTIN_RECIPROCAL
47009 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
47010
47011 #undef TARGET_ASM_FUNCTION_EPILOGUE
47012 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
47013
47014 #undef TARGET_ENCODE_SECTION_INFO
47015 #ifndef SUBTARGET_ENCODE_SECTION_INFO
47016 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
47017 #else
47018 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
47019 #endif
47020
47021 #undef TARGET_ASM_OPEN_PAREN
47022 #define TARGET_ASM_OPEN_PAREN ""
47023 #undef TARGET_ASM_CLOSE_PAREN
47024 #define TARGET_ASM_CLOSE_PAREN ""
47025
47026 #undef TARGET_ASM_BYTE_OP
47027 #define TARGET_ASM_BYTE_OP ASM_BYTE
47028
47029 #undef TARGET_ASM_ALIGNED_HI_OP
47030 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
47031 #undef TARGET_ASM_ALIGNED_SI_OP
47032 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
47033 #ifdef ASM_QUAD
47034 #undef TARGET_ASM_ALIGNED_DI_OP
47035 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
47036 #endif
47037
47038 #undef TARGET_PROFILE_BEFORE_PROLOGUE
47039 #define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue
47040
47041 #undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
47042 #define TARGET_MANGLE_DECL_ASSEMBLER_NAME ix86_mangle_decl_assembler_name
47043
47044 #undef TARGET_ASM_UNALIGNED_HI_OP
47045 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
47046 #undef TARGET_ASM_UNALIGNED_SI_OP
47047 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
47048 #undef TARGET_ASM_UNALIGNED_DI_OP
47049 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
47050
47051 #undef TARGET_PRINT_OPERAND
47052 #define TARGET_PRINT_OPERAND ix86_print_operand
47053 #undef TARGET_PRINT_OPERAND_ADDRESS
47054 #define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
47055 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
47056 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
47057 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
47058 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
47059
47060 #undef TARGET_SCHED_INIT_GLOBAL
47061 #define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
47062 #undef TARGET_SCHED_ADJUST_COST
47063 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
47064 #undef TARGET_SCHED_ISSUE_RATE
47065 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
47066 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
47067 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
47068 ia32_multipass_dfa_lookahead
47069 #undef TARGET_SCHED_MACRO_FUSION_P
47070 #define TARGET_SCHED_MACRO_FUSION_P ix86_macro_fusion_p
47071 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
47072 #define TARGET_SCHED_MACRO_FUSION_PAIR_P ix86_macro_fusion_pair_p
47073
47074 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
47075 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
47076
47077 #undef TARGET_MEMMODEL_CHECK
47078 #define TARGET_MEMMODEL_CHECK ix86_memmodel_check
47079
47080 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
47081 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV ix86_atomic_assign_expand_fenv
47082
47083 #ifdef HAVE_AS_TLS
47084 #undef TARGET_HAVE_TLS
47085 #define TARGET_HAVE_TLS true
47086 #endif
47087 #undef TARGET_CANNOT_FORCE_CONST_MEM
47088 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
47089 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
47090 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
47091
47092 #undef TARGET_DELEGITIMIZE_ADDRESS
47093 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
47094
47095 #undef TARGET_MS_BITFIELD_LAYOUT_P
47096 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
47097
47098 #if TARGET_MACHO
47099 #undef TARGET_BINDS_LOCAL_P
47100 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
47101 #endif
47102 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
47103 #undef TARGET_BINDS_LOCAL_P
47104 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
47105 #endif
47106
47107 #undef TARGET_ASM_OUTPUT_MI_THUNK
47108 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
47109 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
47110 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
47111
47112 #undef TARGET_ASM_FILE_START
47113 #define TARGET_ASM_FILE_START x86_file_start
47114
47115 #undef TARGET_OPTION_OVERRIDE
47116 #define TARGET_OPTION_OVERRIDE ix86_option_override
47117
47118 #undef TARGET_REGISTER_MOVE_COST
47119 #define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
47120 #undef TARGET_MEMORY_MOVE_COST
47121 #define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
47122 #undef TARGET_RTX_COSTS
47123 #define TARGET_RTX_COSTS ix86_rtx_costs
47124 #undef TARGET_ADDRESS_COST
47125 #define TARGET_ADDRESS_COST ix86_address_cost
47126
47127 #undef TARGET_FIXED_CONDITION_CODE_REGS
47128 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
47129 #undef TARGET_CC_MODES_COMPATIBLE
47130 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
47131
47132 #undef TARGET_MACHINE_DEPENDENT_REORG
47133 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
47134
47135 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
47136 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value
47137
47138 #undef TARGET_BUILD_BUILTIN_VA_LIST
47139 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
47140
47141 #undef TARGET_FOLD_BUILTIN
47142 #define TARGET_FOLD_BUILTIN ix86_fold_builtin
47143
#undef TARGET_COMPARE_VERSION_PRIORITY
#define TARGET_COMPARE_VERSION_PRIORITY ix86_compare_version_priority

#undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
#define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
  ix86_generate_version_dispatcher_body

#undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
#define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
  ix86_get_function_versions_dispatcher

#undef TARGET_ENUM_VA_LIST_P
#define TARGET_ENUM_VA_LIST_P ix86_enum_va_list

#undef TARGET_FN_ABI_VA_LIST
#define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list

#undef TARGET_CANONICAL_VA_LIST_TYPE
#define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start

#undef TARGET_MD_ASM_CLOBBERS
#define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers

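/* Argument passing and stack layout hooks implementing the 32-bit and
   64-bit ix86 calling conventions: varargs setup, stack-boundary and DRAP
   handling for dynamic realignment, static chain and trampolines.  */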
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG ix86_function_arg
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
#undef TARGET_UPDATE_STACK_BOUNDARY
#define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
#undef TARGET_GET_DRAP_RTX
#define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_STATIC_CHAIN
#define TARGET_STATIC_CHAIN ix86_static_chain
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
#undef TARGET_RETURN_POPS_ARGS
#define TARGET_RETURN_POPS_ARGS ix86_return_pops_args

#undef TARGET_LEGITIMATE_COMBINED_INSN
#define TARGET_LEGITIMATE_COMBINED_INSN ix86_legitimate_combined_insn

#undef TARGET_ASAN_SHADOW_OFFSET
#define TARGET_ASAN_SHADOW_OFFSET ix86_asan_shadow_offset

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p

#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix

#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
#endif

#ifdef SUBTARGET_INSERT_ATTRIBUTES
#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
#endif

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE ix86_mangle_type

#if !TARGET_MACHO
#undef TARGET_STACK_PROTECT_FAIL
#define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
#endif

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE ix86_function_value

#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode

#undef TARGET_MEMBER_TYPE_FORCES_BLK
#define TARGET_MEMBER_TYPE_FORCES_BLK ix86_member_type_forces_blk

#undef TARGET_INSTANTIATE_DECLS
#define TARGET_INSTANTIATE_DECLS ix86_instantiate_decls

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD ix86_secondary_reload

#undef TARGET_CLASS_MAX_NREGS
#define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
#undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
#define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
#undef TARGET_CLASS_LIKELY_SPILLED_P
#define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p

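/* Vectorizer hooks: per-operation cost estimates, constant permutation
   support, preferred SIMD modes, and the cost-model accounting used by the
   loop and SLP vectorizers.  */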
#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  ix86_builtin_vectorization_cost
#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
#define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
  ix86_vectorize_vec_perm_const_ok
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
  ix86_preferred_simd_mode
#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
  ix86_autovectorize_vector_sizes
#undef TARGET_VECTORIZE_INIT_COST
#define TARGET_VECTORIZE_INIT_COST ix86_init_cost
#undef TARGET_VECTORIZE_ADD_STMT_COST
#define TARGET_VECTORIZE_ADD_STMT_COST ix86_add_stmt_cost
#undef TARGET_VECTORIZE_FINISH_COST
#define TARGET_VECTORIZE_FINISH_COST ix86_finish_cost
#undef TARGET_VECTORIZE_DESTROY_COST_DATA
#define TARGET_VECTORIZE_DESTROY_COST_DATA ix86_destroy_cost_data

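/* Support for attribute ((target ("..."))) and per-function option
   overrides: switching the current function, validating the attribute,
   saving/restoring/printing the target-specific option state, recognizing
   distinct function versions, and gating inlining across option sets.  */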
#undef TARGET_SET_CURRENT_FUNCTION
#define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function

#undef TARGET_OPTION_VALID_ATTRIBUTE_P
#define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p

#undef TARGET_OPTION_SAVE
#define TARGET_OPTION_SAVE ix86_function_specific_save

#undef TARGET_OPTION_RESTORE
#define TARGET_OPTION_RESTORE ix86_function_specific_restore

#undef TARGET_OPTION_PRINT
#define TARGET_OPTION_PRINT ix86_function_specific_print

#undef TARGET_OPTION_FUNCTION_VERSIONS
#define TARGET_OPTION_FUNCTION_VERSIONS ix86_function_versions

#undef TARGET_CAN_INLINE_P
#define TARGET_CAN_INLINE_P ix86_can_inline_p

#undef TARGET_EXPAND_TO_RTL_HOOK
#define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p

#undef TARGET_LRA_P
#define TARGET_LRA_P hook_bool_void_true

#undef TARGET_REGISTER_PRIORITY
#define TARGET_REGISTER_PRIORITY ix86_register_priority

#undef TARGET_REGISTER_USAGE_LEVELING_P
#define TARGET_REGISTER_USAGE_LEVELING_P hook_bool_void_true

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE ix86_can_eliminate

#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry

#undef TARGET_ASM_CODE_END
#define TARGET_ASM_CODE_END ix86_code_end

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage

#if TARGET_MACHO
#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS darwin_rename_builtins
#endif

#undef TARGET_LOOP_UNROLL_ADJUST
#define TARGET_LOOP_UNROLL_ADJUST ix86_loop_unroll_adjust

#undef TARGET_SPILL_CLASS
#define TARGET_SPILL_CLASS ix86_spill_class

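/* Hooks used when creating SIMD clones of functions marked with
   '#pragma omp declare simd': choosing vector size and simdlen, adjusting
   the clone body, and deciding whether a clone is usable at a call.  */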
#undef TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN
#define TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN \
  ix86_simd_clone_compute_vecsize_and_simdlen

#undef TARGET_SIMD_CLONE_ADJUST
#define TARGET_SIMD_CLONE_ADJUST \
  ix86_simd_clone_adjust

#undef TARGET_SIMD_CLONE_USABLE
#define TARGET_SIMD_CLONE_USABLE \
  ix86_simd_clone_usable

#undef TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P
#define TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P \
  ix86_float_exceptions_rounding_supported_p

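/* Hooks for the optimize_mode_switching pass; on x86 these track entities
   such as the AVX upper-128 state (vzeroupper insertion) and the x87
   control-word rounding modes.  */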
#undef TARGET_MODE_EMIT
#define TARGET_MODE_EMIT ix86_emit_mode_set

#undef TARGET_MODE_NEEDED
#define TARGET_MODE_NEEDED ix86_mode_needed

#undef TARGET_MODE_AFTER
#define TARGET_MODE_AFTER ix86_mode_after

#undef TARGET_MODE_ENTRY
#define TARGET_MODE_ENTRY ix86_mode_entry

#undef TARGET_MODE_EXIT
#define TARGET_MODE_EXIT ix86_mode_exit

#undef TARGET_MODE_PRIORITY
#define TARGET_MODE_PRIORITY ix86_mode_priority

#undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
#define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true

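/* Build the target hook vector from the TARGET_* macros defined above.
   TARGET_INITIALIZER expands to an aggregate initializer; any hook not
   overridden here keeps its default from target.def.  */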
struct gcc_target targetm = TARGET_INITIALIZER;
\f
#include "gt-i386.h"