1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988-2014 Free Software Foundation, Inc.
3
4 This file is part of GCC.
5
6 GCC is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
9 any later version.
10
11 GCC is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
19
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "tm.h"
24 #include "rtl.h"
25 #include "tree.h"
26 #include "stringpool.h"
27 #include "attribs.h"
28 #include "calls.h"
29 #include "stor-layout.h"
30 #include "varasm.h"
31 #include "tm_p.h"
32 #include "regs.h"
33 #include "hard-reg-set.h"
34 #include "insn-config.h"
35 #include "conditions.h"
36 #include "output.h"
37 #include "insn-codes.h"
38 #include "insn-attr.h"
39 #include "flags.h"
40 #include "except.h"
41 #include "hashtab.h"
42 #include "hash-set.h"
43 #include "vec.h"
44 #include "machmode.h"
45 #include "input.h"
46 #include "function.h"
47 #include "recog.h"
48 #include "expr.h"
49 #include "optabs.h"
50 #include "diagnostic-core.h"
51 #include "toplev.h"
52 #include "predict.h"
53 #include "dominance.h"
54 #include "cfg.h"
55 #include "cfgrtl.h"
56 #include "cfganal.h"
57 #include "lcm.h"
58 #include "cfgbuild.h"
59 #include "cfgcleanup.h"
60 #include "basic-block.h"
61 #include "ggc.h"
62 #include "target.h"
63 #include "target-def.h"
64 #include "common/common-target.h"
65 #include "langhooks.h"
66 #include "reload.h"
67 #include "cgraph.h"
68 #include "hash-table.h"
69 #include "tree-ssa-alias.h"
70 #include "internal-fn.h"
71 #include "gimple-fold.h"
72 #include "tree-eh.h"
73 #include "gimple-expr.h"
74 #include "is-a.h"
75 #include "gimple.h"
76 #include "gimplify.h"
77 #include "cfgloop.h"
78 #include "dwarf2.h"
79 #include "df.h"
80 #include "tm-constrs.h"
81 #include "params.h"
82 #include "cselib.h"
83 #include "debug.h"
84 #include "sched-int.h"
85 #include "sbitmap.h"
86 #include "fibheap.h"
87 #include "opts.h"
88 #include "diagnostic.h"
89 #include "dumpfile.h"
90 #include "tree-pass.h"
91 #include "wide-int.h"
92 #include "context.h"
93 #include "pass_manager.h"
94 #include "target-globals.h"
95 #include "tree-vectorizer.h"
96 #include "shrink-wrap.h"
97 #include "builtins.h"
98 #include "rtl-iter.h"
99
100 static rtx legitimize_dllimport_symbol (rtx, bool);
101 static rtx legitimize_pe_coff_extern_decl (rtx, bool);
102 static rtx legitimize_pe_coff_symbol (rtx, bool);
103
104 #ifndef CHECK_STACK_LIMIT
105 #define CHECK_STACK_LIMIT (-1)
106 #endif
107
108 /* Return index of given mode in mult and division cost tables. */
109 #define MODE_INDEX(mode) \
110 ((mode) == QImode ? 0 \
111 : (mode) == HImode ? 1 \
112 : (mode) == SImode ? 2 \
113 : (mode) == DImode ? 3 \
114 : 4)
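
/* An illustrative self-check of the mapping above (kept under #if 0 so it
   is never compiled): the four integer modes select columns 0-3 of the
   per-mode cost arrays below ({QI, HI, SI, DI, other}), and any other mode
   falls through to the "other" column.  */
#if 0
extern char mode_index_qi_is_0[MODE_INDEX (QImode) == 0 ? 1 : -1];
extern char mode_index_hi_is_1[MODE_INDEX (HImode) == 1 ? 1 : -1];
extern char mode_index_si_is_2[MODE_INDEX (SImode) == 2 ? 1 : -1];
extern char mode_index_di_is_3[MODE_INDEX (DImode) == 3 ? 1 : -1];
extern char mode_index_other_is_4[MODE_INDEX (TImode) == 4 ? 1 : -1];
#endif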
115
116 /* Processor costs (relative to an add) */
117 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
118 #define COSTS_N_BYTES(N) ((N) * 2)
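
/* For example, COSTS_N_BYTES (2) == 4 == COSTS_N_INSNS (1): a 2-byte
   addition is scaled to the same cost as one generic instruction, which
   keeps the size-tuned table below comparable with the cycle-based tables
   used for the other processors.  */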
119
120 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall, false}}}
121
122 static stringop_algs ix86_size_memcpy[2] = {
123 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
124 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}};
125 static stringop_algs ix86_size_memset[2] = {
126 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
127 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}};
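
/* A hedged reading of the tables above, assuming the stringop_algs layout
   from i386.h: element [0] of each pair appears to be used for 32-bit code
   and [1] for 64-bit code (they are indexed by TARGET_64BIT later in this
   file); within an entry the leading algorithm handles blocks of unknown
   size, and the {max, alg, noalign} triples are scanned in order, with
   max == -1 meaning "no upper bound".  When optimizing for size, both
   entries simply say: use rep movsb for everything.  */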
128
129 const
130 struct processor_costs ix86_size_cost = {/* costs for tuning for size */
131 COSTS_N_BYTES (2), /* cost of an add instruction */
132 COSTS_N_BYTES (3), /* cost of a lea instruction */
133 COSTS_N_BYTES (2), /* variable shift costs */
134 COSTS_N_BYTES (3), /* constant shift costs */
135 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
136 COSTS_N_BYTES (3), /* HI */
137 COSTS_N_BYTES (3), /* SI */
138 COSTS_N_BYTES (3), /* DI */
139 COSTS_N_BYTES (5)}, /* other */
140 0, /* cost of multiply per each bit set */
141 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
142 COSTS_N_BYTES (3), /* HI */
143 COSTS_N_BYTES (3), /* SI */
144 COSTS_N_BYTES (3), /* DI */
145 COSTS_N_BYTES (5)}, /* other */
146 COSTS_N_BYTES (3), /* cost of movsx */
147 COSTS_N_BYTES (3), /* cost of movzx */
148 0, /* "large" insn */
149 2, /* MOVE_RATIO */
150 2, /* cost for loading QImode using movzbl */
151 {2, 2, 2}, /* cost of loading integer registers
152 in QImode, HImode and SImode.
153 Relative to reg-reg move (2). */
154 {2, 2, 2}, /* cost of storing integer registers */
155 2, /* cost of reg,reg fld/fst */
156 {2, 2, 2}, /* cost of loading fp registers
157 in SFmode, DFmode and XFmode */
158 {2, 2, 2}, /* cost of storing fp registers
159 in SFmode, DFmode and XFmode */
160 3, /* cost of moving MMX register */
161 {3, 3}, /* cost of loading MMX registers
162 in SImode and DImode */
163 {3, 3}, /* cost of storing MMX registers
164 in SImode and DImode */
165 3, /* cost of moving SSE register */
166 {3, 3, 3}, /* cost of loading SSE registers
167 in SImode, DImode and TImode */
168 {3, 3, 3}, /* cost of storing SSE registers
169 in SImode, DImode and TImode */
170 3, /* MMX or SSE register to integer */
171 0, /* size of l1 cache */
172 0, /* size of l2 cache */
173 0, /* size of prefetch block */
174 0, /* number of parallel prefetches */
175 2, /* Branch cost */
176 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
177 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
178 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
179 COSTS_N_BYTES (2), /* cost of FABS instruction. */
180 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
181 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
182 ix86_size_memcpy,
183 ix86_size_memset,
184 1, /* scalar_stmt_cost. */
185 1, /* scalar load_cost. */
186 1, /* scalar_store_cost. */
187 1, /* vec_stmt_cost. */
188 1, /* vec_to_scalar_cost. */
189 1, /* scalar_to_vec_cost. */
190 1, /* vec_align_load_cost. */
191 1, /* vec_unalign_load_cost. */
192 1, /* vec_store_cost. */
193 1, /* cond_taken_branch_cost. */
194 1, /* cond_not_taken_branch_cost. */
195 };
196
197 /* Processor costs (relative to an add) */
198 static stringop_algs i386_memcpy[2] = {
199 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
200 DUMMY_STRINGOP_ALGS};
201 static stringop_algs i386_memset[2] = {
202 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
203 DUMMY_STRINGOP_ALGS};
204
205 static const
206 struct processor_costs i386_cost = { /* 386 specific costs */
207 COSTS_N_INSNS (1), /* cost of an add instruction */
208 COSTS_N_INSNS (1), /* cost of a lea instruction */
209 COSTS_N_INSNS (3), /* variable shift costs */
210 COSTS_N_INSNS (2), /* constant shift costs */
211 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
212 COSTS_N_INSNS (6), /* HI */
213 COSTS_N_INSNS (6), /* SI */
214 COSTS_N_INSNS (6), /* DI */
215 COSTS_N_INSNS (6)}, /* other */
216 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
217 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
218 COSTS_N_INSNS (23), /* HI */
219 COSTS_N_INSNS (23), /* SI */
220 COSTS_N_INSNS (23), /* DI */
221 COSTS_N_INSNS (23)}, /* other */
222 COSTS_N_INSNS (3), /* cost of movsx */
223 COSTS_N_INSNS (2), /* cost of movzx */
224 15, /* "large" insn */
225 3, /* MOVE_RATIO */
226 4, /* cost for loading QImode using movzbl */
227 {2, 4, 2}, /* cost of loading integer registers
228 in QImode, HImode and SImode.
229 Relative to reg-reg move (2). */
230 {2, 4, 2}, /* cost of storing integer registers */
231 2, /* cost of reg,reg fld/fst */
232 {8, 8, 8}, /* cost of loading fp registers
233 in SFmode, DFmode and XFmode */
234 {8, 8, 8}, /* cost of storing fp registers
235 in SFmode, DFmode and XFmode */
236 2, /* cost of moving MMX register */
237 {4, 8}, /* cost of loading MMX registers
238 in SImode and DImode */
239 {4, 8}, /* cost of storing MMX registers
240 in SImode and DImode */
241 2, /* cost of moving SSE register */
242 {4, 8, 16}, /* cost of loading SSE registers
243 in SImode, DImode and TImode */
244 {4, 8, 16}, /* cost of storing SSE registers
245 in SImode, DImode and TImode */
246 3, /* MMX or SSE register to integer */
247 0, /* size of l1 cache */
248 0, /* size of l2 cache */
249 0, /* size of prefetch block */
250 0, /* number of parallel prefetches */
251 1, /* Branch cost */
252 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
253 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
254 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
255 COSTS_N_INSNS (22), /* cost of FABS instruction. */
256 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
257 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
258 i386_memcpy,
259 i386_memset,
260 1, /* scalar_stmt_cost. */
261 1, /* scalar load_cost. */
262 1, /* scalar_store_cost. */
263 1, /* vec_stmt_cost. */
264 1, /* vec_to_scalar_cost. */
265 1, /* scalar_to_vec_cost. */
266 1, /* vec_align_load_cost. */
267 2, /* vec_unalign_load_cost. */
268 1, /* vec_store_cost. */
269 3, /* cond_taken_branch_cost. */
270 1, /* cond_not_taken_branch_cost. */
271 };
272
273 static stringop_algs i486_memcpy[2] = {
274 {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}},
275 DUMMY_STRINGOP_ALGS};
276 static stringop_algs i486_memset[2] = {
277 {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}},
278 DUMMY_STRINGOP_ALGS};
279
280 static const
281 struct processor_costs i486_cost = { /* 486 specific costs */
282 COSTS_N_INSNS (1), /* cost of an add instruction */
283 COSTS_N_INSNS (1), /* cost of a lea instruction */
284 COSTS_N_INSNS (3), /* variable shift costs */
285 COSTS_N_INSNS (2), /* constant shift costs */
286 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
287 COSTS_N_INSNS (12), /* HI */
288 COSTS_N_INSNS (12), /* SI */
289 COSTS_N_INSNS (12), /* DI */
290 COSTS_N_INSNS (12)}, /* other */
291 1, /* cost of multiply per each bit set */
292 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
293 COSTS_N_INSNS (40), /* HI */
294 COSTS_N_INSNS (40), /* SI */
295 COSTS_N_INSNS (40), /* DI */
296 COSTS_N_INSNS (40)}, /* other */
297 COSTS_N_INSNS (3), /* cost of movsx */
298 COSTS_N_INSNS (2), /* cost of movzx */
299 15, /* "large" insn */
300 3, /* MOVE_RATIO */
301 4, /* cost for loading QImode using movzbl */
302 {2, 4, 2}, /* cost of loading integer registers
303 in QImode, HImode and SImode.
304 Relative to reg-reg move (2). */
305 {2, 4, 2}, /* cost of storing integer registers */
306 2, /* cost of reg,reg fld/fst */
307 {8, 8, 8}, /* cost of loading fp registers
308 in SFmode, DFmode and XFmode */
309 {8, 8, 8}, /* cost of storing fp registers
310 in SFmode, DFmode and XFmode */
311 2, /* cost of moving MMX register */
312 {4, 8}, /* cost of loading MMX registers
313 in SImode and DImode */
314 {4, 8}, /* cost of storing MMX registers
315 in SImode and DImode */
316 2, /* cost of moving SSE register */
317 {4, 8, 16}, /* cost of loading SSE registers
318 in SImode, DImode and TImode */
319 {4, 8, 16}, /* cost of storing SSE registers
320 in SImode, DImode and TImode */
321 3, /* MMX or SSE register to integer */
322 4, /* size of l1 cache. 486 has 8kB cache
323 shared for code and data, so 4kB is
324 not really precise. */
325 4, /* size of l2 cache */
326 0, /* size of prefetch block */
327 0, /* number of parallel prefetches */
328 1, /* Branch cost */
329 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
330 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
331 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
332 COSTS_N_INSNS (3), /* cost of FABS instruction. */
333 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
334 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
335 i486_memcpy,
336 i486_memset,
337 1, /* scalar_stmt_cost. */
338 1, /* scalar load_cost. */
339 1, /* scalar_store_cost. */
340 1, /* vec_stmt_cost. */
341 1, /* vec_to_scalar_cost. */
342 1, /* scalar_to_vec_cost. */
343 1, /* vec_align_load_cost. */
344 2, /* vec_unalign_load_cost. */
345 1, /* vec_store_cost. */
346 3, /* cond_taken_branch_cost. */
347 1, /* cond_not_taken_branch_cost. */
348 };
349
350 static stringop_algs pentium_memcpy[2] = {
351 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
352 DUMMY_STRINGOP_ALGS};
353 static stringop_algs pentium_memset[2] = {
354 {libcall, {{-1, rep_prefix_4_byte, false}}},
355 DUMMY_STRINGOP_ALGS};
356
357 static const
358 struct processor_costs pentium_cost = {
359 COSTS_N_INSNS (1), /* cost of an add instruction */
360 COSTS_N_INSNS (1), /* cost of a lea instruction */
361 COSTS_N_INSNS (4), /* variable shift costs */
362 COSTS_N_INSNS (1), /* constant shift costs */
363 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
364 COSTS_N_INSNS (11), /* HI */
365 COSTS_N_INSNS (11), /* SI */
366 COSTS_N_INSNS (11), /* DI */
367 COSTS_N_INSNS (11)}, /* other */
368 0, /* cost of multiply per each bit set */
369 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
370 COSTS_N_INSNS (25), /* HI */
371 COSTS_N_INSNS (25), /* SI */
372 COSTS_N_INSNS (25), /* DI */
373 COSTS_N_INSNS (25)}, /* other */
374 COSTS_N_INSNS (3), /* cost of movsx */
375 COSTS_N_INSNS (2), /* cost of movzx */
376 8, /* "large" insn */
377 6, /* MOVE_RATIO */
378 6, /* cost for loading QImode using movzbl */
379 {2, 4, 2}, /* cost of loading integer registers
380 in QImode, HImode and SImode.
381 Relative to reg-reg move (2). */
382 {2, 4, 2}, /* cost of storing integer registers */
383 2, /* cost of reg,reg fld/fst */
384 {2, 2, 6}, /* cost of loading fp registers
385 in SFmode, DFmode and XFmode */
386 {4, 4, 6}, /* cost of storing fp registers
387 in SFmode, DFmode and XFmode */
388 8, /* cost of moving MMX register */
389 {8, 8}, /* cost of loading MMX registers
390 in SImode and DImode */
391 {8, 8}, /* cost of storing MMX registers
392 in SImode and DImode */
393 2, /* cost of moving SSE register */
394 {4, 8, 16}, /* cost of loading SSE registers
395 in SImode, DImode and TImode */
396 {4, 8, 16}, /* cost of storing SSE registers
397 in SImode, DImode and TImode */
398 3, /* MMX or SSE register to integer */
399 8, /* size of l1 cache. */
400 8, /* size of l2 cache */
401 0, /* size of prefetch block */
402 0, /* number of parallel prefetches */
403 2, /* Branch cost */
404 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
405 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
406 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
407 COSTS_N_INSNS (1), /* cost of FABS instruction. */
408 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
409 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
410 pentium_memcpy,
411 pentium_memset,
412 1, /* scalar_stmt_cost. */
413 1, /* scalar load_cost. */
414 1, /* scalar_store_cost. */
415 1, /* vec_stmt_cost. */
416 1, /* vec_to_scalar_cost. */
417 1, /* scalar_to_vec_cost. */
418 1, /* vec_align_load_cost. */
419 2, /* vec_unalign_load_cost. */
420 1, /* vec_store_cost. */
421 3, /* cond_taken_branch_cost. */
422 1, /* cond_not_taken_branch_cost. */
423 };
424
425 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
426 (we ensure the alignment).  For small blocks an inline loop is still a
427 noticeable win; for bigger blocks either rep movsl or rep movsb is the
428 way to go.  Rep movsb apparently has a more expensive startup time in the
429 CPU, but after 4K the difference is down in the noise.  */
430 static stringop_algs pentiumpro_memcpy[2] = {
431 {rep_prefix_4_byte, {{128, loop, false}, {1024, unrolled_loop, false},
432 {8192, rep_prefix_4_byte, false},
433 {-1, rep_prefix_1_byte, false}}},
434 DUMMY_STRINGOP_ALGS};
435 static stringop_algs pentiumpro_memset[2] = {
436 {rep_prefix_4_byte, {{1024, unrolled_loop, false},
437 {8192, rep_prefix_4_byte, false},
438 {-1, libcall, false}}},
439 DUMMY_STRINGOP_ALGS};
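
/* An illustrative sketch (kept under #if 0, not compiled) of how a table
   such as pentiumpro_memcpy above is consulted when the block size is
   known; the actual selection logic (decide_alg) lives much further down
   in this file.  */
#if 0
static enum stringop_alg
choose_stringop_alg_example (const struct stringop_algs *algs,
			     unsigned HOST_WIDE_INT size)
{
  /* Take the first {max, alg} entry whose bound covers SIZE; max == -1
     means "any larger size".  For pentiumpro_memcpy this gives:
     <= 128 -> loop, <= 1024 -> unrolled_loop, <= 8192 -> rep movsl,
     and rep movsb beyond that.  */
  for (unsigned int i = 0; i < MAX_STRINGOP_ALGS; i++)
    if (algs->size[i].max == -1
	|| size <= (unsigned HOST_WIDE_INT) algs->size[i].max)
      return algs->size[i].alg;
  return libcall;
}
#endif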
440 static const
441 struct processor_costs pentiumpro_cost = {
442 COSTS_N_INSNS (1), /* cost of an add instruction */
443 COSTS_N_INSNS (1), /* cost of a lea instruction */
444 COSTS_N_INSNS (1), /* variable shift costs */
445 COSTS_N_INSNS (1), /* constant shift costs */
446 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
447 COSTS_N_INSNS (4), /* HI */
448 COSTS_N_INSNS (4), /* SI */
449 COSTS_N_INSNS (4), /* DI */
450 COSTS_N_INSNS (4)}, /* other */
451 0, /* cost of multiply per each bit set */
452 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
453 COSTS_N_INSNS (17), /* HI */
454 COSTS_N_INSNS (17), /* SI */
455 COSTS_N_INSNS (17), /* DI */
456 COSTS_N_INSNS (17)}, /* other */
457 COSTS_N_INSNS (1), /* cost of movsx */
458 COSTS_N_INSNS (1), /* cost of movzx */
459 8, /* "large" insn */
460 6, /* MOVE_RATIO */
461 2, /* cost for loading QImode using movzbl */
462 {4, 4, 4}, /* cost of loading integer registers
463 in QImode, HImode and SImode.
464 Relative to reg-reg move (2). */
465 {2, 2, 2}, /* cost of storing integer registers */
466 2, /* cost of reg,reg fld/fst */
467 {2, 2, 6}, /* cost of loading fp registers
468 in SFmode, DFmode and XFmode */
469 {4, 4, 6}, /* cost of storing fp registers
470 in SFmode, DFmode and XFmode */
471 2, /* cost of moving MMX register */
472 {2, 2}, /* cost of loading MMX registers
473 in SImode and DImode */
474 {2, 2}, /* cost of storing MMX registers
475 in SImode and DImode */
476 2, /* cost of moving SSE register */
477 {2, 2, 8}, /* cost of loading SSE registers
478 in SImode, DImode and TImode */
479 {2, 2, 8}, /* cost of storing SSE registers
480 in SImode, DImode and TImode */
481 3, /* MMX or SSE register to integer */
482 8, /* size of l1 cache. */
483 256, /* size of l2 cache */
484 32, /* size of prefetch block */
485 6, /* number of parallel prefetches */
486 2, /* Branch cost */
487 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
488 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
489 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
490 COSTS_N_INSNS (2), /* cost of FABS instruction. */
491 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
492 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
493 pentiumpro_memcpy,
494 pentiumpro_memset,
495 1, /* scalar_stmt_cost. */
496 1, /* scalar load_cost. */
497 1, /* scalar_store_cost. */
498 1, /* vec_stmt_cost. */
499 1, /* vec_to_scalar_cost. */
500 1, /* scalar_to_vec_cost. */
501 1, /* vec_align_load_cost. */
502 2, /* vec_unalign_load_cost. */
503 1, /* vec_store_cost. */
504 3, /* cond_taken_branch_cost. */
505 1, /* cond_not_taken_branch_cost. */
506 };
507
508 static stringop_algs geode_memcpy[2] = {
509 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
510 DUMMY_STRINGOP_ALGS};
511 static stringop_algs geode_memset[2] = {
512 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
513 DUMMY_STRINGOP_ALGS};
514 static const
515 struct processor_costs geode_cost = {
516 COSTS_N_INSNS (1), /* cost of an add instruction */
517 COSTS_N_INSNS (1), /* cost of a lea instruction */
518 COSTS_N_INSNS (2), /* variable shift costs */
519 COSTS_N_INSNS (1), /* constant shift costs */
520 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
521 COSTS_N_INSNS (4), /* HI */
522 COSTS_N_INSNS (7), /* SI */
523 COSTS_N_INSNS (7), /* DI */
524 COSTS_N_INSNS (7)}, /* other */
525 0, /* cost of multiply per each bit set */
526 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
527 COSTS_N_INSNS (23), /* HI */
528 COSTS_N_INSNS (39), /* SI */
529 COSTS_N_INSNS (39), /* DI */
530 COSTS_N_INSNS (39)}, /* other */
531 COSTS_N_INSNS (1), /* cost of movsx */
532 COSTS_N_INSNS (1), /* cost of movzx */
533 8, /* "large" insn */
534 4, /* MOVE_RATIO */
535 1, /* cost for loading QImode using movzbl */
536 {1, 1, 1}, /* cost of loading integer registers
537 in QImode, HImode and SImode.
538 Relative to reg-reg move (2). */
539 {1, 1, 1}, /* cost of storing integer registers */
540 1, /* cost of reg,reg fld/fst */
541 {1, 1, 1}, /* cost of loading fp registers
542 in SFmode, DFmode and XFmode */
543 {4, 6, 6}, /* cost of storing fp registers
544 in SFmode, DFmode and XFmode */
545
546 1, /* cost of moving MMX register */
547 {1, 1}, /* cost of loading MMX registers
548 in SImode and DImode */
549 {1, 1}, /* cost of storing MMX registers
550 in SImode and DImode */
551 1, /* cost of moving SSE register */
552 {1, 1, 1}, /* cost of loading SSE registers
553 in SImode, DImode and TImode */
554 {1, 1, 1}, /* cost of storing SSE registers
555 in SImode, DImode and TImode */
556 1, /* MMX or SSE register to integer */
557 64, /* size of l1 cache. */
558 128, /* size of l2 cache. */
559 32, /* size of prefetch block */
560 1, /* number of parallel prefetches */
561 1, /* Branch cost */
562 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
563 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
564 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
565 COSTS_N_INSNS (1), /* cost of FABS instruction. */
566 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
567 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
568 geode_memcpy,
569 geode_memset,
570 1, /* scalar_stmt_cost. */
571 1, /* scalar load_cost. */
572 1, /* scalar_store_cost. */
573 1, /* vec_stmt_cost. */
574 1, /* vec_to_scalar_cost. */
575 1, /* scalar_to_vec_cost. */
576 1, /* vec_align_load_cost. */
577 2, /* vec_unalign_load_cost. */
578 1, /* vec_store_cost. */
579 3, /* cond_taken_branch_cost. */
580 1, /* cond_not_taken_branch_cost. */
581 };
582
583 static stringop_algs k6_memcpy[2] = {
584 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
585 DUMMY_STRINGOP_ALGS};
586 static stringop_algs k6_memset[2] = {
587 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
588 DUMMY_STRINGOP_ALGS};
589 static const
590 struct processor_costs k6_cost = {
591 COSTS_N_INSNS (1), /* cost of an add instruction */
592 COSTS_N_INSNS (2), /* cost of a lea instruction */
593 COSTS_N_INSNS (1), /* variable shift costs */
594 COSTS_N_INSNS (1), /* constant shift costs */
595 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
596 COSTS_N_INSNS (3), /* HI */
597 COSTS_N_INSNS (3), /* SI */
598 COSTS_N_INSNS (3), /* DI */
599 COSTS_N_INSNS (3)}, /* other */
600 0, /* cost of multiply per each bit set */
601 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
602 COSTS_N_INSNS (18), /* HI */
603 COSTS_N_INSNS (18), /* SI */
604 COSTS_N_INSNS (18), /* DI */
605 COSTS_N_INSNS (18)}, /* other */
606 COSTS_N_INSNS (2), /* cost of movsx */
607 COSTS_N_INSNS (2), /* cost of movzx */
608 8, /* "large" insn */
609 4, /* MOVE_RATIO */
610 3, /* cost for loading QImode using movzbl */
611 {4, 5, 4}, /* cost of loading integer registers
612 in QImode, HImode and SImode.
613 Relative to reg-reg move (2). */
614 {2, 3, 2}, /* cost of storing integer registers */
615 4, /* cost of reg,reg fld/fst */
616 {6, 6, 6}, /* cost of loading fp registers
617 in SFmode, DFmode and XFmode */
618 {4, 4, 4}, /* cost of storing fp registers
619 in SFmode, DFmode and XFmode */
620 2, /* cost of moving MMX register */
621 {2, 2}, /* cost of loading MMX registers
622 in SImode and DImode */
623 {2, 2}, /* cost of storing MMX registers
624 in SImode and DImode */
625 2, /* cost of moving SSE register */
626 {2, 2, 8}, /* cost of loading SSE registers
627 in SImode, DImode and TImode */
628 {2, 2, 8}, /* cost of storing SSE registers
629 in SImode, DImode and TImode */
630 6, /* MMX or SSE register to integer */
631 32, /* size of l1 cache. */
632 32, /* size of l2 cache. Some models
633 have integrated l2 cache, but
634 optimizing for k6 is not important
635 enough to worry about that. */
636 32, /* size of prefetch block */
637 1, /* number of parallel prefetches */
638 1, /* Branch cost */
639 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
640 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
641 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
642 COSTS_N_INSNS (2), /* cost of FABS instruction. */
643 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
644 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
645 k6_memcpy,
646 k6_memset,
647 1, /* scalar_stmt_cost. */
648 1, /* scalar load_cost. */
649 1, /* scalar_store_cost. */
650 1, /* vec_stmt_cost. */
651 1, /* vec_to_scalar_cost. */
652 1, /* scalar_to_vec_cost. */
653 1, /* vec_align_load_cost. */
654 2, /* vec_unalign_load_cost. */
655 1, /* vec_store_cost. */
656 3, /* cond_taken_branch_cost. */
657 1, /* cond_not_taken_branch_cost. */
658 };
659
660 /* For some reason, Athlon deals better with the REP prefix (relative to
661 loops) than K8 does.  Alignment becomes important after 8 bytes for memcpy
662 and 128 bytes for memset.  */
663 static stringop_algs athlon_memcpy[2] = {
664 {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
665 DUMMY_STRINGOP_ALGS};
666 static stringop_algs athlon_memset[2] = {
667 {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
668 DUMMY_STRINGOP_ALGS};
669 static const
670 struct processor_costs athlon_cost = {
671 COSTS_N_INSNS (1), /* cost of an add instruction */
672 COSTS_N_INSNS (2), /* cost of a lea instruction */
673 COSTS_N_INSNS (1), /* variable shift costs */
674 COSTS_N_INSNS (1), /* constant shift costs */
675 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
676 COSTS_N_INSNS (5), /* HI */
677 COSTS_N_INSNS (5), /* SI */
678 COSTS_N_INSNS (5), /* DI */
679 COSTS_N_INSNS (5)}, /* other */
680 0, /* cost of multiply per each bit set */
681 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
682 COSTS_N_INSNS (26), /* HI */
683 COSTS_N_INSNS (42), /* SI */
684 COSTS_N_INSNS (74), /* DI */
685 COSTS_N_INSNS (74)}, /* other */
686 COSTS_N_INSNS (1), /* cost of movsx */
687 COSTS_N_INSNS (1), /* cost of movzx */
688 8, /* "large" insn */
689 9, /* MOVE_RATIO */
690 4, /* cost for loading QImode using movzbl */
691 {3, 4, 3}, /* cost of loading integer registers
692 in QImode, HImode and SImode.
693 Relative to reg-reg move (2). */
694 {3, 4, 3}, /* cost of storing integer registers */
695 4, /* cost of reg,reg fld/fst */
696 {4, 4, 12}, /* cost of loading fp registers
697 in SFmode, DFmode and XFmode */
698 {6, 6, 8}, /* cost of storing fp registers
699 in SFmode, DFmode and XFmode */
700 2, /* cost of moving MMX register */
701 {4, 4}, /* cost of loading MMX registers
702 in SImode and DImode */
703 {4, 4}, /* cost of storing MMX registers
704 in SImode and DImode */
705 2, /* cost of moving SSE register */
706 {4, 4, 6}, /* cost of loading SSE registers
707 in SImode, DImode and TImode */
708 {4, 4, 5}, /* cost of storing SSE registers
709 in SImode, DImode and TImode */
710 5, /* MMX or SSE register to integer */
711 64, /* size of l1 cache. */
712 256, /* size of l2 cache. */
713 64, /* size of prefetch block */
714 6, /* number of parallel prefetches */
715 5, /* Branch cost */
716 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
717 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
718 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
719 COSTS_N_INSNS (2), /* cost of FABS instruction. */
720 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
721 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
722 athlon_memcpy,
723 athlon_memset,
724 1, /* scalar_stmt_cost. */
725 1, /* scalar load_cost. */
726 1, /* scalar_store_cost. */
727 1, /* vec_stmt_cost. */
728 1, /* vec_to_scalar_cost. */
729 1, /* scalar_to_vec_cost. */
730 1, /* vec_align_load_cost. */
731 2, /* vec_unalign_load_cost. */
732 1, /* vec_store_cost. */
733 3, /* cond_taken_branch_cost. */
734 1, /* cond_not_taken_branch_cost. */
735 };
736
737 /* K8 has an optimized REP instruction for medium-sized blocks, but for very
738 small blocks it is better to use a loop.  For large blocks, a libcall can
739 do non-temporal accesses and beat inlined code considerably.  */
740 static stringop_algs k8_memcpy[2] = {
741 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
742 {-1, rep_prefix_4_byte, false}}},
743 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
744 {-1, libcall, false}}}};
745 static stringop_algs k8_memset[2] = {
746 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
747 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
748 {libcall, {{48, unrolled_loop, false},
749 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
750 static const
751 struct processor_costs k8_cost = {
752 COSTS_N_INSNS (1), /* cost of an add instruction */
753 COSTS_N_INSNS (2), /* cost of a lea instruction */
754 COSTS_N_INSNS (1), /* variable shift costs */
755 COSTS_N_INSNS (1), /* constant shift costs */
756 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
757 COSTS_N_INSNS (4), /* HI */
758 COSTS_N_INSNS (3), /* SI */
759 COSTS_N_INSNS (4), /* DI */
760 COSTS_N_INSNS (5)}, /* other */
761 0, /* cost of multiply per each bit set */
762 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
763 COSTS_N_INSNS (26), /* HI */
764 COSTS_N_INSNS (42), /* SI */
765 COSTS_N_INSNS (74), /* DI */
766 COSTS_N_INSNS (74)}, /* other */
767 COSTS_N_INSNS (1), /* cost of movsx */
768 COSTS_N_INSNS (1), /* cost of movzx */
769 8, /* "large" insn */
770 9, /* MOVE_RATIO */
771 4, /* cost for loading QImode using movzbl */
772 {3, 4, 3}, /* cost of loading integer registers
773 in QImode, HImode and SImode.
774 Relative to reg-reg move (2). */
775 {3, 4, 3}, /* cost of storing integer registers */
776 4, /* cost of reg,reg fld/fst */
777 {4, 4, 12}, /* cost of loading fp registers
778 in SFmode, DFmode and XFmode */
779 {6, 6, 8}, /* cost of storing fp registers
780 in SFmode, DFmode and XFmode */
781 2, /* cost of moving MMX register */
782 {3, 3}, /* cost of loading MMX registers
783 in SImode and DImode */
784 {4, 4}, /* cost of storing MMX registers
785 in SImode and DImode */
786 2, /* cost of moving SSE register */
787 {4, 3, 6}, /* cost of loading SSE registers
788 in SImode, DImode and TImode */
789 {4, 4, 5}, /* cost of storing SSE registers
790 in SImode, DImode and TImode */
791 5, /* MMX or SSE register to integer */
792 64, /* size of l1 cache. */
793 512, /* size of l2 cache. */
794 64, /* size of prefetch block */
795 /* New AMD processors never drop prefetches; if they cannot be performed
796 immediately, they are queued.  We set the number of simultaneous prefetches
797 to a large constant to reflect this (it is probably not a good idea to leave
798 the number of prefetches completely unlimited, as their execution also
799 takes some time).  */
800 100, /* number of parallel prefetches */
801 3, /* Branch cost */
802 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
803 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
804 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
805 COSTS_N_INSNS (2), /* cost of FABS instruction. */
806 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
807 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
808
809 k8_memcpy,
810 k8_memset,
811 4, /* scalar_stmt_cost. */
812 2, /* scalar load_cost. */
813 2, /* scalar_store_cost. */
814 5, /* vec_stmt_cost. */
815 0, /* vec_to_scalar_cost. */
816 2, /* scalar_to_vec_cost. */
817 2, /* vec_align_load_cost. */
818 3, /* vec_unalign_load_cost. */
819 3, /* vec_store_cost. */
820 3, /* cond_taken_branch_cost. */
821 2, /* cond_not_taken_branch_cost. */
822 };
823
824 /* AMDFAM10 has an optimized REP instruction for medium-sized blocks, but for
825 very small blocks it is better to use a loop.  For large blocks, a libcall
826 can do non-temporal accesses and beat inlined code considerably.  */
827 static stringop_algs amdfam10_memcpy[2] = {
828 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
829 {-1, rep_prefix_4_byte, false}}},
830 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
831 {-1, libcall, false}}}};
832 static stringop_algs amdfam10_memset[2] = {
833 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
834 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
835 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
836 {-1, libcall, false}}}};
837 struct processor_costs amdfam10_cost = {
838 COSTS_N_INSNS (1), /* cost of an add instruction */
839 COSTS_N_INSNS (2), /* cost of a lea instruction */
840 COSTS_N_INSNS (1), /* variable shift costs */
841 COSTS_N_INSNS (1), /* constant shift costs */
842 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
843 COSTS_N_INSNS (4), /* HI */
844 COSTS_N_INSNS (3), /* SI */
845 COSTS_N_INSNS (4), /* DI */
846 COSTS_N_INSNS (5)}, /* other */
847 0, /* cost of multiply per each bit set */
848 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
849 COSTS_N_INSNS (35), /* HI */
850 COSTS_N_INSNS (51), /* SI */
851 COSTS_N_INSNS (83), /* DI */
852 COSTS_N_INSNS (83)}, /* other */
853 COSTS_N_INSNS (1), /* cost of movsx */
854 COSTS_N_INSNS (1), /* cost of movzx */
855 8, /* "large" insn */
856 9, /* MOVE_RATIO */
857 4, /* cost for loading QImode using movzbl */
858 {3, 4, 3}, /* cost of loading integer registers
859 in QImode, HImode and SImode.
860 Relative to reg-reg move (2). */
861 {3, 4, 3}, /* cost of storing integer registers */
862 4, /* cost of reg,reg fld/fst */
863 {4, 4, 12}, /* cost of loading fp registers
864 in SFmode, DFmode and XFmode */
865 {6, 6, 8}, /* cost of storing fp registers
866 in SFmode, DFmode and XFmode */
867 2, /* cost of moving MMX register */
868 {3, 3}, /* cost of loading MMX registers
869 in SImode and DImode */
870 {4, 4}, /* cost of storing MMX registers
871 in SImode and DImode */
872 2, /* cost of moving SSE register */
873 {4, 4, 3}, /* cost of loading SSE registers
874 in SImode, DImode and TImode */
875 {4, 4, 5}, /* cost of storing SSE registers
876 in SImode, DImode and TImode */
877 3, /* MMX or SSE register to integer */
878 /* On K8:
879 MOVD reg64, xmmreg Double FSTORE 4
880 MOVD reg32, xmmreg Double FSTORE 4
881 On AMDFAM10:
882 MOVD reg64, xmmreg Double FADD 3
883 1/1 1/1
884 MOVD reg32, xmmreg Double FADD 3
885 1/1 1/1 */
886 64, /* size of l1 cache. */
887 512, /* size of l2 cache. */
888 64, /* size of prefetch block */
889 /* New AMD processors never drop prefetches; if they cannot be performed
890 immediately, they are queued.  We set the number of simultaneous prefetches
891 to a large constant to reflect this (it is probably not a good idea to leave
892 the number of prefetches completely unlimited, as their execution also
893 takes some time).  */
894 100, /* number of parallel prefetches */
895 2, /* Branch cost */
896 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
897 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
898 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
899 COSTS_N_INSNS (2), /* cost of FABS instruction. */
900 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
901 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
902
903 amdfam10_memcpy,
904 amdfam10_memset,
905 4, /* scalar_stmt_cost. */
906 2, /* scalar load_cost. */
907 2, /* scalar_store_cost. */
908 6, /* vec_stmt_cost. */
909 0, /* vec_to_scalar_cost. */
910 2, /* scalar_to_vec_cost. */
911 2, /* vec_align_load_cost. */
912 2, /* vec_unalign_load_cost. */
913 2, /* vec_store_cost. */
914 2, /* cond_taken_branch_cost. */
915 1, /* cond_not_taken_branch_cost. */
916 };
917
918 /* BDVER1 has an optimized REP instruction for medium-sized blocks, but for
919 very small blocks it is better to use a loop.  For large blocks, a libcall
920 can do non-temporal accesses and beat inlined code considerably.  */
921 static stringop_algs bdver1_memcpy[2] = {
922 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
923 {-1, rep_prefix_4_byte, false}}},
924 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
925 {-1, libcall, false}}}};
926 static stringop_algs bdver1_memset[2] = {
927 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
928 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
929 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
930 {-1, libcall, false}}}};
931
932 const struct processor_costs bdver1_cost = {
933 COSTS_N_INSNS (1), /* cost of an add instruction */
934 COSTS_N_INSNS (1), /* cost of a lea instruction */
935 COSTS_N_INSNS (1), /* variable shift costs */
936 COSTS_N_INSNS (1), /* constant shift costs */
937 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
938 COSTS_N_INSNS (4), /* HI */
939 COSTS_N_INSNS (4), /* SI */
940 COSTS_N_INSNS (6), /* DI */
941 COSTS_N_INSNS (6)}, /* other */
942 0, /* cost of multiply per each bit set */
943 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
944 COSTS_N_INSNS (35), /* HI */
945 COSTS_N_INSNS (51), /* SI */
946 COSTS_N_INSNS (83), /* DI */
947 COSTS_N_INSNS (83)}, /* other */
948 COSTS_N_INSNS (1), /* cost of movsx */
949 COSTS_N_INSNS (1), /* cost of movzx */
950 8, /* "large" insn */
951 9, /* MOVE_RATIO */
952 4, /* cost for loading QImode using movzbl */
953 {5, 5, 4}, /* cost of loading integer registers
954 in QImode, HImode and SImode.
955 Relative to reg-reg move (2). */
956 {4, 4, 4}, /* cost of storing integer registers */
957 2, /* cost of reg,reg fld/fst */
958 {5, 5, 12}, /* cost of loading fp registers
959 in SFmode, DFmode and XFmode */
960 {4, 4, 8}, /* cost of storing fp registers
961 in SFmode, DFmode and XFmode */
962 2, /* cost of moving MMX register */
963 {4, 4}, /* cost of loading MMX registers
964 in SImode and DImode */
965 {4, 4}, /* cost of storing MMX registers
966 in SImode and DImode */
967 2, /* cost of moving SSE register */
968 {4, 4, 4}, /* cost of loading SSE registers
969 in SImode, DImode and TImode */
970 {4, 4, 4}, /* cost of storing SSE registers
971 in SImode, DImode and TImode */
972 2, /* MMX or SSE register to integer */
973 /* On K8:
974 MOVD reg64, xmmreg Double FSTORE 4
975 MOVD reg32, xmmreg Double FSTORE 4
976 On AMDFAM10:
977 MOVD reg64, xmmreg Double FADD 3
978 1/1 1/1
979 MOVD reg32, xmmreg Double FADD 3
980 1/1 1/1 */
981 16, /* size of l1 cache. */
982 2048, /* size of l2 cache. */
983 64, /* size of prefetch block */
984 /* New AMD processors never drop prefetches; if they cannot be performed
985 immediately, they are queued.  We set the number of simultaneous prefetches
986 to a large constant to reflect this (it is probably not a good idea to leave
987 the number of prefetches completely unlimited, as their execution also
988 takes some time).  */
989 100, /* number of parallel prefetches */
990 2, /* Branch cost */
991 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
992 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
993 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
994 COSTS_N_INSNS (2), /* cost of FABS instruction. */
995 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
996 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
997
998 bdver1_memcpy,
999 bdver1_memset,
1000 6, /* scalar_stmt_cost. */
1001 4, /* scalar load_cost. */
1002 4, /* scalar_store_cost. */
1003 6, /* vec_stmt_cost. */
1004 0, /* vec_to_scalar_cost. */
1005 2, /* scalar_to_vec_cost. */
1006 4, /* vec_align_load_cost. */
1007 4, /* vec_unalign_load_cost. */
1008 4, /* vec_store_cost. */
1009 2, /* cond_taken_branch_cost. */
1010 1, /* cond_not_taken_branch_cost. */
1011 };
1012
1013 /* BDVER2 has an optimized REP instruction for medium-sized blocks, but for
1014 very small blocks it is better to use a loop.  For large blocks, a libcall
1015 can do non-temporal accesses and beat inlined code considerably.  */
1016
1017 static stringop_algs bdver2_memcpy[2] = {
1018 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1019 {-1, rep_prefix_4_byte, false}}},
1020 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1021 {-1, libcall, false}}}};
1022 static stringop_algs bdver2_memset[2] = {
1023 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1024 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1025 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1026 {-1, libcall, false}}}};
1027
1028 const struct processor_costs bdver2_cost = {
1029 COSTS_N_INSNS (1), /* cost of an add instruction */
1030 COSTS_N_INSNS (1), /* cost of a lea instruction */
1031 COSTS_N_INSNS (1), /* variable shift costs */
1032 COSTS_N_INSNS (1), /* constant shift costs */
1033 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1034 COSTS_N_INSNS (4), /* HI */
1035 COSTS_N_INSNS (4), /* SI */
1036 COSTS_N_INSNS (6), /* DI */
1037 COSTS_N_INSNS (6)}, /* other */
1038 0, /* cost of multiply per each bit set */
1039 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1040 COSTS_N_INSNS (35), /* HI */
1041 COSTS_N_INSNS (51), /* SI */
1042 COSTS_N_INSNS (83), /* DI */
1043 COSTS_N_INSNS (83)}, /* other */
1044 COSTS_N_INSNS (1), /* cost of movsx */
1045 COSTS_N_INSNS (1), /* cost of movzx */
1046 8, /* "large" insn */
1047 9, /* MOVE_RATIO */
1048 4, /* cost for loading QImode using movzbl */
1049 {5, 5, 4}, /* cost of loading integer registers
1050 in QImode, HImode and SImode.
1051 Relative to reg-reg move (2). */
1052 {4, 4, 4}, /* cost of storing integer registers */
1053 2, /* cost of reg,reg fld/fst */
1054 {5, 5, 12}, /* cost of loading fp registers
1055 in SFmode, DFmode and XFmode */
1056 {4, 4, 8}, /* cost of storing fp registers
1057 in SFmode, DFmode and XFmode */
1058 2, /* cost of moving MMX register */
1059 {4, 4}, /* cost of loading MMX registers
1060 in SImode and DImode */
1061 {4, 4}, /* cost of storing MMX registers
1062 in SImode and DImode */
1063 2, /* cost of moving SSE register */
1064 {4, 4, 4}, /* cost of loading SSE registers
1065 in SImode, DImode and TImode */
1066 {4, 4, 4}, /* cost of storing SSE registers
1067 in SImode, DImode and TImode */
1068 2, /* MMX or SSE register to integer */
1069 /* On K8:
1070 MOVD reg64, xmmreg Double FSTORE 4
1071 MOVD reg32, xmmreg Double FSTORE 4
1072 On AMDFAM10:
1073 MOVD reg64, xmmreg Double FADD 3
1074 1/1 1/1
1075 MOVD reg32, xmmreg Double FADD 3
1076 1/1 1/1 */
1077 16, /* size of l1 cache. */
1078 2048, /* size of l2 cache. */
1079 64, /* size of prefetch block */
1080 /* New AMD processors never drop prefetches; if they cannot be performed
1081 immediately, they are queued.  We set the number of simultaneous prefetches
1082 to a large constant to reflect this (it is probably not a good idea to leave
1083 the number of prefetches completely unlimited, as their execution also
1084 takes some time).  */
1085 100, /* number of parallel prefetches */
1086 2, /* Branch cost */
1087 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1088 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1089 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1090 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1091 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1092 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1093
1094 bdver2_memcpy,
1095 bdver2_memset,
1096 6, /* scalar_stmt_cost. */
1097 4, /* scalar load_cost. */
1098 4, /* scalar_store_cost. */
1099 6, /* vec_stmt_cost. */
1100 0, /* vec_to_scalar_cost. */
1101 2, /* scalar_to_vec_cost. */
1102 4, /* vec_align_load_cost. */
1103 4, /* vec_unalign_load_cost. */
1104 4, /* vec_store_cost. */
1105 2, /* cond_taken_branch_cost. */
1106 1, /* cond_not_taken_branch_cost. */
1107 };
1108
1109
1110 /* BDVER3 has an optimized REP instruction for medium-sized blocks, but for
1111 very small blocks it is better to use a loop.  For large blocks, a libcall
1112 can do non-temporal accesses and beat inlined code considerably.  */
1113 static stringop_algs bdver3_memcpy[2] = {
1114 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1115 {-1, rep_prefix_4_byte, false}}},
1116 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1117 {-1, libcall, false}}}};
1118 static stringop_algs bdver3_memset[2] = {
1119 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1120 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1121 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1122 {-1, libcall, false}}}};
1123 struct processor_costs bdver3_cost = {
1124 COSTS_N_INSNS (1), /* cost of an add instruction */
1125 COSTS_N_INSNS (1), /* cost of a lea instruction */
1126 COSTS_N_INSNS (1), /* variable shift costs */
1127 COSTS_N_INSNS (1), /* constant shift costs */
1128 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1129 COSTS_N_INSNS (4), /* HI */
1130 COSTS_N_INSNS (4), /* SI */
1131 COSTS_N_INSNS (6), /* DI */
1132 COSTS_N_INSNS (6)}, /* other */
1133 0, /* cost of multiply per each bit set */
1134 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1135 COSTS_N_INSNS (35), /* HI */
1136 COSTS_N_INSNS (51), /* SI */
1137 COSTS_N_INSNS (83), /* DI */
1138 COSTS_N_INSNS (83)}, /* other */
1139 COSTS_N_INSNS (1), /* cost of movsx */
1140 COSTS_N_INSNS (1), /* cost of movzx */
1141 8, /* "large" insn */
1142 9, /* MOVE_RATIO */
1143 4, /* cost for loading QImode using movzbl */
1144 {5, 5, 4}, /* cost of loading integer registers
1145 in QImode, HImode and SImode.
1146 Relative to reg-reg move (2). */
1147 {4, 4, 4}, /* cost of storing integer registers */
1148 2, /* cost of reg,reg fld/fst */
1149 {5, 5, 12}, /* cost of loading fp registers
1150 in SFmode, DFmode and XFmode */
1151 {4, 4, 8}, /* cost of storing fp registers
1152 in SFmode, DFmode and XFmode */
1153 2, /* cost of moving MMX register */
1154 {4, 4}, /* cost of loading MMX registers
1155 in SImode and DImode */
1156 {4, 4}, /* cost of storing MMX registers
1157 in SImode and DImode */
1158 2, /* cost of moving SSE register */
1159 {4, 4, 4}, /* cost of loading SSE registers
1160 in SImode, DImode and TImode */
1161 {4, 4, 4}, /* cost of storing SSE registers
1162 in SImode, DImode and TImode */
1163 2, /* MMX or SSE register to integer */
1164 16, /* size of l1 cache. */
1165 2048, /* size of l2 cache. */
1166 64, /* size of prefetch block */
1167 /* New AMD processors never drop prefetches; if they cannot be performed
1168 immediately, they are queued.  We set the number of simultaneous prefetches
1169 to a large constant to reflect this (it is probably not a good idea to leave
1170 the number of prefetches completely unlimited, as their execution also
1171 takes some time).  */
1172 100, /* number of parallel prefetches */
1173 2, /* Branch cost */
1174 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1175 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1176 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1177 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1178 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1179 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1180
1181 bdver3_memcpy,
1182 bdver3_memset,
1183 6, /* scalar_stmt_cost. */
1184 4, /* scalar load_cost. */
1185 4, /* scalar_store_cost. */
1186 6, /* vec_stmt_cost. */
1187 0, /* vec_to_scalar_cost. */
1188 2, /* scalar_to_vec_cost. */
1189 4, /* vec_align_load_cost. */
1190 4, /* vec_unalign_load_cost. */
1191 4, /* vec_store_cost. */
1192 2, /* cond_taken_branch_cost. */
1193 1, /* cond_not_taken_branch_cost. */
1194 };
1195
1196 /* BDVER4 has an optimized REP instruction for medium-sized blocks, but for
1197 very small blocks it is better to use a loop.  For large blocks, a libcall
1198 can do non-temporal accesses and beat inlined code considerably.  */
1199 static stringop_algs bdver4_memcpy[2] = {
1200 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1201 {-1, rep_prefix_4_byte, false}}},
1202 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1203 {-1, libcall, false}}}};
1204 static stringop_algs bdver4_memset[2] = {
1205 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1206 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1207 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1208 {-1, libcall, false}}}};
1209 struct processor_costs bdver4_cost = {
1210 COSTS_N_INSNS (1), /* cost of an add instruction */
1211 COSTS_N_INSNS (1), /* cost of a lea instruction */
1212 COSTS_N_INSNS (1), /* variable shift costs */
1213 COSTS_N_INSNS (1), /* constant shift costs */
1214 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1215 COSTS_N_INSNS (4), /* HI */
1216 COSTS_N_INSNS (4), /* SI */
1217 COSTS_N_INSNS (6), /* DI */
1218 COSTS_N_INSNS (6)}, /* other */
1219 0, /* cost of multiply per each bit set */
1220 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1221 COSTS_N_INSNS (35), /* HI */
1222 COSTS_N_INSNS (51), /* SI */
1223 COSTS_N_INSNS (83), /* DI */
1224 COSTS_N_INSNS (83)}, /* other */
1225 COSTS_N_INSNS (1), /* cost of movsx */
1226 COSTS_N_INSNS (1), /* cost of movzx */
1227 8, /* "large" insn */
1228 9, /* MOVE_RATIO */
1229 4, /* cost for loading QImode using movzbl */
1230 {5, 5, 4}, /* cost of loading integer registers
1231 in QImode, HImode and SImode.
1232 Relative to reg-reg move (2). */
1233 {4, 4, 4}, /* cost of storing integer registers */
1234 2, /* cost of reg,reg fld/fst */
1235 {5, 5, 12}, /* cost of loading fp registers
1236 in SFmode, DFmode and XFmode */
1237 {4, 4, 8}, /* cost of storing fp registers
1238 in SFmode, DFmode and XFmode */
1239 2, /* cost of moving MMX register */
1240 {4, 4}, /* cost of loading MMX registers
1241 in SImode and DImode */
1242 {4, 4}, /* cost of storing MMX registers
1243 in SImode and DImode */
1244 2, /* cost of moving SSE register */
1245 {4, 4, 4}, /* cost of loading SSE registers
1246 in SImode, DImode and TImode */
1247 {4, 4, 4}, /* cost of storing SSE registers
1248 in SImode, DImode and TImode */
1249 2, /* MMX or SSE register to integer */
1250 16, /* size of l1 cache. */
1251 2048, /* size of l2 cache. */
1252 64, /* size of prefetch block */
1253 /* New AMD processors never drop prefetches; if they cannot be performed
1254 immediately, they are queued.  We set the number of simultaneous prefetches
1255 to a large constant to reflect this (it is probably not a good idea to leave
1256 the number of prefetches completely unlimited, as their execution also
1257 takes some time).  */
1258 100, /* number of parallel prefetches */
1259 2, /* Branch cost */
1260 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1261 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1262 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1263 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1264 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1265 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1266
1267 bdver4_memcpy,
1268 bdver4_memset,
1269 6, /* scalar_stmt_cost. */
1270 4, /* scalar load_cost. */
1271 4, /* scalar_store_cost. */
1272 6, /* vec_stmt_cost. */
1273 0, /* vec_to_scalar_cost. */
1274 2, /* scalar_to_vec_cost. */
1275 4, /* vec_align_load_cost. */
1276 4, /* vec_unalign_load_cost. */
1277 4, /* vec_store_cost. */
1278 2, /* cond_taken_branch_cost. */
1279 1, /* cond_not_taken_branch_cost. */
1280 };
1281
1282 /* BTVER1 has an optimized REP instruction for medium-sized blocks, but for
1283 very small blocks it is better to use a loop.  For large blocks, a libcall
1284 can do non-temporal accesses and beat inlined code considerably.  */
1285 static stringop_algs btver1_memcpy[2] = {
1286 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1287 {-1, rep_prefix_4_byte, false}}},
1288 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1289 {-1, libcall, false}}}};
1290 static stringop_algs btver1_memset[2] = {
1291 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1292 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1293 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1294 {-1, libcall, false}}}};
1295 const struct processor_costs btver1_cost = {
1296 COSTS_N_INSNS (1), /* cost of an add instruction */
1297 COSTS_N_INSNS (2), /* cost of a lea instruction */
1298 COSTS_N_INSNS (1), /* variable shift costs */
1299 COSTS_N_INSNS (1), /* constant shift costs */
1300 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1301 COSTS_N_INSNS (4), /* HI */
1302 COSTS_N_INSNS (3), /* SI */
1303 COSTS_N_INSNS (4), /* DI */
1304 COSTS_N_INSNS (5)}, /* other */
1305 0, /* cost of multiply per each bit set */
1306 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1307 COSTS_N_INSNS (35), /* HI */
1308 COSTS_N_INSNS (51), /* SI */
1309 COSTS_N_INSNS (83), /* DI */
1310 COSTS_N_INSNS (83)}, /* other */
1311 COSTS_N_INSNS (1), /* cost of movsx */
1312 COSTS_N_INSNS (1), /* cost of movzx */
1313 8, /* "large" insn */
1314 9, /* MOVE_RATIO */
1315 4, /* cost for loading QImode using movzbl */
1316 {3, 4, 3}, /* cost of loading integer registers
1317 in QImode, HImode and SImode.
1318 Relative to reg-reg move (2). */
1319 {3, 4, 3}, /* cost of storing integer registers */
1320 4, /* cost of reg,reg fld/fst */
1321 {4, 4, 12}, /* cost of loading fp registers
1322 in SFmode, DFmode and XFmode */
1323 {6, 6, 8}, /* cost of storing fp registers
1324 in SFmode, DFmode and XFmode */
1325 2, /* cost of moving MMX register */
1326 {3, 3}, /* cost of loading MMX registers
1327 in SImode and DImode */
1328 {4, 4}, /* cost of storing MMX registers
1329 in SImode and DImode */
1330 2, /* cost of moving SSE register */
1331 {4, 4, 3}, /* cost of loading SSE registers
1332 in SImode, DImode and TImode */
1333 {4, 4, 5}, /* cost of storing SSE registers
1334 in SImode, DImode and TImode */
1335 3, /* MMX or SSE register to integer */
1336 /* On K8:
1337 MOVD reg64, xmmreg Double FSTORE 4
1338 MOVD reg32, xmmreg Double FSTORE 4
1339 On AMDFAM10:
1340 MOVD reg64, xmmreg Double FADD 3
1341 1/1 1/1
1342 MOVD reg32, xmmreg Double FADD 3
1343 1/1 1/1 */
1344 32, /* size of l1 cache. */
1345 512, /* size of l2 cache. */
1346 64, /* size of prefetch block */
1347 100, /* number of parallel prefetches */
1348 2, /* Branch cost */
1349 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1350 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1351 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1352 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1353 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1354 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1355
1356 btver1_memcpy,
1357 btver1_memset,
1358 4, /* scalar_stmt_cost. */
1359 2, /* scalar load_cost. */
1360 2, /* scalar_store_cost. */
1361 6, /* vec_stmt_cost. */
1362 0, /* vec_to_scalar_cost. */
1363 2, /* scalar_to_vec_cost. */
1364 2, /* vec_align_load_cost. */
1365 2, /* vec_unalign_load_cost. */
1366 2, /* vec_store_cost. */
1367 2, /* cond_taken_branch_cost. */
1368 1, /* cond_not_taken_branch_cost. */
1369 };
1370
1371 static stringop_algs btver2_memcpy[2] = {
1372 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1373 {-1, rep_prefix_4_byte, false}}},
1374 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1375 {-1, libcall, false}}}};
1376 static stringop_algs btver2_memset[2] = {
1377 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1378 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1379 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1380 {-1, libcall, false}}}};
1381 const struct processor_costs btver2_cost = {
1382 COSTS_N_INSNS (1), /* cost of an add instruction */
1383 COSTS_N_INSNS (2), /* cost of a lea instruction */
1384 COSTS_N_INSNS (1), /* variable shift costs */
1385 COSTS_N_INSNS (1), /* constant shift costs */
1386 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1387 COSTS_N_INSNS (4), /* HI */
1388 COSTS_N_INSNS (3), /* SI */
1389 COSTS_N_INSNS (4), /* DI */
1390 COSTS_N_INSNS (5)}, /* other */
1391 0, /* cost of multiply per each bit set */
1392 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1393 COSTS_N_INSNS (35), /* HI */
1394 COSTS_N_INSNS (51), /* SI */
1395 COSTS_N_INSNS (83), /* DI */
1396 COSTS_N_INSNS (83)}, /* other */
1397 COSTS_N_INSNS (1), /* cost of movsx */
1398 COSTS_N_INSNS (1), /* cost of movzx */
1399 8, /* "large" insn */
1400 9, /* MOVE_RATIO */
1401 4, /* cost for loading QImode using movzbl */
1402 {3, 4, 3}, /* cost of loading integer registers
1403 in QImode, HImode and SImode.
1404 Relative to reg-reg move (2). */
1405 {3, 4, 3}, /* cost of storing integer registers */
1406 4, /* cost of reg,reg fld/fst */
1407 {4, 4, 12}, /* cost of loading fp registers
1408 in SFmode, DFmode and XFmode */
1409 {6, 6, 8}, /* cost of storing fp registers
1410 in SFmode, DFmode and XFmode */
1411 2, /* cost of moving MMX register */
1412 {3, 3}, /* cost of loading MMX registers
1413 in SImode and DImode */
1414 {4, 4}, /* cost of storing MMX registers
1415 in SImode and DImode */
1416 2, /* cost of moving SSE register */
1417 {4, 4, 3}, /* cost of loading SSE registers
1418 in SImode, DImode and TImode */
1419 {4, 4, 5}, /* cost of storing SSE registers
1420 in SImode, DImode and TImode */
1421 3, /* MMX or SSE register to integer */
1422 /* On K8:
1423 MOVD reg64, xmmreg Double FSTORE 4
1424 MOVD reg32, xmmreg Double FSTORE 4
1425 On AMDFAM10:
1426 MOVD reg64, xmmreg Double FADD 3
1427 1/1 1/1
1428 MOVD reg32, xmmreg Double FADD 3
1429 1/1 1/1 */
1430 32, /* size of l1 cache. */
1431 2048, /* size of l2 cache. */
1432 64, /* size of prefetch block */
1433 100, /* number of parallel prefetches */
1434 2, /* Branch cost */
1435 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1436 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1437 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1438 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1439 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1440 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1441 btver2_memcpy,
1442 btver2_memset,
1443 4, /* scalar_stmt_cost. */
1444 2, /* scalar load_cost. */
1445 2, /* scalar_store_cost. */
1446 6, /* vec_stmt_cost. */
1447 0, /* vec_to_scalar_cost. */
1448 2, /* scalar_to_vec_cost. */
1449 2, /* vec_align_load_cost. */
1450 2, /* vec_unalign_load_cost. */
1451 2, /* vec_store_cost. */
1452 2, /* cond_taken_branch_cost. */
1453 1, /* cond_not_taken_branch_cost. */
1454 };
1455
1456 static stringop_algs pentium4_memcpy[2] = {
1457 {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}},
1458 DUMMY_STRINGOP_ALGS};
1459 static stringop_algs pentium4_memset[2] = {
1460 {libcall, {{6, loop_1_byte, false}, {48, loop, false},
1461 {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1462 DUMMY_STRINGOP_ALGS};
1463
1464 static const
1465 struct processor_costs pentium4_cost = {
1466 COSTS_N_INSNS (1), /* cost of an add instruction */
1467 COSTS_N_INSNS (3), /* cost of a lea instruction */
1468 COSTS_N_INSNS (4), /* variable shift costs */
1469 COSTS_N_INSNS (4), /* constant shift costs */
1470 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
1471 COSTS_N_INSNS (15), /* HI */
1472 COSTS_N_INSNS (15), /* SI */
1473 COSTS_N_INSNS (15), /* DI */
1474 COSTS_N_INSNS (15)}, /* other */
1475 0, /* cost of multiply per each bit set */
1476 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
1477 COSTS_N_INSNS (56), /* HI */
1478 COSTS_N_INSNS (56), /* SI */
1479 COSTS_N_INSNS (56), /* DI */
1480 COSTS_N_INSNS (56)}, /* other */
1481 COSTS_N_INSNS (1), /* cost of movsx */
1482 COSTS_N_INSNS (1), /* cost of movzx */
1483 16, /* "large" insn */
1484 6, /* MOVE_RATIO */
1485 2, /* cost for loading QImode using movzbl */
1486 {4, 5, 4}, /* cost of loading integer registers
1487 in QImode, HImode and SImode.
1488 Relative to reg-reg move (2). */
1489 {2, 3, 2}, /* cost of storing integer registers */
1490 2, /* cost of reg,reg fld/fst */
1491 {2, 2, 6}, /* cost of loading fp registers
1492 in SFmode, DFmode and XFmode */
1493 {4, 4, 6}, /* cost of storing fp registers
1494 in SFmode, DFmode and XFmode */
1495 2, /* cost of moving MMX register */
1496 {2, 2}, /* cost of loading MMX registers
1497 in SImode and DImode */
1498 {2, 2}, /* cost of storing MMX registers
1499 in SImode and DImode */
1500 12, /* cost of moving SSE register */
1501 {12, 12, 12}, /* cost of loading SSE registers
1502 in SImode, DImode and TImode */
1503 {2, 2, 8}, /* cost of storing SSE registers
1504 in SImode, DImode and TImode */
1505 10, /* MMX or SSE register to integer */
1506 8, /* size of l1 cache. */
1507 256, /* size of l2 cache. */
1508 64, /* size of prefetch block */
1509 6, /* number of parallel prefetches */
1510 2, /* Branch cost */
1511 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
1512 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
1513 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
1514 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1515 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1516 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
1517 pentium4_memcpy,
1518 pentium4_memset,
1519 1, /* scalar_stmt_cost. */
1520 1, /* scalar load_cost. */
1521 1, /* scalar_store_cost. */
1522 1, /* vec_stmt_cost. */
1523 1, /* vec_to_scalar_cost. */
1524 1, /* scalar_to_vec_cost. */
1525 1, /* vec_align_load_cost. */
1526 2, /* vec_unalign_load_cost. */
1527 1, /* vec_store_cost. */
1528 3, /* cond_taken_branch_cost. */
1529 1, /* cond_not_taken_branch_cost. */
1530 };
1531
1532 static stringop_algs nocona_memcpy[2] = {
1533 {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}},
1534 {libcall, {{32, loop, false}, {20000, rep_prefix_8_byte, false},
1535 {100000, unrolled_loop, false}, {-1, libcall, false}}}};
1536
1537 static stringop_algs nocona_memset[2] = {
1538 {libcall, {{6, loop_1_byte, false}, {48, loop, false},
1539 {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1540 {libcall, {{24, loop, false}, {64, unrolled_loop, false},
1541 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1542
1543 static const
1544 struct processor_costs nocona_cost = {
1545 COSTS_N_INSNS (1), /* cost of an add instruction */
1546 COSTS_N_INSNS (1), /* cost of a lea instruction */
1547 COSTS_N_INSNS (1), /* variable shift costs */
1548 COSTS_N_INSNS (1), /* constant shift costs */
1549 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
1550 COSTS_N_INSNS (10), /* HI */
1551 COSTS_N_INSNS (10), /* SI */
1552 COSTS_N_INSNS (10), /* DI */
1553 COSTS_N_INSNS (10)}, /* other */
1554 0, /* cost of multiply per each bit set */
1555 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
1556 COSTS_N_INSNS (66), /* HI */
1557 COSTS_N_INSNS (66), /* SI */
1558 COSTS_N_INSNS (66), /* DI */
1559 COSTS_N_INSNS (66)}, /* other */
1560 COSTS_N_INSNS (1), /* cost of movsx */
1561 COSTS_N_INSNS (1), /* cost of movzx */
1562 16, /* "large" insn */
1563 17, /* MOVE_RATIO */
1564 4, /* cost for loading QImode using movzbl */
1565 {4, 4, 4}, /* cost of loading integer registers
1566 in QImode, HImode and SImode.
1567 Relative to reg-reg move (2). */
1568 {4, 4, 4}, /* cost of storing integer registers */
1569 3, /* cost of reg,reg fld/fst */
1570 {12, 12, 12}, /* cost of loading fp registers
1571 in SFmode, DFmode and XFmode */
1572 {4, 4, 4}, /* cost of storing fp registers
1573 in SFmode, DFmode and XFmode */
1574 6, /* cost of moving MMX register */
1575 {12, 12}, /* cost of loading MMX registers
1576 in SImode and DImode */
1577 {12, 12}, /* cost of storing MMX registers
1578 in SImode and DImode */
1579 6, /* cost of moving SSE register */
1580 {12, 12, 12}, /* cost of loading SSE registers
1581 in SImode, DImode and TImode */
1582 {12, 12, 12}, /* cost of storing SSE registers
1583 in SImode, DImode and TImode */
1584 8, /* MMX or SSE register to integer */
1585 8, /* size of l1 cache. */
1586 1024, /* size of l2 cache. */
1587 64, /* size of prefetch block */
1588 8, /* number of parallel prefetches */
1589 1, /* Branch cost */
1590 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1591 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1592 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
1593 COSTS_N_INSNS (3), /* cost of FABS instruction. */
1594 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
1595 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
1596 nocona_memcpy,
1597 nocona_memset,
1598 1, /* scalar_stmt_cost. */
1599 1, /* scalar load_cost. */
1600 1, /* scalar_store_cost. */
1601 1, /* vec_stmt_cost. */
1602 1, /* vec_to_scalar_cost. */
1603 1, /* scalar_to_vec_cost. */
1604 1, /* vec_align_load_cost. */
1605 2, /* vec_unalign_load_cost. */
1606 1, /* vec_store_cost. */
1607 3, /* cond_taken_branch_cost. */
1608 1, /* cond_not_taken_branch_cost. */
1609 };
1610
1611 static stringop_algs atom_memcpy[2] = {
1612 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1613 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1614 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1615 static stringop_algs atom_memset[2] = {
1616 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1617 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1618 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1619 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1620 static const
1621 struct processor_costs atom_cost = {
1622 COSTS_N_INSNS (1), /* cost of an add instruction */
1623 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1624 COSTS_N_INSNS (1), /* variable shift costs */
1625 COSTS_N_INSNS (1), /* constant shift costs */
1626 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1627 COSTS_N_INSNS (4), /* HI */
1628 COSTS_N_INSNS (3), /* SI */
1629 COSTS_N_INSNS (4), /* DI */
1630 COSTS_N_INSNS (2)}, /* other */
1631 0, /* cost of multiply per each bit set */
1632 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1633 COSTS_N_INSNS (26), /* HI */
1634 COSTS_N_INSNS (42), /* SI */
1635 COSTS_N_INSNS (74), /* DI */
1636 COSTS_N_INSNS (74)}, /* other */
1637 COSTS_N_INSNS (1), /* cost of movsx */
1638 COSTS_N_INSNS (1), /* cost of movzx */
1639 8, /* "large" insn */
1640 17, /* MOVE_RATIO */
1641 4, /* cost for loading QImode using movzbl */
1642 {4, 4, 4}, /* cost of loading integer registers
1643 in QImode, HImode and SImode.
1644 Relative to reg-reg move (2). */
1645 {4, 4, 4}, /* cost of storing integer registers */
1646 4, /* cost of reg,reg fld/fst */
1647 {12, 12, 12}, /* cost of loading fp registers
1648 in SFmode, DFmode and XFmode */
1649 {6, 6, 8}, /* cost of storing fp registers
1650 in SFmode, DFmode and XFmode */
1651 2, /* cost of moving MMX register */
1652 {8, 8}, /* cost of loading MMX registers
1653 in SImode and DImode */
1654 {8, 8}, /* cost of storing MMX registers
1655 in SImode and DImode */
1656 2, /* cost of moving SSE register */
1657 {8, 8, 8}, /* cost of loading SSE registers
1658 in SImode, DImode and TImode */
1659 {8, 8, 8}, /* cost of storing SSE registers
1660 in SImode, DImode and TImode */
1661 5, /* MMX or SSE register to integer */
1662 32, /* size of l1 cache. */
1663 256, /* size of l2 cache. */
1664 64, /* size of prefetch block */
1665 6, /* number of parallel prefetches */
1666 3, /* Branch cost */
1667 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1668 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1669 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1670 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1671 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1672 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1673 atom_memcpy,
1674 atom_memset,
1675 1, /* scalar_stmt_cost. */
1676 1, /* scalar load_cost. */
1677 1, /* scalar_store_cost. */
1678 1, /* vec_stmt_cost. */
1679 1, /* vec_to_scalar_cost. */
1680 1, /* scalar_to_vec_cost. */
1681 1, /* vec_align_load_cost. */
1682 2, /* vec_unalign_load_cost. */
1683 1, /* vec_store_cost. */
1684 3, /* cond_taken_branch_cost. */
1685 1, /* cond_not_taken_branch_cost. */
1686 };
1687
1688 static stringop_algs slm_memcpy[2] = {
1689 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1690 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1691 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1692 static stringop_algs slm_memset[2] = {
1693 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1694 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1695 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1696 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1697 static const
1698 struct processor_costs slm_cost = {
1699 COSTS_N_INSNS (1), /* cost of an add instruction */
1700 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1701 COSTS_N_INSNS (1), /* variable shift costs */
1702 COSTS_N_INSNS (1), /* constant shift costs */
1703 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1704 COSTS_N_INSNS (3), /* HI */
1705 COSTS_N_INSNS (3), /* SI */
1706 COSTS_N_INSNS (4), /* DI */
1707 COSTS_N_INSNS (2)}, /* other */
1708 0, /* cost of multiply per each bit set */
1709 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1710 COSTS_N_INSNS (26), /* HI */
1711 COSTS_N_INSNS (42), /* SI */
1712 COSTS_N_INSNS (74), /* DI */
1713 COSTS_N_INSNS (74)}, /* other */
1714 COSTS_N_INSNS (1), /* cost of movsx */
1715 COSTS_N_INSNS (1), /* cost of movzx */
1716 8, /* "large" insn */
1717 17, /* MOVE_RATIO */
1718 4, /* cost for loading QImode using movzbl */
1719 {4, 4, 4}, /* cost of loading integer registers
1720 in QImode, HImode and SImode.
1721 Relative to reg-reg move (2). */
1722 {4, 4, 4}, /* cost of storing integer registers */
1723 4, /* cost of reg,reg fld/fst */
1724 {12, 12, 12}, /* cost of loading fp registers
1725 in SFmode, DFmode and XFmode */
1726 {6, 6, 8}, /* cost of storing fp registers
1727 in SFmode, DFmode and XFmode */
1728 2, /* cost of moving MMX register */
1729 {8, 8}, /* cost of loading MMX registers
1730 in SImode and DImode */
1731 {8, 8}, /* cost of storing MMX registers
1732 in SImode and DImode */
1733 2, /* cost of moving SSE register */
1734 {8, 8, 8}, /* cost of loading SSE registers
1735 in SImode, DImode and TImode */
1736 {8, 8, 8}, /* cost of storing SSE registers
1737 in SImode, DImode and TImode */
1738 5, /* MMX or SSE register to integer */
1739 32, /* size of l1 cache. */
1740 256, /* size of l2 cache. */
1741 64, /* size of prefetch block */
1742 6, /* number of parallel prefetches */
1743 3, /* Branch cost */
1744 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1745 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1746 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1747 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1748 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1749 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1750 slm_memcpy,
1751 slm_memset,
1752 1, /* scalar_stmt_cost. */
1753 1, /* scalar load_cost. */
1754 1, /* scalar_store_cost. */
1755 1, /* vec_stmt_cost. */
1756 4, /* vec_to_scalar_cost. */
1757 1, /* scalar_to_vec_cost. */
1758 1, /* vec_align_load_cost. */
1759 2, /* vec_unalign_load_cost. */
1760 1, /* vec_store_cost. */
1761 3, /* cond_taken_branch_cost. */
1762 1, /* cond_not_taken_branch_cost. */
1763 };
1764
1765 static stringop_algs intel_memcpy[2] = {
1766 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1767 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1768 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1769 static stringop_algs intel_memset[2] = {
1770 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1771 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1772 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1773 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1774 static const
1775 struct processor_costs intel_cost = {
1776 COSTS_N_INSNS (1), /* cost of an add instruction */
1777 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1778 COSTS_N_INSNS (1), /* variable shift costs */
1779 COSTS_N_INSNS (1), /* constant shift costs */
1780 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1781 COSTS_N_INSNS (3), /* HI */
1782 COSTS_N_INSNS (3), /* SI */
1783 COSTS_N_INSNS (4), /* DI */
1784 COSTS_N_INSNS (2)}, /* other */
1785 0, /* cost of multiply per each bit set */
1786 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1787 COSTS_N_INSNS (26), /* HI */
1788 COSTS_N_INSNS (42), /* SI */
1789 COSTS_N_INSNS (74), /* DI */
1790 COSTS_N_INSNS (74)}, /* other */
1791 COSTS_N_INSNS (1), /* cost of movsx */
1792 COSTS_N_INSNS (1), /* cost of movzx */
1793 8, /* "large" insn */
1794 17, /* MOVE_RATIO */
1795 4, /* cost for loading QImode using movzbl */
1796 {4, 4, 4}, /* cost of loading integer registers
1797 in QImode, HImode and SImode.
1798 Relative to reg-reg move (2). */
1799 {4, 4, 4}, /* cost of storing integer registers */
1800 4, /* cost of reg,reg fld/fst */
1801 {12, 12, 12}, /* cost of loading fp registers
1802 in SFmode, DFmode and XFmode */
1803 {6, 6, 8}, /* cost of storing fp registers
1804 in SFmode, DFmode and XFmode */
1805 2, /* cost of moving MMX register */
1806 {8, 8}, /* cost of loading MMX registers
1807 in SImode and DImode */
1808 {8, 8}, /* cost of storing MMX registers
1809 in SImode and DImode */
1810 2, /* cost of moving SSE register */
1811 {8, 8, 8}, /* cost of loading SSE registers
1812 in SImode, DImode and TImode */
1813 {8, 8, 8}, /* cost of storing SSE registers
1814 in SImode, DImode and TImode */
1815 5, /* MMX or SSE register to integer */
1816 32, /* size of l1 cache. */
1817 256, /* size of l2 cache. */
1818 64, /* size of prefetch block */
1819 6, /* number of parallel prefetches */
1820 3, /* Branch cost */
1821 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1822 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1823 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1824 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1825 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1826 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1827 intel_memcpy,
1828 intel_memset,
1829 1, /* scalar_stmt_cost. */
1830 1, /* scalar load_cost. */
1831 1, /* scalar_store_cost. */
1832 1, /* vec_stmt_cost. */
1833 4, /* vec_to_scalar_cost. */
1834 1, /* scalar_to_vec_cost. */
1835 1, /* vec_align_load_cost. */
1836 2, /* vec_unalign_load_cost. */
1837 1, /* vec_store_cost. */
1838 3, /* cond_taken_branch_cost. */
1839 1, /* cond_not_taken_branch_cost. */
1840 };
1841
1842 /* Generic should produce code tuned for Core-i7 (and newer chips)
1843 and btver1 (and newer chips). */
1844
1845 static stringop_algs generic_memcpy[2] = {
1846 {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
1847 {-1, libcall, false}}},
1848 {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
1849 {-1, libcall, false}}}};
1850 static stringop_algs generic_memset[2] = {
1851 {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
1852 {-1, libcall, false}}},
1853 {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
1854 {-1, libcall, false}}}};
1855 static const
1856 struct processor_costs generic_cost = {
1857 COSTS_N_INSNS (1), /* cost of an add instruction */
1858 /* On all chips taken into consideration, lea takes 2 cycles or more. With
1859 this cost, however, our current implementation of synth_mult ends up
1860 using unnecessary temporary registers, causing regressions on several
1861 SPECfp benchmarks. */
1862 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1863 COSTS_N_INSNS (1), /* variable shift costs */
1864 COSTS_N_INSNS (1), /* constant shift costs */
1865 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1866 COSTS_N_INSNS (4), /* HI */
1867 COSTS_N_INSNS (3), /* SI */
1868 COSTS_N_INSNS (4), /* DI */
1869 COSTS_N_INSNS (2)}, /* other */
1870 0, /* cost of multiply per each bit set */
1871 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1872 COSTS_N_INSNS (26), /* HI */
1873 COSTS_N_INSNS (42), /* SI */
1874 COSTS_N_INSNS (74), /* DI */
1875 COSTS_N_INSNS (74)}, /* other */
1876 COSTS_N_INSNS (1), /* cost of movsx */
1877 COSTS_N_INSNS (1), /* cost of movzx */
1878 8, /* "large" insn */
1879 17, /* MOVE_RATIO */
1880 4, /* cost for loading QImode using movzbl */
1881 {4, 4, 4}, /* cost of loading integer registers
1882 in QImode, HImode and SImode.
1883 Relative to reg-reg move (2). */
1884 {4, 4, 4}, /* cost of storing integer registers */
1885 4, /* cost of reg,reg fld/fst */
1886 {12, 12, 12}, /* cost of loading fp registers
1887 in SFmode, DFmode and XFmode */
1888 {6, 6, 8}, /* cost of storing fp registers
1889 in SFmode, DFmode and XFmode */
1890 2, /* cost of moving MMX register */
1891 {8, 8}, /* cost of loading MMX registers
1892 in SImode and DImode */
1893 {8, 8}, /* cost of storing MMX registers
1894 in SImode and DImode */
1895 2, /* cost of moving SSE register */
1896 {8, 8, 8}, /* cost of loading SSE registers
1897 in SImode, DImode and TImode */
1898 {8, 8, 8}, /* cost of storing SSE registers
1899 in SImode, DImode and TImode */
1900 5, /* MMX or SSE register to integer */
1901 32, /* size of l1 cache. */
1902 512, /* size of l2 cache. */
1903 64, /* size of prefetch block */
1904 6, /* number of parallel prefetches */
1905 /* Benchmarks show large regressions on the K8 sixtrack benchmark when this
1906 value is increased to the perhaps more appropriate value of 5. */
1907 3, /* Branch cost */
1908 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1909 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1910 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1911 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1912 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1913 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1914 generic_memcpy,
1915 generic_memset,
1916 1, /* scalar_stmt_cost. */
1917 1, /* scalar load_cost. */
1918 1, /* scalar_store_cost. */
1919 1, /* vec_stmt_cost. */
1920 1, /* vec_to_scalar_cost. */
1921 1, /* scalar_to_vec_cost. */
1922 1, /* vec_align_load_cost. */
1923 2, /* vec_unalign_load_cost. */
1924 1, /* vec_store_cost. */
1925 3, /* cond_taken_branch_cost. */
1926 1, /* cond_not_taken_branch_cost. */
1927 };
1928
1929 /* core_cost should produce code tuned for the Core family of CPUs. */
1930 static stringop_algs core_memcpy[2] = {
1931 {libcall, {{1024, rep_prefix_4_byte, true}, {-1, libcall, false}}},
1932 {libcall, {{24, loop, true}, {128, rep_prefix_8_byte, true},
1933 {-1, libcall, false}}}};
1934 static stringop_algs core_memset[2] = {
1935 {libcall, {{6, loop_1_byte, true},
1936 {24, loop, true},
1937 {8192, rep_prefix_4_byte, true},
1938 {-1, libcall, false}}},
1939 {libcall, {{24, loop, true}, {512, rep_prefix_8_byte, true},
1940 {-1, libcall, false}}}};
1941
1942 static const
1943 struct processor_costs core_cost = {
1944 COSTS_N_INSNS (1), /* cost of an add instruction */
1945 /* On all chips taken into consideration, lea takes 2 cycles or more. With
1946 this cost, however, our current implementation of synth_mult ends up
1947 using unnecessary temporary registers, causing regressions on several
1948 SPECfp benchmarks. */
1949 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1950 COSTS_N_INSNS (1), /* variable shift costs */
1951 COSTS_N_INSNS (1), /* constant shift costs */
1952 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1953 COSTS_N_INSNS (4), /* HI */
1954 COSTS_N_INSNS (3), /* SI */
1955 COSTS_N_INSNS (4), /* DI */
1956 COSTS_N_INSNS (2)}, /* other */
1957 0, /* cost of multiply per each bit set */
1958 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1959 COSTS_N_INSNS (26), /* HI */
1960 COSTS_N_INSNS (42), /* SI */
1961 COSTS_N_INSNS (74), /* DI */
1962 COSTS_N_INSNS (74)}, /* other */
1963 COSTS_N_INSNS (1), /* cost of movsx */
1964 COSTS_N_INSNS (1), /* cost of movzx */
1965 8, /* "large" insn */
1966 17, /* MOVE_RATIO */
1967 4, /* cost for loading QImode using movzbl */
1968 {4, 4, 4}, /* cost of loading integer registers
1969 in QImode, HImode and SImode.
1970 Relative to reg-reg move (2). */
1971 {4, 4, 4}, /* cost of storing integer registers */
1972 4, /* cost of reg,reg fld/fst */
1973 {12, 12, 12}, /* cost of loading fp registers
1974 in SFmode, DFmode and XFmode */
1975 {6, 6, 8}, /* cost of storing fp registers
1976 in SFmode, DFmode and XFmode */
1977 2, /* cost of moving MMX register */
1978 {8, 8}, /* cost of loading MMX registers
1979 in SImode and DImode */
1980 {8, 8}, /* cost of storing MMX registers
1981 in SImode and DImode */
1982 2, /* cost of moving SSE register */
1983 {8, 8, 8}, /* cost of loading SSE registers
1984 in SImode, DImode and TImode */
1985 {8, 8, 8}, /* cost of storing SSE registers
1986 in SImode, DImode and TImode */
1987 5, /* MMX or SSE register to integer */
1988 64, /* size of l1 cache. */
1989 512, /* size of l2 cache. */
1990 64, /* size of prefetch block */
1991 6, /* number of parallel prefetches */
1992 /* FIXME: perhaps a more appropriate value is 5. */
1993 3, /* Branch cost */
1994 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1995 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1996 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1997 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1998 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1999 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
2000 core_memcpy,
2001 core_memset,
2002 1, /* scalar_stmt_cost. */
2003 1, /* scalar load_cost. */
2004 1, /* scalar_store_cost. */
2005 1, /* vec_stmt_cost. */
2006 1, /* vec_to_scalar_cost. */
2007 1, /* scalar_to_vec_cost. */
2008 1, /* vec_align_load_cost. */
2009 2, /* vec_unalign_load_cost. */
2010 1, /* vec_store_cost. */
2011 3, /* cond_taken_branch_cost. */
2012 1, /* cond_not_taken_branch_cost. */
2013 };
2014
2015
2016 /* Set by -mtune. */
2017 const struct processor_costs *ix86_tune_cost = &pentium_cost;
2018
2019 /* Set by -mtune or -Os. */
2020 const struct processor_costs *ix86_cost = &pentium_cost;
2021
2022 /* Processor feature/optimization bitmasks. */
2023 #define m_386 (1<<PROCESSOR_I386)
2024 #define m_486 (1<<PROCESSOR_I486)
2025 #define m_PENT (1<<PROCESSOR_PENTIUM)
2026 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
2027 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
2028 #define m_NOCONA (1<<PROCESSOR_NOCONA)
2029 #define m_P4_NOCONA (m_PENT4 | m_NOCONA)
2030 #define m_CORE2 (1<<PROCESSOR_CORE2)
2031 #define m_NEHALEM (1<<PROCESSOR_NEHALEM)
2032 #define m_SANDYBRIDGE (1<<PROCESSOR_SANDYBRIDGE)
2033 #define m_HASWELL (1<<PROCESSOR_HASWELL)
2034 #define m_CORE_ALL (m_CORE2 | m_NEHALEM | m_SANDYBRIDGE | m_HASWELL)
2035 #define m_BONNELL (1<<PROCESSOR_BONNELL)
2036 #define m_SILVERMONT (1<<PROCESSOR_SILVERMONT)
2037 #define m_INTEL (1<<PROCESSOR_INTEL)
2038
2039 #define m_GEODE (1<<PROCESSOR_GEODE)
2040 #define m_K6 (1<<PROCESSOR_K6)
2041 #define m_K6_GEODE (m_K6 | m_GEODE)
2042 #define m_K8 (1<<PROCESSOR_K8)
2043 #define m_ATHLON (1<<PROCESSOR_ATHLON)
2044 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
2045 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
2046 #define m_BDVER1 (1<<PROCESSOR_BDVER1)
2047 #define m_BDVER2 (1<<PROCESSOR_BDVER2)
2048 #define m_BDVER3 (1<<PROCESSOR_BDVER3)
2049 #define m_BDVER4 (1<<PROCESSOR_BDVER4)
2050 #define m_BTVER1 (1<<PROCESSOR_BTVER1)
2051 #define m_BTVER2 (1<<PROCESSOR_BTVER2)
2052 #define m_BDVER (m_BDVER1 | m_BDVER2 | m_BDVER3 | m_BDVER4)
2053 #define m_BTVER (m_BTVER1 | m_BTVER2)
2054 #define m_AMD_MULTIPLE (m_ATHLON_K8 | m_AMDFAM10 | m_BDVER | m_BTVER)
2055
2056 #define m_GENERIC (1<<PROCESSOR_GENERIC)
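/* Illustration only (a sketch, not an actual entry): x86-tune.def combines
   the masks above in the selector field of DEF_TUNE, roughly

     DEF_TUNE (X86_TUNE_EXAMPLE, "example_feature",
	       m_CORE_ALL | m_BTVER | m_GENERIC)

   where X86_TUNE_EXAMPLE and "example_feature" are hypothetical names; the
   selector enables the feature whenever the bit for the current -mtune
   processor is set in the mask.  */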
2057
2058 const char* ix86_tune_feature_names[X86_TUNE_LAST] = {
2059 #undef DEF_TUNE
2060 #define DEF_TUNE(tune, name, selector) name,
2061 #include "x86-tune.def"
2062 #undef DEF_TUNE
2063 };
2064
2065 /* Feature tests against the various tunings. */
2066 unsigned char ix86_tune_features[X86_TUNE_LAST];
2067
2068 /* Feature tests against the various tunings, used to create
2069 ix86_tune_features based on the processor mask. */
2070 static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
2071 #undef DEF_TUNE
2072 #define DEF_TUNE(tune, name, selector) selector,
2073 #include "x86-tune.def"
2074 #undef DEF_TUNE
2075 };
2076
2077 /* Feature tests against the various architecture variations. */
2078 unsigned char ix86_arch_features[X86_ARCH_LAST];
2079
2080 /* Feature tests against the various architecture variations, used to create
2081 ix86_arch_features based on the processor mask. */
2082 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
2083 /* X86_ARCH_CMOV: Conditional move was added for pentiumpro. */
2084 ~(m_386 | m_486 | m_PENT | m_K6),
2085
2086 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
2087 ~m_386,
2088
2089 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
2090 ~(m_386 | m_486),
2091
2092 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
2093 ~m_386,
2094
2095 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
2096 ~m_386,
2097 };
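/* A sketch of how the array above is consumed (the real code lives in
   ix86_option_override_internal later in this file):

     ix86_arch_mask = 1u << ix86_arch;
     for (i = 0; i < X86_ARCH_LAST; i++)
       ix86_arch_features[i]
	 = !!(initial_ix86_arch_features[i] & ix86_arch_mask);

   so, e.g., X86_ARCH_CMOV ends up clear for -march=i386/i486/pentium/k6
   and set for everything newer.  */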
2098
2099 /* In case the average insn count for a single function invocation is
2100 lower than this constant, emit fast (but longer) prologue and
2101 epilogue code. */
2102 #define FAST_PROLOGUE_INSN_COUNT 20
2103
2104 /* Names for the 8-bit (low), 8-bit (high), and 16-bit registers, respectively. */
2105 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
2106 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
2107 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
2108
2109 /* Array of the smallest class containing reg number REGNO, indexed by
2110 REGNO. Used by REGNO_REG_CLASS in i386.h. */
2111
2112 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
2113 {
2114 /* ax, dx, cx, bx */
2115 AREG, DREG, CREG, BREG,
2116 /* si, di, bp, sp */
2117 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
2118 /* FP registers */
2119 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
2120 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
2121 /* arg pointer */
2122 NON_Q_REGS,
2123 /* flags, fpsr, fpcr, frame */
2124 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
2125 /* SSE registers */
2126 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2127 SSE_REGS, SSE_REGS,
2128 /* MMX registers */
2129 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
2130 MMX_REGS, MMX_REGS,
2131 /* REX registers */
2132 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2133 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2134 /* SSE REX registers */
2135 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2136 SSE_REGS, SSE_REGS,
2137 /* AVX-512 SSE registers */
2138 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2139 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2140 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2141 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2142 /* Mask registers. */
2143 MASK_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
2144 MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
2145 };
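/* Illustrative use (the actual macro is defined in i386.h):

     #define REGNO_REG_CLASS(REGNO) (regclass_map[(REGNO)])

   so, for instance, REGNO_REG_CLASS applied to hard register 0 (%eax)
   yields AREG, and applied to the first SSE register yields
   SSE_FIRST_REG.  */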
2146
2147 /* The "default" register map used in 32bit mode. */
2148
2149 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
2150 {
2151 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
2152 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
2153 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2154 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
2155 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
2156 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2157 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2158 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23*/
2159 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31*/
2160 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
2161 };
2162
2163 /* The "default" register map used in 64bit mode. */
2164
2165 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
2166 {
2167 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
2168 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
2169 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2170 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
2171 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
2172 8,9,10,11,12,13,14,15, /* extended integer registers */
2173 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
2174 67, 68, 69, 70, 71, 72, 73, 74, /* AVX-512 registers 16-23 */
2175 75, 76, 77, 78, 79, 80, 81, 82, /* AVX-512 registers 24-31 */
2176 118, 119, 120, 121, 122, 123, 124, 125, /* Mask registers */
2177 };
2178
2179 /* Define the register numbers to be used in Dwarf debugging information.
2180 The SVR4 reference port C compiler uses the following register numbers
2181 in its Dwarf output code:
2182 0 for %eax (gcc regno = 0)
2183 1 for %ecx (gcc regno = 2)
2184 2 for %edx (gcc regno = 1)
2185 3 for %ebx (gcc regno = 3)
2186 4 for %esp (gcc regno = 7)
2187 5 for %ebp (gcc regno = 6)
2188 6 for %esi (gcc regno = 4)
2189 7 for %edi (gcc regno = 5)
2190 The following three DWARF register numbers are never generated by
2191 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
2192 believes these numbers have these meanings.
2193 8 for %eip (no gcc equivalent)
2194 9 for %eflags (gcc regno = 17)
2195 10 for %trapno (no gcc equivalent)
2196 It is not at all clear how we should number the FP stack registers
2197 for the x86 architecture. If the version of SDB on x86/svr4 were
2198 a bit less brain dead with respect to floating-point then we would
2199 have a precedent to follow with respect to DWARF register numbers
2200 for x86 FP registers, but the SDB on x86/svr4 is so completely
2201 broken with respect to FP registers that it is hardly worth thinking
2202 of it as something to strive for compatibility with.
2203 The version of x86/svr4 SDB I have at the moment does (partially)
2204 seem to believe that DWARF register number 11 is associated with
2205 the x86 register %st(0), but that's about all. Higher DWARF
2206 register numbers don't seem to be associated with anything in
2207 particular, and even for DWARF regno 11, SDB only seems to under-
2208 stand that it should say that a variable lives in %st(0) (when
2209 asked via an `=' command) if we said it was in DWARF regno 11,
2210 but SDB still prints garbage when asked for the value of the
2211 variable in question (via a `/' command).
2212 (Also note that the labels SDB prints for various FP stack regs
2213 when doing an `x' command are all wrong.)
2214 Note that these problems generally don't affect the native SVR4
2215 C compiler because it doesn't allow the use of -O with -g and
2216 because when it is *not* optimizing, it allocates a memory
2217 location for each floating-point variable, and the memory
2218 location is what gets described in the DWARF AT_location
2219 attribute for the variable in question.
2220 Regardless of the severe mental illness of the x86/svr4 SDB, we
2221 do something sensible here and we use the following DWARF
2222 register numbers. Note that these are all stack-top-relative
2223 numbers.
2224 11 for %st(0) (gcc regno = 8)
2225 12 for %st(1) (gcc regno = 9)
2226 13 for %st(2) (gcc regno = 10)
2227 14 for %st(3) (gcc regno = 11)
2228 15 for %st(4) (gcc regno = 12)
2229 16 for %st(5) (gcc regno = 13)
2230 17 for %st(6) (gcc regno = 14)
2231 18 for %st(7) (gcc regno = 15)
2232 */
2233 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
2234 {
2235 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
2236 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
2237 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2238 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
2239 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
2240 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2241 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2242 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23*/
2243 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31*/
2244 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
2245 };
2246
2247 /* Define parameter passing and return registers. */
2248
2249 static int const x86_64_int_parameter_registers[6] =
2250 {
2251 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
2252 };
2253
2254 static int const x86_64_ms_abi_int_parameter_registers[4] =
2255 {
2256 CX_REG, DX_REG, R8_REG, R9_REG
2257 };
2258
2259 static int const x86_64_int_return_registers[4] =
2260 {
2261 AX_REG, DX_REG, DI_REG, SI_REG
2262 };
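/* Illustration: with the SysV ABI a call such as foo (a, b, c) passes its
   first three integer arguments in %rdi, %rsi and %rdx (DI_REG, SI_REG and
   DX_REG above), while the MS ABI table would put them in %rcx, %rdx and
   %r8; integer return values come back in %rax (and %rdx for a second
   word).  */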
2263
2264 /* Additional registers that are clobbered by SYSV calls. */
2265
2266 int const x86_64_ms_sysv_extra_clobbered_registers[12] =
2267 {
2268 SI_REG, DI_REG,
2269 XMM6_REG, XMM7_REG,
2270 XMM8_REG, XMM9_REG, XMM10_REG, XMM11_REG,
2271 XMM12_REG, XMM13_REG, XMM14_REG, XMM15_REG
2272 };
2273
2274 /* Define the structure for the machine field in struct function. */
2275
2276 struct GTY(()) stack_local_entry {
2277 unsigned short mode;
2278 unsigned short n;
2279 rtx rtl;
2280 struct stack_local_entry *next;
2281 };
2282
2283 /* Structure describing stack frame layout.
2284 Stack grows downward:
2285
2286 [arguments]
2287 <- ARG_POINTER
2288 saved pc
2289
2290 saved static chain if ix86_static_chain_on_stack
2291
2292 saved frame pointer if frame_pointer_needed
2293 <- HARD_FRAME_POINTER
2294 [saved regs]
2295 <- regs_save_offset
2296 [padding0]
2297
2298 [saved SSE regs]
2299 <- sse_regs_save_offset
2300 [padding1] |
2301 | <- FRAME_POINTER
2302 [va_arg registers] |
2303 |
2304 [frame] |
2305 |
2306 [padding2] | = to_allocate
2307 <- STACK_POINTER
2308 */
2309 struct ix86_frame
2310 {
2311 int nsseregs;
2312 int nregs;
2313 int va_arg_size;
2314 int red_zone_size;
2315 int outgoing_arguments_size;
2316
2317 /* The offsets relative to ARG_POINTER. */
2318 HOST_WIDE_INT frame_pointer_offset;
2319 HOST_WIDE_INT hard_frame_pointer_offset;
2320 HOST_WIDE_INT stack_pointer_offset;
2321 HOST_WIDE_INT hfp_save_offset;
2322 HOST_WIDE_INT reg_save_offset;
2323 HOST_WIDE_INT sse_reg_save_offset;
2324
2325 /* When save_regs_using_mov is set, emit prologue using
2326 move instead of push instructions. */
2327 bool save_regs_using_mov;
2328 };
2329
2330 /* Which cpu are we scheduling for. */
2331 enum attr_cpu ix86_schedule;
2332
2333 /* Which cpu are we optimizing for. */
2334 enum processor_type ix86_tune;
2335
2336 /* Which instruction set architecture to use. */
2337 enum processor_type ix86_arch;
2338
2339 /* True if processor has SSE prefetch instruction. */
2340 unsigned char x86_prefetch_sse;
2341
2342 /* -mstackrealign option */
2343 static const char ix86_force_align_arg_pointer_string[]
2344 = "force_align_arg_pointer";
2345
2346 static rtx (*ix86_gen_leave) (void);
2347 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
2348 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
2349 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
2350 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
2351 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
2352 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
2353 static rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx);
2354 static rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx);
2355 static rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx);
2356 static rtx (*ix86_gen_tls_global_dynamic_64) (rtx, rtx, rtx);
2357 static rtx (*ix86_gen_tls_local_dynamic_base_64) (rtx, rtx);
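/* These hooks are filled in by ix86_option_override_internal with the
   SImode or DImode insn generators, roughly (a sketch, not the exact
   code):

     ix86_gen_add3 = TARGET_64BIT ? gen_adddi3 : gen_addsi3;
     ix86_gen_sub3 = TARGET_64BIT ? gen_subdi3 : gen_subsi3;

   the remaining pointers follow the same word-size split.  */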
2358
2359 /* Preferred alignment for stack boundary in bits. */
2360 unsigned int ix86_preferred_stack_boundary;
2361
2362 /* Alignment for incoming stack boundary in bits, as specified on the
2363 command line. */
2364 static unsigned int ix86_user_incoming_stack_boundary;
2365
2366 /* Default alignment for incoming stack boundary in bits. */
2367 static unsigned int ix86_default_incoming_stack_boundary;
2368
2369 /* Alignment for incoming stack boundary in bits. */
2370 unsigned int ix86_incoming_stack_boundary;
2371
2372 /* Calling abi specific va_list type nodes. */
2373 static GTY(()) tree sysv_va_list_type_node;
2374 static GTY(()) tree ms_va_list_type_node;
2375
2376 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
2377 char internal_label_prefix[16];
2378 int internal_label_prefix_len;
2379
2380 /* Fence to use after loop using movnt. */
2381 tree x86_mfence;
2382
2383 /* Register class used for passing a given 64-bit part of the argument.
2384 These represent classes as documented by the psABI, with the exception
2385 of the SSESF and SSEDF classes, which are basically the SSE class: gcc
2386 just uses an SFmode or DFmode move instead of DImode to avoid
2387 reformatting penalties.
2388
2389 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
2390 whenever possible (the upper half then contains padding). */
2390 enum x86_64_reg_class
2391 {
2392 X86_64_NO_CLASS,
2393 X86_64_INTEGER_CLASS,
2394 X86_64_INTEGERSI_CLASS,
2395 X86_64_SSE_CLASS,
2396 X86_64_SSESF_CLASS,
2397 X86_64_SSEDF_CLASS,
2398 X86_64_SSEUP_CLASS,
2399 X86_64_X87_CLASS,
2400 X86_64_X87UP_CLASS,
2401 X86_64_COMPLEX_X87_CLASS,
2402 X86_64_MEMORY_CLASS
2403 };
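/* A worked example of the classification (illustrative only): under the
   psABI a type such as

     struct example { double d; int i; };	(hypothetical)

   occupies two eightbytes and is classified as
   { X86_64_SSEDF_CLASS, X86_64_INTEGERSI_CLASS }, so D is passed in an
   SSE register and I in a general-purpose register.  */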
2404
2405 #define MAX_CLASSES 8
2406
2407 /* Table of constants used by fldpi, fldln2, etc. */
2408 static REAL_VALUE_TYPE ext_80387_constants_table [5];
2409 static bool ext_80387_constants_init = 0;
2410
2411 \f
2412 static struct machine_function * ix86_init_machine_status (void);
2413 static rtx ix86_function_value (const_tree, const_tree, bool);
2414 static bool ix86_function_value_regno_p (const unsigned int);
2415 static unsigned int ix86_function_arg_boundary (enum machine_mode,
2416 const_tree);
2417 static rtx ix86_static_chain (const_tree, bool);
2418 static int ix86_function_regparm (const_tree, const_tree);
2419 static void ix86_compute_frame_layout (struct ix86_frame *);
2420 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
2421 rtx, rtx, int);
2422 static void ix86_add_new_builtins (HOST_WIDE_INT);
2423 static tree ix86_canonical_va_list_type (tree);
2424 static void predict_jump (int);
2425 static unsigned int split_stack_prologue_scratch_regno (void);
2426 static bool i386_asm_output_addr_const_extra (FILE *, rtx);
2427
2428 enum ix86_function_specific_strings
2429 {
2430 IX86_FUNCTION_SPECIFIC_ARCH,
2431 IX86_FUNCTION_SPECIFIC_TUNE,
2432 IX86_FUNCTION_SPECIFIC_MAX
2433 };
2434
2435 static char *ix86_target_string (HOST_WIDE_INT, int, const char *,
2436 const char *, enum fpmath_unit, bool);
2437 static void ix86_function_specific_save (struct cl_target_option *,
2438 struct gcc_options *opts);
2439 static void ix86_function_specific_restore (struct gcc_options *opts,
2440 struct cl_target_option *);
2441 static void ix86_function_specific_print (FILE *, int,
2442 struct cl_target_option *);
2443 static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
2444 static bool ix86_valid_target_attribute_inner_p (tree, char *[],
2445 struct gcc_options *,
2446 struct gcc_options *,
2447 struct gcc_options *);
2448 static bool ix86_can_inline_p (tree, tree);
2449 static void ix86_set_current_function (tree);
2450 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
2451
2452 static enum calling_abi ix86_function_abi (const_tree);
2453
2454 \f
2455 #ifndef SUBTARGET32_DEFAULT_CPU
2456 #define SUBTARGET32_DEFAULT_CPU "i386"
2457 #endif
2458
2459 /* Whether -mtune= or -march= were specified */
2460 static int ix86_tune_defaulted;
2461 static int ix86_arch_specified;
2462
2463 /* Vectorization library interface and handlers. */
2464 static tree (*ix86_veclib_handler) (enum built_in_function, tree, tree);
2465
2466 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
2467 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
2468
2469 /* Processor target table, indexed by processor number */
2470 struct ptt
2471 {
2472 const char *const name; /* processor name */
2473 const struct processor_costs *cost; /* Processor costs */
2474 const int align_loop; /* Default alignments. */
2475 const int align_loop_max_skip;
2476 const int align_jump;
2477 const int align_jump_max_skip;
2478 const int align_func;
2479 };
2480
2481 /* This table must be in sync with enum processor_type in i386.h. */
2482 static const struct ptt processor_target_table[PROCESSOR_max] =
2483 {
2484 {"generic", &generic_cost, 16, 10, 16, 10, 16},
2485 {"i386", &i386_cost, 4, 3, 4, 3, 4},
2486 {"i486", &i486_cost, 16, 15, 16, 15, 16},
2487 {"pentium", &pentium_cost, 16, 7, 16, 7, 16},
2488 {"pentiumpro", &pentiumpro_cost, 16, 15, 16, 10, 16},
2489 {"pentium4", &pentium4_cost, 0, 0, 0, 0, 0},
2490 {"nocona", &nocona_cost, 0, 0, 0, 0, 0},
2491 {"core2", &core_cost, 16, 10, 16, 10, 16},
2492 {"nehalem", &core_cost, 16, 10, 16, 10, 16},
2493 {"sandybridge", &core_cost, 16, 10, 16, 10, 16},
2494 {"haswell", &core_cost, 16, 10, 16, 10, 16},
2495 {"bonnell", &atom_cost, 16, 15, 16, 7, 16},
2496 {"silvermont", &slm_cost, 16, 15, 16, 7, 16},
2497 {"intel", &intel_cost, 16, 15, 16, 7, 16},
2498 {"geode", &geode_cost, 0, 0, 0, 0, 0},
2499 {"k6", &k6_cost, 32, 7, 32, 7, 32},
2500 {"athlon", &athlon_cost, 16, 7, 16, 7, 16},
2501 {"k8", &k8_cost, 16, 7, 16, 7, 16},
2502 {"amdfam10", &amdfam10_cost, 32, 24, 32, 7, 32},
2503 {"bdver1", &bdver1_cost, 16, 10, 16, 7, 11},
2504 {"bdver2", &bdver2_cost, 16, 10, 16, 7, 11},
2505 {"bdver3", &bdver3_cost, 16, 10, 16, 7, 11},
2506 {"bdver4", &bdver4_cost, 16, 10, 16, 7, 11},
2507 {"btver1", &btver1_cost, 16, 10, 16, 7, 11},
2508 {"btver2", &btver2_cost, 16, 10, 16, 7, 11}
2509 };
2510 \f
2511 static unsigned int
2512 rest_of_handle_insert_vzeroupper (void)
2513 {
2514 int i;
2515
2516 /* vzeroupper instructions are inserted immediately after reload to
2517 account for possible spills from 256-bit registers. The pass
2518 reuses the mode switching infrastructure by re-running the mode
2519 insertion pass, so disable entities that have already been processed. */
2520 for (i = 0; i < MAX_386_ENTITIES; i++)
2521 ix86_optimize_mode_switching[i] = 0;
2522
2523 ix86_optimize_mode_switching[AVX_U128] = 1;
2524
2525 /* Call optimize_mode_switching. */
2526 g->get_passes ()->execute_pass_mode_switching ();
2527 return 0;
2528 }
2529
2530 namespace {
2531
2532 const pass_data pass_data_insert_vzeroupper =
2533 {
2534 RTL_PASS, /* type */
2535 "vzeroupper", /* name */
2536 OPTGROUP_NONE, /* optinfo_flags */
2537 TV_NONE, /* tv_id */
2538 0, /* properties_required */
2539 0, /* properties_provided */
2540 0, /* properties_destroyed */
2541 0, /* todo_flags_start */
2542 TODO_df_finish, /* todo_flags_finish */
2543 };
2544
2545 class pass_insert_vzeroupper : public rtl_opt_pass
2546 {
2547 public:
2548 pass_insert_vzeroupper(gcc::context *ctxt)
2549 : rtl_opt_pass(pass_data_insert_vzeroupper, ctxt)
2550 {}
2551
2552 /* opt_pass methods: */
2553 virtual bool gate (function *)
2554 {
2555 return TARGET_AVX && !TARGET_AVX512F && TARGET_VZEROUPPER;
2556 }
2557
2558 virtual unsigned int execute (function *)
2559 {
2560 return rest_of_handle_insert_vzeroupper ();
2561 }
2562
2563 }; // class pass_insert_vzeroupper
2564
2565 } // anon namespace
2566
2567 rtl_opt_pass *
2568 make_pass_insert_vzeroupper (gcc::context *ctxt)
2569 {
2570 return new pass_insert_vzeroupper (ctxt);
2571 }
2572
2573 /* Return true if a red-zone is in use. */
2574
2575 static inline bool
2576 ix86_using_red_zone (void)
2577 {
2578 return TARGET_RED_ZONE && !TARGET_64BIT_MS_ABI;
2579 }
2580 \f
2581 /* Return a string that documents the current -m options. The caller is
2582 responsible for freeing the string. */
2583
2584 static char *
2585 ix86_target_string (HOST_WIDE_INT isa, int flags, const char *arch,
2586 const char *tune, enum fpmath_unit fpmath,
2587 bool add_nl_p)
2588 {
2589 struct ix86_target_opts
2590 {
2591 const char *option; /* option string */
2592 HOST_WIDE_INT mask; /* isa mask options */
2593 };
2594
2595 /* This table is ordered so that options like -msse4.2 that imply
2596 preceding options are matched first. */
2597 static struct ix86_target_opts isa_opts[] =
2598 {
2599 { "-mfma4", OPTION_MASK_ISA_FMA4 },
2600 { "-mfma", OPTION_MASK_ISA_FMA },
2601 { "-mxop", OPTION_MASK_ISA_XOP },
2602 { "-mlwp", OPTION_MASK_ISA_LWP },
2603 { "-mavx512f", OPTION_MASK_ISA_AVX512F },
2604 { "-mavx512er", OPTION_MASK_ISA_AVX512ER },
2605 { "-mavx512cd", OPTION_MASK_ISA_AVX512CD },
2606 { "-mavx512pf", OPTION_MASK_ISA_AVX512PF },
2607 { "-mavx512dq", OPTION_MASK_ISA_AVX512DQ },
2608 { "-mavx512bw", OPTION_MASK_ISA_AVX512BW },
2609 { "-mavx512vl", OPTION_MASK_ISA_AVX512VL },
2610 { "-msse4a", OPTION_MASK_ISA_SSE4A },
2611 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
2612 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
2613 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
2614 { "-msse3", OPTION_MASK_ISA_SSE3 },
2615 { "-msse2", OPTION_MASK_ISA_SSE2 },
2616 { "-msse", OPTION_MASK_ISA_SSE },
2617 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
2618 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
2619 { "-mmmx", OPTION_MASK_ISA_MMX },
2620 { "-mabm", OPTION_MASK_ISA_ABM },
2621 { "-mbmi", OPTION_MASK_ISA_BMI },
2622 { "-mbmi2", OPTION_MASK_ISA_BMI2 },
2623 { "-mlzcnt", OPTION_MASK_ISA_LZCNT },
2624 { "-mhle", OPTION_MASK_ISA_HLE },
2625 { "-mfxsr", OPTION_MASK_ISA_FXSR },
2626 { "-mrdseed", OPTION_MASK_ISA_RDSEED },
2627 { "-mprfchw", OPTION_MASK_ISA_PRFCHW },
2628 { "-madx", OPTION_MASK_ISA_ADX },
2629 { "-mtbm", OPTION_MASK_ISA_TBM },
2630 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
2631 { "-mmovbe", OPTION_MASK_ISA_MOVBE },
2632 { "-mcrc32", OPTION_MASK_ISA_CRC32 },
2633 { "-maes", OPTION_MASK_ISA_AES },
2634 { "-msha", OPTION_MASK_ISA_SHA },
2635 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
2636 { "-mfsgsbase", OPTION_MASK_ISA_FSGSBASE },
2637 { "-mrdrnd", OPTION_MASK_ISA_RDRND },
2638 { "-mf16c", OPTION_MASK_ISA_F16C },
2639 { "-mrtm", OPTION_MASK_ISA_RTM },
2640 { "-mxsave", OPTION_MASK_ISA_XSAVE },
2641 { "-mxsaveopt", OPTION_MASK_ISA_XSAVEOPT },
2642 { "-mprefetchwt1", OPTION_MASK_ISA_PREFETCHWT1 },
2643 { "-mclflushopt", OPTION_MASK_ISA_CLFLUSHOPT },
2644 { "-mxsavec", OPTION_MASK_ISA_XSAVEC },
2645 { "-mxsaves", OPTION_MASK_ISA_XSAVES },
2646 };
2647
2648 /* Flag options. */
2649 static struct ix86_target_opts flag_opts[] =
2650 {
2651 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
2652 { "-mlong-double-128", MASK_LONG_DOUBLE_128 },
2653 { "-mlong-double-64", MASK_LONG_DOUBLE_64 },
2654 { "-m80387", MASK_80387 },
2655 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
2656 { "-malign-double", MASK_ALIGN_DOUBLE },
2657 { "-mcld", MASK_CLD },
2658 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
2659 { "-mieee-fp", MASK_IEEE_FP },
2660 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
2661 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
2662 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
2663 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
2664 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
2665 { "-mno-push-args", MASK_NO_PUSH_ARGS },
2666 { "-mno-red-zone", MASK_NO_RED_ZONE },
2667 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
2668 { "-mrecip", MASK_RECIP },
2669 { "-mrtd", MASK_RTD },
2670 { "-msseregparm", MASK_SSEREGPARM },
2671 { "-mstack-arg-probe", MASK_STACK_PROBE },
2672 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
2673 { "-mvect8-ret-in-mem", MASK_VECT8_RETURNS },
2674 { "-m8bit-idiv", MASK_USE_8BIT_IDIV },
2675 { "-mvzeroupper", MASK_VZEROUPPER },
2676 { "-mavx256-split-unaligned-load", MASK_AVX256_SPLIT_UNALIGNED_LOAD},
2677 { "-mavx256-split-unaligned-store", MASK_AVX256_SPLIT_UNALIGNED_STORE},
2678 { "-mprefer-avx128", MASK_PREFER_AVX128},
2679 };
2680
2681 const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
2682
2683 char isa_other[40];
2684 char target_other[40];
2685 unsigned num = 0;
2686 unsigned i, j;
2687 char *ret;
2688 char *ptr;
2689 size_t len;
2690 size_t line_len;
2691 size_t sep_len;
2692 const char *abi;
2693
2694 memset (opts, '\0', sizeof (opts));
2695
2696 /* Add -march= option. */
2697 if (arch)
2698 {
2699 opts[num][0] = "-march=";
2700 opts[num++][1] = arch;
2701 }
2702
2703 /* Add -mtune= option. */
2704 if (tune)
2705 {
2706 opts[num][0] = "-mtune=";
2707 opts[num++][1] = tune;
2708 }
2709
2710 /* Add -m32/-m64/-mx32. */
2711 if ((isa & OPTION_MASK_ISA_64BIT) != 0)
2712 {
2713 if ((isa & OPTION_MASK_ABI_64) != 0)
2714 abi = "-m64";
2715 else
2716 abi = "-mx32";
2717 isa &= ~ (OPTION_MASK_ISA_64BIT
2718 | OPTION_MASK_ABI_64
2719 | OPTION_MASK_ABI_X32);
2720 }
2721 else
2722 abi = "-m32";
2723 opts[num++][0] = abi;
2724
2725 /* Pick out the options in isa options. */
2726 for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
2727 {
2728 if ((isa & isa_opts[i].mask) != 0)
2729 {
2730 opts[num++][0] = isa_opts[i].option;
2731 isa &= ~ isa_opts[i].mask;
2732 }
2733 }
2734
2735 if (isa && add_nl_p)
2736 {
2737 opts[num++][0] = isa_other;
2738 sprintf (isa_other, "(other isa: %#" HOST_WIDE_INT_PRINT "x)",
2739 isa);
2740 }
2741
2742 /* Add flag options. */
2743 for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
2744 {
2745 if ((flags & flag_opts[i].mask) != 0)
2746 {
2747 opts[num++][0] = flag_opts[i].option;
2748 flags &= ~ flag_opts[i].mask;
2749 }
2750 }
2751
2752 if (flags && add_nl_p)
2753 {
2754 opts[num++][0] = target_other;
2755 sprintf (target_other, "(other flags: %#x)", flags);
2756 }
2757
2758 /* Add -fpmath= option. */
2759 if (fpmath)
2760 {
2761 opts[num][0] = "-mfpmath=";
2762 switch ((int) fpmath)
2763 {
2764 case FPMATH_387:
2765 opts[num++][1] = "387";
2766 break;
2767
2768 case FPMATH_SSE:
2769 opts[num++][1] = "sse";
2770 break;
2771
2772 case FPMATH_387 | FPMATH_SSE:
2773 opts[num++][1] = "sse+387";
2774 break;
2775
2776 default:
2777 gcc_unreachable ();
2778 }
2779 }
2780
2781 /* Any options? */
2782 if (num == 0)
2783 return NULL;
2784
2785 gcc_assert (num < ARRAY_SIZE (opts));
2786
2787 /* Size the string. */
2788 len = 0;
2789 sep_len = (add_nl_p) ? 3 : 1;
2790 for (i = 0; i < num; i++)
2791 {
2792 len += sep_len;
2793 for (j = 0; j < 2; j++)
2794 if (opts[i][j])
2795 len += strlen (opts[i][j]);
2796 }
2797
2798 /* Build the string. */
2799 ret = ptr = (char *) xmalloc (len);
2800 line_len = 0;
2801
2802 for (i = 0; i < num; i++)
2803 {
2804 size_t len2[2];
2805
2806 for (j = 0; j < 2; j++)
2807 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
2808
2809 if (i != 0)
2810 {
2811 *ptr++ = ' ';
2812 line_len++;
2813
2814 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
2815 {
2816 *ptr++ = '\\';
2817 *ptr++ = '\n';
2818 line_len = 0;
2819 }
2820 }
2821
2822 for (j = 0; j < 2; j++)
2823 if (opts[i][j])
2824 {
2825 memcpy (ptr, opts[i][j], len2[j]);
2826 ptr += len2[j];
2827 line_len += len2[j];
2828 }
2829 }
2830
2831 *ptr = '\0';
2832 gcc_assert (ret + len >= ptr);
2833
2834 return ret;
2835 }
2836
2837 /* Return true if profiling code should be emitted before the
2838 prologue, and false otherwise. Note: for x86 this is only the
2839 case with -mfentry, as used for "hotfix" support. */
2840 static bool
2841 ix86_profile_before_prologue (void)
2842 {
2843 return flag_fentry != 0;
2844 }
2845
2846 /* Function that is callable from the debugger to print the current
2847 options. */
2848 void ATTRIBUTE_UNUSED
2849 ix86_debug_options (void)
2850 {
2851 char *opts = ix86_target_string (ix86_isa_flags, target_flags,
2852 ix86_arch_string, ix86_tune_string,
2853 ix86_fpmath, true);
2854
2855 if (opts)
2856 {
2857 fprintf (stderr, "%s\n\n", opts);
2858 free (opts);
2859 }
2860 else
2861 fputs ("<no options>\n\n", stderr);
2862
2863 return;
2864 }
2865
2866 static const char *stringop_alg_names[] = {
2867 #define DEF_ENUM
2868 #define DEF_ALG(alg, name) #name,
2869 #include "stringop.def"
2870 #undef DEF_ENUM
2871 #undef DEF_ALG
2872 };
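/* Each DEF_ALG (alg, name) entry in stringop.def contributes its NAME
   string here, in enum order; e.g. (assumed mapping) the enumerator
   rep_prefix_8_byte is spelled "rep_8byte" on the command line, which is
   the form used in the -mmemset-strategy= example below.  */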
2873
2874 /* Parse the parameter string passed to -mmemcpy-strategy= or -mmemset-strategy=.
2875 The string has the following form (or is a comma-separated list of such entries):
2876
2877 strategy_alg:max_size:[align|noalign]
2878
2879 where the full size range for the strategy is either [0, max_size] or
2880 [min_size, max_size], in which min_size is the preceding range's
2881 max_size + 1. The last size range must have max_size == -1.
2882
2883 Examples:
2884
2885 1.
2886 -mmemcpy-strategy=libcall:-1:noalign
2887
2888 this is equivalent to (for known size memcpy) -mstringop-strategy=libcall
2889
2890
2891 2.
2892 -mmemset-strategy=rep_8byte:16:noalign,vector_loop:2048:align,libcall:-1:noalign
2893
2894 This tells the compiler to use the following strategy for memset:
2895 1) when the expected size is in [1, 16], use the rep_8byte strategy;
2896 2) when the size is in [17, 2048], use vector_loop;
2897 3) when the size is > 2048, use libcall. */
2898
2899 struct stringop_size_range
2900 {
2901 int max;
2902 stringop_alg alg;
2903 bool noalign;
2904 };
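/* For illustration only (not compiled): example 2 above would be parsed
   into the ranges below, assuming the name-to-enumerator mapping from
   stringop.def noted earlier.  */
#if 0
static const stringop_size_range example_memset_ranges[] =
{
  {   16, rep_prefix_8_byte, true  },	/* [1, 16]: rep_8byte, noalign */
  { 2048, vector_loop,       false },	/* [17, 2048]: vector_loop, align */
  {   -1, libcall,           true  },	/* > 2048: libcall, noalign */
};
#endif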
2905
2906 static void
2907 ix86_parse_stringop_strategy_string (char *strategy_str, bool is_memset)
2908 {
2909 const struct stringop_algs *default_algs;
2910 stringop_size_range input_ranges[MAX_STRINGOP_ALGS];
2911 char *curr_range_str, *next_range_str;
2912 int i = 0, n = 0;
2913
2914 if (is_memset)
2915 default_algs = &ix86_cost->memset[TARGET_64BIT != 0];
2916 else
2917 default_algs = &ix86_cost->memcpy[TARGET_64BIT != 0];
2918
2919 curr_range_str = strategy_str;
2920
2921 do
2922 {
2923 int maxs;
2924 char alg_name[128];
2925 char align[16];
2926 next_range_str = strchr (curr_range_str, ',');
2927 if (next_range_str)
2928 *next_range_str++ = '\0';
2929
2930 if (3 != sscanf (curr_range_str, "%20[^:]:%d:%10s",
2931 alg_name, &maxs, align))
2932 {
2933 error ("wrong argument %s to option %s", curr_range_str,
2934 is_memset ? "-mmemset-strategy=" : "-mmemcpy-strategy=");
2935 return;
2936 }
2937
2938 if (n > 0 && (maxs < (input_ranges[n - 1].max + 1) && maxs != -1))
2939 {
2940 error ("size ranges of option %s should be increasing",
2941 is_memset ? "-mmemset-strategy=" : "-mmemcpy-strategy=");
2942 return;
2943 }
2944
2945 for (i = 0; i < last_alg; i++)
2946 if (!strcmp (alg_name, stringop_alg_names[i]))
2947 break;
2948
2949 if (i == last_alg)
2950 {
2951 error ("wrong stringop strategy name %s specified for option %s",
2952 alg_name,
2953 is_memset ? "-mmemset-strategy=" : "-mmemcpy-strategy=");
2954 return;
2955 }
2956
2957 /* Guard the fixed-size input_ranges array before writing to it.  */
if (n >= MAX_STRINGOP_ALGS)
  {
    error ("too many size ranges specified in option %s",
	   is_memset ? "-mmemset-strategy=" : "-mmemcpy-strategy=");
    return;
  }
input_ranges[n].max = maxs;
2958 input_ranges[n].alg = (stringop_alg) i;
2959 if (!strcmp (align, "align"))
2960 input_ranges[n].noalign = false;
2961 else if (!strcmp (align, "noalign"))
2962 input_ranges[n].noalign = true;
2963 else
2964 {
2965 error ("unknown alignment %s specified for option %s",
2966 align, is_memset ? "-mmemset-strategy=" : "-mmemcpy-strategy=");
2967 return;
2968 }
2969 n++;
2970 curr_range_str = next_range_str;
2971 }
2972 while (curr_range_str);
2973
2974 if (input_ranges[n - 1].max != -1)
2975 {
2976 error ("the max value for the last size range should be -1"
2977 " for option %s",
2978 is_memset ? "-mmemset-strategy=" : "-mmemcpy-strategy=");
2979 return;
2980 }
2981
2982 if (n > MAX_STRINGOP_ALGS)
2983 {
2984 error ("too many size ranges specified in option %s",
2985 is_memset ? "-mmemset-strategy=" : "-mmemcpy-strategy=");
2986 return;
2987 }
2988
2989 /* Now override the default algs array. */
2990 for (i = 0; i < n; i++)
2991 {
2992 *const_cast<int *>(&default_algs->size[i].max) = input_ranges[i].max;
2993 *const_cast<stringop_alg *>(&default_algs->size[i].alg)
2994 = input_ranges[i].alg;
2995 *const_cast<int *>(&default_algs->size[i].noalign)
2996 = input_ranges[i].noalign;
2997 }
2998 }
2999
3000 \f
3001 /* Parse the -mtune-ctrl= option. When DUMP is true,
3002 print the features that are explicitly set. */
3003
3004 static void
3005 parse_mtune_ctrl_str (bool dump)
3006 {
3007 if (!ix86_tune_ctrl_string)
3008 return;
3009
3010 char *next_feature_string = NULL;
3011 char *curr_feature_string = xstrdup (ix86_tune_ctrl_string);
3012 char *orig = curr_feature_string;
3013 int i;
3014 do
3015 {
3016 bool clear = false;
3017
3018 next_feature_string = strchr (curr_feature_string, ',');
3019 if (next_feature_string)
3020 *next_feature_string++ = '\0';
3021 if (*curr_feature_string == '^')
3022 {
3023 curr_feature_string++;
3024 clear = true;
3025 }
3026 for (i = 0; i < X86_TUNE_LAST; i++)
3027 {
3028 if (!strcmp (curr_feature_string, ix86_tune_feature_names[i]))
3029 {
3030 ix86_tune_features[i] = !clear;
3031 if (dump)
3032 fprintf (stderr, "Explicitly %s feature %s\n",
3033 clear ? "clear" : "set", ix86_tune_feature_names[i]);
3034 break;
3035 }
3036 }
3037 if (i == X86_TUNE_LAST)
3038 error ("Unknown parameter to option -mtune-ctrl: %s",
3039 clear ? curr_feature_string - 1 : curr_feature_string);
3040 curr_feature_string = next_feature_string;
3041 }
3042 while (curr_feature_string);
3043 free (orig);
3044 }
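
/* For illustration only (the feature names here are placeholders; real
   names come from x86-tune.def via ix86_tune_feature_names): an option
   such as

       -mtune-ctrl=feature_a,^feature_b

   would set ix86_tune_features[X86_TUNE_FEATURE_A] and clear
   ix86_tune_features[X86_TUNE_FEATURE_B].  Entries are comma separated
   and a leading '^' requests clearing rather than setting.  */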
3045
3046 /* Helper function to set ix86_tune_features. IX86_TUNE is the
3047 processor type. */
3048
3049 static void
3050 set_ix86_tune_features (enum processor_type ix86_tune, bool dump)
3051 {
3052 unsigned int ix86_tune_mask = 1u << ix86_tune;
3053 int i;
3054
3055 for (i = 0; i < X86_TUNE_LAST; ++i)
3056 {
3057 if (ix86_tune_no_default)
3058 ix86_tune_features[i] = 0;
3059 else
3060 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3061 }
3062
3063 if (dump)
3064 {
3065 fprintf (stderr, "List of x86 specific tuning parameter names:\n");
3066 for (i = 0; i < X86_TUNE_LAST; i++)
3067 fprintf (stderr, "%s : %s\n", ix86_tune_feature_names[i],
3068 ix86_tune_features[i] ? "on" : "off");
3069 }
3070
3071 parse_mtune_ctrl_str (dump);
3072 }
3073
3074
3075 /* Override various settings based on options. If MAIN_ARGS_P, the
3076 options are from the command line, otherwise they are from
3077 attributes. */
3078
3079 static void
3080 ix86_option_override_internal (bool main_args_p,
3081 struct gcc_options *opts,
3082 struct gcc_options *opts_set)
3083 {
3084 int i;
3085 unsigned int ix86_arch_mask;
3086 const bool ix86_tune_specified = (opts->x_ix86_tune_string != NULL);
3087 const char *prefix;
3088 const char *suffix;
3089 const char *sw;
3090
3091 #define PTA_3DNOW (HOST_WIDE_INT_1 << 0)
3092 #define PTA_3DNOW_A (HOST_WIDE_INT_1 << 1)
3093 #define PTA_64BIT (HOST_WIDE_INT_1 << 2)
3094 #define PTA_ABM (HOST_WIDE_INT_1 << 3)
3095 #define PTA_AES (HOST_WIDE_INT_1 << 4)
3096 #define PTA_AVX (HOST_WIDE_INT_1 << 5)
3097 #define PTA_BMI (HOST_WIDE_INT_1 << 6)
3098 #define PTA_CX16 (HOST_WIDE_INT_1 << 7)
3099 #define PTA_F16C (HOST_WIDE_INT_1 << 8)
3100 #define PTA_FMA (HOST_WIDE_INT_1 << 9)
3101 #define PTA_FMA4 (HOST_WIDE_INT_1 << 10)
3102 #define PTA_FSGSBASE (HOST_WIDE_INT_1 << 11)
3103 #define PTA_LWP (HOST_WIDE_INT_1 << 12)
3104 #define PTA_LZCNT (HOST_WIDE_INT_1 << 13)
3105 #define PTA_MMX (HOST_WIDE_INT_1 << 14)
3106 #define PTA_MOVBE (HOST_WIDE_INT_1 << 15)
3107 #define PTA_NO_SAHF (HOST_WIDE_INT_1 << 16)
3108 #define PTA_PCLMUL (HOST_WIDE_INT_1 << 17)
3109 #define PTA_POPCNT (HOST_WIDE_INT_1 << 18)
3110 #define PTA_PREFETCH_SSE (HOST_WIDE_INT_1 << 19)
3111 #define PTA_RDRND (HOST_WIDE_INT_1 << 20)
3112 #define PTA_SSE (HOST_WIDE_INT_1 << 21)
3113 #define PTA_SSE2 (HOST_WIDE_INT_1 << 22)
3114 #define PTA_SSE3 (HOST_WIDE_INT_1 << 23)
3115 #define PTA_SSE4_1 (HOST_WIDE_INT_1 << 24)
3116 #define PTA_SSE4_2 (HOST_WIDE_INT_1 << 25)
3117 #define PTA_SSE4A (HOST_WIDE_INT_1 << 26)
3118 #define PTA_SSSE3 (HOST_WIDE_INT_1 << 27)
3119 #define PTA_TBM (HOST_WIDE_INT_1 << 28)
3120 #define PTA_XOP (HOST_WIDE_INT_1 << 29)
3121 #define PTA_AVX2 (HOST_WIDE_INT_1 << 30)
3122 #define PTA_BMI2 (HOST_WIDE_INT_1 << 31)
3123 #define PTA_RTM (HOST_WIDE_INT_1 << 32)
3124 #define PTA_HLE (HOST_WIDE_INT_1 << 33)
3125 #define PTA_PRFCHW (HOST_WIDE_INT_1 << 34)
3126 #define PTA_RDSEED (HOST_WIDE_INT_1 << 35)
3127 #define PTA_ADX (HOST_WIDE_INT_1 << 36)
3128 #define PTA_FXSR (HOST_WIDE_INT_1 << 37)
3129 #define PTA_XSAVE (HOST_WIDE_INT_1 << 38)
3130 #define PTA_XSAVEOPT (HOST_WIDE_INT_1 << 39)
3131 #define PTA_AVX512F (HOST_WIDE_INT_1 << 40)
3132 #define PTA_AVX512ER (HOST_WIDE_INT_1 << 41)
3133 #define PTA_AVX512PF (HOST_WIDE_INT_1 << 42)
3134 #define PTA_AVX512CD (HOST_WIDE_INT_1 << 43)
3135 #define PTA_SHA (HOST_WIDE_INT_1 << 45)
3136 #define PTA_PREFETCHWT1 (HOST_WIDE_INT_1 << 46)
3137 #define PTA_CLFLUSHOPT (HOST_WIDE_INT_1 << 47)
3138 #define PTA_XSAVEC (HOST_WIDE_INT_1 << 48)
3139 #define PTA_XSAVES (HOST_WIDE_INT_1 << 49)
3140 #define PTA_AVX512DQ (HOST_WIDE_INT_1 << 50)
3141 #define PTA_AVX512BW (HOST_WIDE_INT_1 << 51)
3142 #define PTA_AVX512VL (HOST_WIDE_INT_1 << 52)
3143
3144 #define PTA_CORE2 \
3145 (PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3 \
3146 | PTA_CX16 | PTA_FXSR)
3147 #define PTA_NEHALEM \
3148 (PTA_CORE2 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_POPCNT)
3149 #define PTA_WESTMERE \
3150 (PTA_NEHALEM | PTA_AES | PTA_PCLMUL)
3151 #define PTA_SANDYBRIDGE \
3152 (PTA_WESTMERE | PTA_AVX | PTA_XSAVE | PTA_XSAVEOPT)
3153 #define PTA_IVYBRIDGE \
3154 (PTA_SANDYBRIDGE | PTA_FSGSBASE | PTA_RDRND | PTA_F16C)
3155 #define PTA_HASWELL \
3156 (PTA_IVYBRIDGE | PTA_AVX2 | PTA_BMI | PTA_BMI2 | PTA_LZCNT \
3157 | PTA_FMA | PTA_MOVBE | PTA_HLE)
3158 #define PTA_BROADWELL \
3159 (PTA_HASWELL | PTA_ADX | PTA_PRFCHW | PTA_RDSEED)
3160 #define PTA_BONNELL \
3161 (PTA_CORE2 | PTA_MOVBE)
3162 #define PTA_SILVERMONT \
3163 (PTA_WESTMERE | PTA_MOVBE)
3164
3165 /* If this reaches 64, the flags field of struct pta below needs to be widened. */
3166
3167 static struct pta
3168 {
3169 const char *const name; /* processor name or nickname. */
3170 const enum processor_type processor;
3171 const enum attr_cpu schedule;
3172 const unsigned HOST_WIDE_INT flags;
3173 }
3174 const processor_alias_table[] =
3175 {
3176 {"i386", PROCESSOR_I386, CPU_NONE, 0},
3177 {"i486", PROCESSOR_I486, CPU_NONE, 0},
3178 {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3179 {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3180 {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
3181 {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
3182 {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3183 {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3184 {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3185 PTA_MMX | PTA_SSE | PTA_FXSR},
3186 {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3187 {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3188 {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_FXSR},
3189 {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3190 PTA_MMX | PTA_SSE | PTA_FXSR},
3191 {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3192 PTA_MMX | PTA_SSE | PTA_FXSR},
3193 {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3194 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3195 {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
3196 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3197 {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
3198 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3199 {"prescott", PROCESSOR_NOCONA, CPU_NONE,
3200 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_FXSR},
3201 {"nocona", PROCESSOR_NOCONA, CPU_NONE,
3202 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3203 | PTA_CX16 | PTA_NO_SAHF | PTA_FXSR},
3204 {"core2", PROCESSOR_CORE2, CPU_CORE2, PTA_CORE2},
3205 {"nehalem", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_NEHALEM},
3206 {"corei7", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_NEHALEM},
3207 {"westmere", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_WESTMERE},
3208 {"sandybridge", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3209 PTA_SANDYBRIDGE},
3210 {"corei7-avx", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3211 PTA_SANDYBRIDGE},
3212 {"ivybridge", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3213 PTA_IVYBRIDGE},
3214 {"core-avx-i", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3215 PTA_IVYBRIDGE},
3216 {"haswell", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_HASWELL},
3217 {"core-avx2", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_HASWELL},
3218 {"broadwell", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_BROADWELL},
3219 {"bonnell", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL},
3220 {"atom", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL},
3221 {"silvermont", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
3222 {"slm", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
3223 {"intel", PROCESSOR_INTEL, CPU_SLM, PTA_NEHALEM},
3224 {"geode", PROCESSOR_GEODE, CPU_GEODE,
3225 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3226 {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
3227 {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3228 {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3229 {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
3230 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3231 {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
3232 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3233 {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
3234 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3235 {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
3236 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3237 {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
3238 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3239 {"x86-64", PROCESSOR_K8, CPU_K8,
3240 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF | PTA_FXSR},
3241 {"k8", PROCESSOR_K8, CPU_K8,
3242 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3243 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3244 {"k8-sse3", PROCESSOR_K8, CPU_K8,
3245 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3246 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3247 {"opteron", PROCESSOR_K8, CPU_K8,
3248 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3249 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3250 {"opteron-sse3", PROCESSOR_K8, CPU_K8,
3251 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3252 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3253 {"athlon64", PROCESSOR_K8, CPU_K8,
3254 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3255 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3256 {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
3257 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3258 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3259 {"athlon-fx", PROCESSOR_K8, CPU_K8,
3260 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3261 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3262 {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3263 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
3264 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
3265 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3266 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
3267 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
3268 {"bdver1", PROCESSOR_BDVER1, CPU_BDVER1,
3269 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3270 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3271 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3272 | PTA_XOP | PTA_LWP | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
3273 {"bdver2", PROCESSOR_BDVER2, CPU_BDVER2,
3274 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3275 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3276 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3277 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
3278 | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
3279 {"bdver3", PROCESSOR_BDVER3, CPU_BDVER3,
3280 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3281 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3282 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3283 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
3284 | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE
3285 | PTA_XSAVEOPT | PTA_FSGSBASE},
3286 {"bdver4", PROCESSOR_BDVER4, CPU_BDVER4,
3287 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3288 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3289 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_AVX2
3290 | PTA_FMA4 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_BMI2
3291 | PTA_TBM | PTA_F16C | PTA_FMA | PTA_PRFCHW | PTA_FXSR
3292 | PTA_XSAVE | PTA_XSAVEOPT | PTA_FSGSBASE | PTA_RDRND
3293 | PTA_MOVBE},
3294 {"btver1", PROCESSOR_BTVER1, CPU_GENERIC,
3295 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3296 | PTA_SSSE3 | PTA_SSE4A | PTA_ABM | PTA_CX16 | PTA_PRFCHW
3297 | PTA_FXSR | PTA_XSAVE},
3298 {"btver2", PROCESSOR_BTVER2, CPU_BTVER2,
3299 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3300 | PTA_SSSE3 | PTA_SSE4A | PTA_ABM | PTA_CX16 | PTA_SSE4_1
3301 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX
3302 | PTA_BMI | PTA_F16C | PTA_MOVBE | PTA_PRFCHW
3303 | PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT},
3304
3305 {"generic", PROCESSOR_GENERIC, CPU_GENERIC,
3306 PTA_64BIT
3307 | PTA_HLE /* flags are only used for -march switch. */ },
3308 };
3309
3310 /* -mrecip options. */
3311 static struct
3312 {
3313 const char *string; /* option name */
3314 unsigned int mask; /* mask bits to set */
3315 }
3316 const recip_options[] =
3317 {
3318 { "all", RECIP_MASK_ALL },
3319 { "none", RECIP_MASK_NONE },
3320 { "div", RECIP_MASK_DIV },
3321 { "sqrt", RECIP_MASK_SQRT },
3322 { "vec-div", RECIP_MASK_VEC_DIV },
3323 { "vec-sqrt", RECIP_MASK_VEC_SQRT },
3324 };
3325
3326 int const pta_size = ARRAY_SIZE (processor_alias_table);
3327
3328 /* Set up prefix/suffix so the error messages refer to either the command
3329 line argument, or the attribute(target). */
3330 if (main_args_p)
3331 {
3332 prefix = "-m";
3333 suffix = "";
3334 sw = "switch";
3335 }
3336 else
3337 {
3338 prefix = "option(\"";
3339 suffix = "\")";
3340 sw = "attribute";
3341 }
3342
3343 /* Turn off both OPTION_MASK_ABI_64 and OPTION_MASK_ABI_X32 if
3344 TARGET_64BIT_DEFAULT is true and TARGET_64BIT is false. */
3345 if (TARGET_64BIT_DEFAULT && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
3346 opts->x_ix86_isa_flags &= ~(OPTION_MASK_ABI_64 | OPTION_MASK_ABI_X32);
3347 #ifdef TARGET_BI_ARCH
3348 else
3349 {
3350 #if TARGET_BI_ARCH == 1
3351 /* When TARGET_BI_ARCH == 1, by default, OPTION_MASK_ABI_64
3352 is on and OPTION_MASK_ABI_X32 is off. We turn off
3353 OPTION_MASK_ABI_64 if OPTION_MASK_ABI_X32 is turned on by
3354 -mx32. */
3355 if (TARGET_X32_P (opts->x_ix86_isa_flags))
3356 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64;
3357 #else
3358 /* When TARGET_BI_ARCH == 2, by default, OPTION_MASK_ABI_X32 is
3359 on and OPTION_MASK_ABI_64 is off. We turn off
3360 OPTION_MASK_ABI_X32 if OPTION_MASK_ABI_64 is turned on by
3361 -m64 or OPTION_MASK_CODE16 is turned on by -m16. */
3362 if (TARGET_LP64_P (opts->x_ix86_isa_flags)
3363 || TARGET_16BIT_P (opts->x_ix86_isa_flags))
3364 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
3365 #endif
3366 }
3367 #endif
3368
3369 if (TARGET_X32_P (opts->x_ix86_isa_flags))
3370 {
3371 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3372 OPTION_MASK_ABI_64 for TARGET_X32. */
3373 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
3374 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64;
3375 }
3376 else if (TARGET_16BIT_P (opts->x_ix86_isa_flags))
3377 opts->x_ix86_isa_flags &= ~(OPTION_MASK_ISA_64BIT
3378 | OPTION_MASK_ABI_X32
3379 | OPTION_MASK_ABI_64);
3380 else if (TARGET_LP64_P (opts->x_ix86_isa_flags))
3381 {
3382 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3383 OPTION_MASK_ABI_X32 for TARGET_LP64. */
3384 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
3385 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
3386 }
3387
3388 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3389 SUBTARGET_OVERRIDE_OPTIONS;
3390 #endif
3391
3392 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
3393 SUBSUBTARGET_OVERRIDE_OPTIONS;
3394 #endif
3395
3396 /* -fPIC is the default for x86_64. */
3397 if (TARGET_MACHO && TARGET_64BIT_P (opts->x_ix86_isa_flags))
3398 opts->x_flag_pic = 2;
3399
3400 /* Need to check -mtune=generic first. */
3401 if (opts->x_ix86_tune_string)
3402 {
3403 /* As special support for cross compilers we read -mtune=native
3404 as -mtune=generic. With native compilers we won't see the
3405 -mtune=native, as it was changed by the driver. */
3406 if (!strcmp (opts->x_ix86_tune_string, "native"))
3407 {
3408 opts->x_ix86_tune_string = "generic";
3409 }
3410 else if (!strcmp (opts->x_ix86_tune_string, "x86-64"))
3411 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated; use "
3412 "%stune=k8%s or %stune=generic%s instead as appropriate",
3413 prefix, suffix, prefix, suffix, prefix, suffix);
3414 }
3415 else
3416 {
3417 if (opts->x_ix86_arch_string)
3418 opts->x_ix86_tune_string = opts->x_ix86_arch_string;
3419 if (!opts->x_ix86_tune_string)
3420 {
3421 opts->x_ix86_tune_string
3422 = processor_target_table[TARGET_CPU_DEFAULT].name;
3423 ix86_tune_defaulted = 1;
3424 }
3425
3426 /* opts->x_ix86_tune_string is set to opts->x_ix86_arch_string
3427 or defaulted. We need to use a sensible tune option. */
3428 if (!strcmp (opts->x_ix86_tune_string, "x86-64"))
3429 {
3430 opts->x_ix86_tune_string = "generic";
3431 }
3432 }
3433
3434 if (opts->x_ix86_stringop_alg == rep_prefix_8_byte
3435 && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
3436 {
3437 /* rep; movq isn't available in 32-bit code. */
3438 error ("-mstringop-strategy=rep_8byte not supported for 32-bit code");
3439 opts->x_ix86_stringop_alg = no_stringop;
3440 }
3441
3442 if (!opts->x_ix86_arch_string)
3443 opts->x_ix86_arch_string
3444 = TARGET_64BIT_P (opts->x_ix86_isa_flags)
3445 ? "x86-64" : SUBTARGET32_DEFAULT_CPU;
3446 else
3447 ix86_arch_specified = 1;
3448
3449 if (opts_set->x_ix86_pmode)
3450 {
3451 if ((TARGET_LP64_P (opts->x_ix86_isa_flags)
3452 && opts->x_ix86_pmode == PMODE_SI)
3453 || (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
3454 && opts->x_ix86_pmode == PMODE_DI))
3455 error ("address mode %qs not supported in the %s bit mode",
3456 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "short" : "long",
3457 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "64" : "32");
3458 }
3459 else
3460 opts->x_ix86_pmode = TARGET_LP64_P (opts->x_ix86_isa_flags)
3461 ? PMODE_DI : PMODE_SI;
3462
3463 if (!opts_set->x_ix86_abi)
3464 opts->x_ix86_abi = DEFAULT_ABI;
3465
3466 /* For targets using the MS ABI, enable ms-extensions unless it was
3467 explicitly turned off. For non-MS ABIs the option is turned
3468 off. */
3469 if (!opts_set->x_flag_ms_extensions)
3470 opts->x_flag_ms_extensions = (MS_ABI == DEFAULT_ABI);
3471
3472 if (opts_set->x_ix86_cmodel)
3473 {
3474 switch (opts->x_ix86_cmodel)
3475 {
3476 case CM_SMALL:
3477 case CM_SMALL_PIC:
3478 if (opts->x_flag_pic)
3479 opts->x_ix86_cmodel = CM_SMALL_PIC;
3480 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3481 error ("code model %qs not supported in the %s bit mode",
3482 "small", "32");
3483 break;
3484
3485 case CM_MEDIUM:
3486 case CM_MEDIUM_PIC:
3487 if (opts->x_flag_pic)
3488 opts->x_ix86_cmodel = CM_MEDIUM_PIC;
3489 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3490 error ("code model %qs not supported in the %s bit mode",
3491 "medium", "32");
3492 else if (TARGET_X32_P (opts->x_ix86_isa_flags))
3493 error ("code model %qs not supported in x32 mode",
3494 "medium");
3495 break;
3496
3497 case CM_LARGE:
3498 case CM_LARGE_PIC:
3499 if (opts->x_flag_pic)
3500 opts->x_ix86_cmodel = CM_LARGE_PIC;
3501 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3502 error ("code model %qs not supported in the %s bit mode",
3503 "large", "32");
3504 else if (TARGET_X32_P (opts->x_ix86_isa_flags))
3505 error ("code model %qs not supported in x32 mode",
3506 "large");
3507 break;
3508
3509 case CM_32:
3510 if (opts->x_flag_pic)
3511 error ("code model %s does not support PIC mode", "32");
3512 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3513 error ("code model %qs not supported in the %s bit mode",
3514 "32", "64");
3515 break;
3516
3517 case CM_KERNEL:
3518 if (opts->x_flag_pic)
3519 {
3520 error ("code model %s does not support PIC mode", "kernel");
3521 opts->x_ix86_cmodel = CM_32;
3522 }
3523 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3524 error ("code model %qs not supported in the %s bit mode",
3525 "kernel", "32");
3526 break;
3527
3528 default:
3529 gcc_unreachable ();
3530 }
3531 }
3532 else
3533 {
3534 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
3535 use of rip-relative addressing. This eliminates fixups that
3536 would otherwise be needed if this object is to be placed in a
3537 DLL, and is essentially just as efficient as direct addressing. */
3538 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3539 && (TARGET_RDOS || TARGET_PECOFF))
3540 opts->x_ix86_cmodel = CM_MEDIUM_PIC, opts->x_flag_pic = 1;
3541 else if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3542 opts->x_ix86_cmodel = opts->x_flag_pic ? CM_SMALL_PIC : CM_SMALL;
3543 else
3544 opts->x_ix86_cmodel = CM_32;
3545 }
3546 if (TARGET_MACHO && opts->x_ix86_asm_dialect == ASM_INTEL)
3547 {
3548 error ("-masm=intel not supported in this configuration");
3549 opts->x_ix86_asm_dialect = ASM_ATT;
3550 }
3551 if ((TARGET_64BIT_P (opts->x_ix86_isa_flags) != 0)
3552 != ((opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
3553 sorry ("%i-bit mode not compiled in",
3554 (opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
3555
3556 for (i = 0; i < pta_size; i++)
3557 if (! strcmp (opts->x_ix86_arch_string, processor_alias_table[i].name))
3558 {
3559 ix86_schedule = processor_alias_table[i].schedule;
3560 ix86_arch = processor_alias_table[i].processor;
3561 /* Default cpu tuning to the architecture. */
3562 ix86_tune = ix86_arch;
3563
3564 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3565 && !(processor_alias_table[i].flags & PTA_64BIT))
3566 error ("CPU you selected does not support x86-64 "
3567 "instruction set");
3568
3569 if (processor_alias_table[i].flags & PTA_MMX
3570 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
3571 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MMX;
3572 if (processor_alias_table[i].flags & PTA_3DNOW
3573 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
3574 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
3575 if (processor_alias_table[i].flags & PTA_3DNOW_A
3576 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
3577 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
3578 if (processor_alias_table[i].flags & PTA_SSE
3579 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
3580 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE;
3581 if (processor_alias_table[i].flags & PTA_SSE2
3582 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
3583 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
3584 if (processor_alias_table[i].flags & PTA_SSE3
3585 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
3586 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
3587 if (processor_alias_table[i].flags & PTA_SSSE3
3588 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
3589 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
3590 if (processor_alias_table[i].flags & PTA_SSE4_1
3591 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
3592 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
3593 if (processor_alias_table[i].flags & PTA_SSE4_2
3594 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
3595 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
3596 if (processor_alias_table[i].flags & PTA_AVX
3597 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
3598 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX;
3599 if (processor_alias_table[i].flags & PTA_AVX2
3600 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX2))
3601 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX2;
3602 if (processor_alias_table[i].flags & PTA_FMA
3603 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
3604 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA;
3605 if (processor_alias_table[i].flags & PTA_SSE4A
3606 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
3607 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
3608 if (processor_alias_table[i].flags & PTA_FMA4
3609 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4))
3610 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA4;
3611 if (processor_alias_table[i].flags & PTA_XOP
3612 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP))
3613 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XOP;
3614 if (processor_alias_table[i].flags & PTA_LWP
3615 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP))
3616 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LWP;
3617 if (processor_alias_table[i].flags & PTA_ABM
3618 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
3619 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ABM;
3620 if (processor_alias_table[i].flags & PTA_BMI
3621 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI))
3622 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI;
3623 if (processor_alias_table[i].flags & (PTA_LZCNT | PTA_ABM)
3624 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LZCNT))
3625 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LZCNT;
3626 if (processor_alias_table[i].flags & PTA_TBM
3627 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_TBM))
3628 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_TBM;
3629 if (processor_alias_table[i].flags & PTA_BMI2
3630 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI2))
3631 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI2;
3632 if (processor_alias_table[i].flags & PTA_CX16
3633 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
3634 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CX16;
3635 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
3636 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
3637 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
3638 if (!(TARGET_64BIT_P (opts->x_ix86_isa_flags)
3639 && (processor_alias_table[i].flags & PTA_NO_SAHF))
3640 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
3641 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
3642 if (processor_alias_table[i].flags & PTA_MOVBE
3643 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
3644 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
3645 if (processor_alias_table[i].flags & PTA_AES
3646 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
3647 ix86_isa_flags |= OPTION_MASK_ISA_AES;
3648 if (processor_alias_table[i].flags & PTA_SHA
3649 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SHA))
3650 ix86_isa_flags |= OPTION_MASK_ISA_SHA;
3651 if (processor_alias_table[i].flags & PTA_PCLMUL
3652 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
3653 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
3654 if (processor_alias_table[i].flags & PTA_FSGSBASE
3655 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FSGSBASE))
3656 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE;
3657 if (processor_alias_table[i].flags & PTA_RDRND
3658 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDRND))
3659 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDRND;
3660 if (processor_alias_table[i].flags & PTA_F16C
3661 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_F16C))
3662 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_F16C;
3663 if (processor_alias_table[i].flags & PTA_RTM
3664 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RTM))
3665 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RTM;
3666 if (processor_alias_table[i].flags & PTA_HLE
3667 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_HLE))
3668 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_HLE;
3669 if (processor_alias_table[i].flags & PTA_PRFCHW
3670 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PRFCHW))
3671 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PRFCHW;
3672 if (processor_alias_table[i].flags & PTA_RDSEED
3673 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDSEED))
3674 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDSEED;
3675 if (processor_alias_table[i].flags & PTA_ADX
3676 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ADX))
3677 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ADX;
3678 if (processor_alias_table[i].flags & PTA_FXSR
3679 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FXSR))
3680 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FXSR;
3681 if (processor_alias_table[i].flags & PTA_XSAVE
3682 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVE))
3683 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVE;
3684 if (processor_alias_table[i].flags & PTA_XSAVEOPT
3685 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEOPT))
3686 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEOPT;
3687 if (processor_alias_table[i].flags & PTA_AVX512F
3688 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512F))
3689 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512F;
3690 if (processor_alias_table[i].flags & PTA_AVX512ER
3691 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512ER))
3692 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512ER;
3693 if (processor_alias_table[i].flags & PTA_AVX512PF
3694 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512PF))
3695 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512PF;
3696 if (processor_alias_table[i].flags & PTA_AVX512CD
3697 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512CD))
3698 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512CD;
3699 if (processor_alias_table[i].flags & PTA_PREFETCHWT1
3700 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PREFETCHWT1))
3701 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PREFETCHWT1;
3702 if (processor_alias_table[i].flags & PTA_CLFLUSHOPT
3703 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CLFLUSHOPT))
3704 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CLFLUSHOPT;
3705 if (processor_alias_table[i].flags & PTA_XSAVEC
3706 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEC))
3707 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEC;
3708 if (processor_alias_table[i].flags & PTA_XSAVES
3709 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVES))
3710 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVES;
3711 if (processor_alias_table[i].flags & PTA_AVX512DQ
3712 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512DQ))
3713 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512DQ;
3714 if (processor_alias_table[i].flags & PTA_AVX512BW
3715 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512BW))
3716 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512BW;
3717 if (processor_alias_table[i].flags & PTA_AVX512VL
3718 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512VL))
3719 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512VL;
3720 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
3721 x86_prefetch_sse = true;
3722
3723 break;
3724 }
3725
3726 if (!strcmp (opts->x_ix86_arch_string, "generic"))
3727 error ("generic CPU can be used only for %stune=%s %s",
3728 prefix, suffix, sw);
3729 else if (!strcmp (opts->x_ix86_arch_string, "intel"))
3730 error ("intel CPU can be used only for %stune=%s %s",
3731 prefix, suffix, sw);
3732 else if (i == pta_size)
3733 error ("bad value (%s) for %sarch=%s %s",
3734 opts->x_ix86_arch_string, prefix, suffix, sw);
3735
3736 ix86_arch_mask = 1u << ix86_arch;
3737 for (i = 0; i < X86_ARCH_LAST; ++i)
3738 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3739
3740 for (i = 0; i < pta_size; i++)
3741 if (! strcmp (opts->x_ix86_tune_string, processor_alias_table[i].name))
3742 {
3743 ix86_schedule = processor_alias_table[i].schedule;
3744 ix86_tune = processor_alias_table[i].processor;
3745 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3746 {
3747 if (!(processor_alias_table[i].flags & PTA_64BIT))
3748 {
3749 if (ix86_tune_defaulted)
3750 {
3751 opts->x_ix86_tune_string = "x86-64";
3752 for (i = 0; i < pta_size; i++)
3753 if (! strcmp (opts->x_ix86_tune_string,
3754 processor_alias_table[i].name))
3755 break;
3756 ix86_schedule = processor_alias_table[i].schedule;
3757 ix86_tune = processor_alias_table[i].processor;
3758 }
3759 else
3760 error ("CPU you selected does not support x86-64 "
3761 "instruction set");
3762 }
3763 }
3764 /* Intel CPUs have always interpreted SSE prefetch instructions as
3765 NOPs; so, we can enable SSE prefetch instructions even when
3766 -mtune (rather than -march) points us to a processor that has them.
3767 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
3768 higher processors. */
3769 if (TARGET_CMOV
3770 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
3771 x86_prefetch_sse = true;
3772 break;
3773 }
3774
3775 if (ix86_tune_specified && i == pta_size)
3776 error ("bad value (%s) for %stune=%s %s",
3777 opts->x_ix86_tune_string, prefix, suffix, sw);
3778
3779 set_ix86_tune_features (ix86_tune, opts->x_ix86_dump_tunes);
3780
3781 #ifndef USE_IX86_FRAME_POINTER
3782 #define USE_IX86_FRAME_POINTER 0
3783 #endif
3784
3785 #ifndef USE_X86_64_FRAME_POINTER
3786 #define USE_X86_64_FRAME_POINTER 0
3787 #endif
3788
3789 /* Set the default values for switches whose default depends on TARGET_64BIT
3790 in case they weren't overwritten by command line options. */
3791 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3792 {
3793 if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer)
3794 opts->x_flag_omit_frame_pointer = !USE_X86_64_FRAME_POINTER;
3795 if (opts->x_flag_asynchronous_unwind_tables
3796 && !opts_set->x_flag_unwind_tables
3797 && TARGET_64BIT_MS_ABI)
3798 opts->x_flag_unwind_tables = 1;
3799 if (opts->x_flag_asynchronous_unwind_tables == 2)
3800 opts->x_flag_unwind_tables
3801 = opts->x_flag_asynchronous_unwind_tables = 1;
3802 if (opts->x_flag_pcc_struct_return == 2)
3803 opts->x_flag_pcc_struct_return = 0;
3804 }
3805 else
3806 {
3807 if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer)
3808 opts->x_flag_omit_frame_pointer
3809 = !(USE_IX86_FRAME_POINTER || opts->x_optimize_size);
3810 if (opts->x_flag_asynchronous_unwind_tables == 2)
3811 opts->x_flag_asynchronous_unwind_tables = !USE_IX86_FRAME_POINTER;
3812 if (opts->x_flag_pcc_struct_return == 2)
3813 opts->x_flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
3814 }
3815
3816 ix86_tune_cost = processor_target_table[ix86_tune].cost;
3817 if (opts->x_optimize_size)
3818 ix86_cost = &ix86_size_cost;
3819 else
3820 ix86_cost = ix86_tune_cost;
3821
3822 /* Arrange to set up i386_stack_locals for all functions. */
3823 init_machine_status = ix86_init_machine_status;
3824
3825 /* Validate -mregparm= value. */
3826 if (opts_set->x_ix86_regparm)
3827 {
3828 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3829 warning (0, "-mregparm is ignored in 64-bit mode");
3830 if (opts->x_ix86_regparm > REGPARM_MAX)
3831 {
3832 error ("-mregparm=%d is not between 0 and %d",
3833 opts->x_ix86_regparm, REGPARM_MAX);
3834 opts->x_ix86_regparm = 0;
3835 }
3836 }
3837 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3838 opts->x_ix86_regparm = REGPARM_MAX;
3839
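/* A small usage sketch (the exact value of REGPARM_MAX comes from
   i386.h and is not restated here): for 32-bit code,

       gcc -m32 -mregparm=3 ...

   requests that up to three integer arguments be passed in registers
   rather than on the stack.  Values above REGPARM_MAX are rejected by
   the check above, and the option is ignored for 64-bit code, which
   already passes arguments in registers.  */
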
3840 /* Default align_* from the processor table. */
3841 if (opts->x_align_loops == 0)
3842 {
3843 opts->x_align_loops = processor_target_table[ix86_tune].align_loop;
3844 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
3845 }
3846 if (opts->x_align_jumps == 0)
3847 {
3848 opts->x_align_jumps = processor_target_table[ix86_tune].align_jump;
3849 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
3850 }
3851 if (opts->x_align_functions == 0)
3852 {
3853 opts->x_align_functions = processor_target_table[ix86_tune].align_func;
3854 }
3855
3856 /* Provide default for -mbranch-cost= value. */
3857 if (!opts_set->x_ix86_branch_cost)
3858 opts->x_ix86_branch_cost = ix86_cost->branch_cost;
3859
3860 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3861 {
3862 opts->x_target_flags
3863 |= TARGET_SUBTARGET64_DEFAULT & ~opts_set->x_target_flags;
3864
3865 /* Enable by default the SSE and MMX builtins. Do allow the user to
3866 explicitly disable any of these. In particular, disabling SSE and
3867 MMX for kernel code is extremely useful. */
3868 if (!ix86_arch_specified)
3869 opts->x_ix86_isa_flags
3870 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
3871 | TARGET_SUBTARGET64_ISA_DEFAULT)
3872 & ~opts->x_ix86_isa_flags_explicit);
3873
3874 if (TARGET_RTD_P (opts->x_target_flags))
3875 warning (0, "%srtd%s is ignored in 64-bit mode", prefix, suffix);
3876 }
3877 else
3878 {
3879 opts->x_target_flags
3880 |= TARGET_SUBTARGET32_DEFAULT & ~opts_set->x_target_flags;
3881
3882 if (!ix86_arch_specified)
3883 opts->x_ix86_isa_flags
3884 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~opts->x_ix86_isa_flags_explicit;
3885
3886 /* The i386 ABI does not specify a red zone. It can still make sense to
3887 use one when the programmer keeps the stack from being clobbered. */
3888 if (!(opts_set->x_target_flags & MASK_NO_RED_ZONE))
3889 opts->x_target_flags |= MASK_NO_RED_ZONE;
3890 }
3891
3892 /* Keep nonleaf frame pointers. */
3893 if (opts->x_flag_omit_frame_pointer)
3894 opts->x_target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
3895 else if (TARGET_OMIT_LEAF_FRAME_POINTER_P (opts->x_target_flags))
3896 opts->x_flag_omit_frame_pointer = 1;
3897
3898 /* If we're doing fast math, we don't care about comparison order
3899 wrt NaNs. This lets us use a shorter comparison sequence. */
3900 if (opts->x_flag_finite_math_only)
3901 opts->x_target_flags &= ~MASK_IEEE_FP;
3902
3903 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
3904 since the insns won't need emulation. */
3905 if (ix86_tune_features [X86_TUNE_ALWAYS_FANCY_MATH_387])
3906 opts->x_target_flags &= ~MASK_NO_FANCY_MATH_387;
3907
3908 /* Likewise, if the target doesn't have a 387, or we've specified
3909 software floating point, don't use 387 inline intrinsics. */
3910 if (!TARGET_80387_P (opts->x_target_flags))
3911 opts->x_target_flags |= MASK_NO_FANCY_MATH_387;
3912
3913 /* Turn on MMX builtins for -msse. */
3914 if (TARGET_SSE_P (opts->x_ix86_isa_flags))
3915 opts->x_ix86_isa_flags
3916 |= OPTION_MASK_ISA_MMX & ~opts->x_ix86_isa_flags_explicit;
3917
3918 /* Enable SSE prefetch. */
3919 if (TARGET_SSE_P (opts->x_ix86_isa_flags)
3920 || (TARGET_PRFCHW && !TARGET_3DNOW_P (opts->x_ix86_isa_flags)))
3921 x86_prefetch_sse = true;
3922
3923 /* Enable prefetch{,w} instructions for -m3dnow and -mprefetchwt1. */
3924 if (TARGET_3DNOW_P (opts->x_ix86_isa_flags)
3925 || TARGET_PREFETCHWT1_P (opts->x_ix86_isa_flags))
3926 opts->x_ix86_isa_flags
3927 |= OPTION_MASK_ISA_PRFCHW & ~opts->x_ix86_isa_flags_explicit;
3928
3929 /* Enable popcnt instruction for -msse4.2 or -mabm. */
3930 if (TARGET_SSE4_2_P (opts->x_ix86_isa_flags)
3931 || TARGET_ABM_P (opts->x_ix86_isa_flags))
3932 opts->x_ix86_isa_flags
3933 |= OPTION_MASK_ISA_POPCNT & ~opts->x_ix86_isa_flags_explicit;
3934
3935 /* Enable lzcnt instruction for -mabm. */
3936 if (TARGET_ABM_P(opts->x_ix86_isa_flags))
3937 opts->x_ix86_isa_flags
3938 |= OPTION_MASK_ISA_LZCNT & ~opts->x_ix86_isa_flags_explicit;
3939
3940 /* Validate -mpreferred-stack-boundary= value or default it to
3941 PREFERRED_STACK_BOUNDARY_DEFAULT. */
3942 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
3943 if (opts_set->x_ix86_preferred_stack_boundary_arg)
3944 {
3945 int min = (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3946 ? (TARGET_SSE_P (opts->x_ix86_isa_flags) ? 4 : 3) : 2);
3947 int max = (TARGET_SEH ? 4 : 12);
3948
3949 if (opts->x_ix86_preferred_stack_boundary_arg < min
3950 || opts->x_ix86_preferred_stack_boundary_arg > max)
3951 {
3952 if (min == max)
3953 error ("-mpreferred-stack-boundary is not supported "
3954 "for this target");
3955 else
3956 error ("-mpreferred-stack-boundary=%d is not between %d and %d",
3957 opts->x_ix86_preferred_stack_boundary_arg, min, max);
3958 }
3959 else
3960 ix86_preferred_stack_boundary
3961 = (1 << opts->x_ix86_preferred_stack_boundary_arg) * BITS_PER_UNIT;
3962 }
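
/* Worked example of the conversion above, using only the arithmetic in
   this block: -mpreferred-stack-boundary=4 means an alignment of
   2**4 = 16 bytes, so ix86_preferred_stack_boundary becomes
   16 * BITS_PER_UNIT = 128 bits.  Per the bounds computed above, plain
   32-bit code accepts a minimum of 2 (4 bytes), 64-bit code without SSE
   a minimum of 3 (8 bytes), and 64-bit code with SSE a minimum of 4
   (16 bytes).  */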
3963
3964 /* Set the default value for -mstackrealign. */
3965 if (opts->x_ix86_force_align_arg_pointer == -1)
3966 opts->x_ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
3967
3968 ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
3969
3970 /* Validate -mincoming-stack-boundary= value or default it to
3971 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
3972 ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
3973 if (opts_set->x_ix86_incoming_stack_boundary_arg)
3974 {
3975 if (opts->x_ix86_incoming_stack_boundary_arg
3976 < (TARGET_64BIT_P (opts->x_ix86_isa_flags) ? 4 : 2)
3977 || opts->x_ix86_incoming_stack_boundary_arg > 12)
3978 error ("-mincoming-stack-boundary=%d is not between %d and 12",
3979 opts->x_ix86_incoming_stack_boundary_arg,
3980 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? 4 : 2);
3981 else
3982 {
3983 ix86_user_incoming_stack_boundary
3984 = (1 << opts->x_ix86_incoming_stack_boundary_arg) * BITS_PER_UNIT;
3985 ix86_incoming_stack_boundary
3986 = ix86_user_incoming_stack_boundary;
3987 }
3988 }
3989
3990 #ifndef NO_PROFILE_COUNTERS
3991 if (flag_nop_mcount)
3992 error ("-mnop-mcount is not compatible with this target");
3993 #endif
3994 if (flag_nop_mcount && flag_pic)
3995 error ("-mnop-mcount is not implemented for -fPIC");
3996
3997 /* Accept -msseregparm only if at least SSE support is enabled. */
3998 if (TARGET_SSEREGPARM_P (opts->x_target_flags)
3999 && ! TARGET_SSE_P (opts->x_ix86_isa_flags))
4000 error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
4001
4002 if (opts_set->x_ix86_fpmath)
4003 {
4004 if (opts->x_ix86_fpmath & FPMATH_SSE)
4005 {
4006 if (!TARGET_SSE_P (opts->x_ix86_isa_flags))
4007 {
4008 warning (0, "SSE instruction set disabled, using 387 arithmetics");
4009 opts->x_ix86_fpmath = FPMATH_387;
4010 }
4011 else if ((opts->x_ix86_fpmath & FPMATH_387)
4012 && !TARGET_80387_P (opts->x_target_flags))
4013 {
4014 warning (0, "387 instruction set disabled, using SSE arithmetics");
4015 opts->x_ix86_fpmath = FPMATH_SSE;
4016 }
4017 }
4018 }
4019 /* For all chips supporting SSE2, -mfpmath=sse performs better than
4020 -mfpmath=387. The latter is nevertheless the default on many targets,
4021 since the extra 80-bit precision of temporaries is considered part of
4022 the ABI. Overwrite the default at least for -ffast-math.
4023 TODO: -mfpmath=both seems to produce similarly performing code with
4024 slightly smaller binaries. It is however not clear whether register
4025 allocation is ready for this setting.
4026 Also, -mfpmath=387 codegen is overall considerably more compact (about
4027 4-5%) than SSE codegen. We may switch to 387 with -ffast-math for
4028 size-optimized functions. */
4029 else if (fast_math_flags_set_p (&global_options)
4030 && TARGET_SSE2_P (opts->x_ix86_isa_flags))
4031 opts->x_ix86_fpmath = FPMATH_SSE;
4032 else
4033 opts->x_ix86_fpmath = TARGET_FPMATH_DEFAULT_P (opts->x_ix86_isa_flags);
4034
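/* To make the decision chain above concrete (no behaviour beyond the
   code above is implied): an explicit -mfpmath=sse with SSE disabled
   falls back to 387 arithmetic with a warning; without an explicit
   -mfpmath, a combination like "-m32 -msse2 -ffast-math" defaults to SSE
   arithmetic, and anything else keeps the target's usual default.  */
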
4035 /* If the i387 is disabled, then do not return values in it. */
4036 if (!TARGET_80387_P (opts->x_target_flags))
4037 opts->x_target_flags &= ~MASK_FLOAT_RETURNS;
4038
4039 /* Use external vectorized library in vectorizing intrinsics. */
4040 if (opts_set->x_ix86_veclibabi_type)
4041 switch (opts->x_ix86_veclibabi_type)
4042 {
4043 case ix86_veclibabi_type_svml:
4044 ix86_veclib_handler = ix86_veclibabi_svml;
4045 break;
4046
4047 case ix86_veclibabi_type_acml:
4048 ix86_veclib_handler = ix86_veclibabi_acml;
4049 break;
4050
4051 default:
4052 gcc_unreachable ();
4053 }
4054
4055 if (ix86_tune_features [X86_TUNE_ACCUMULATE_OUTGOING_ARGS]
4056 && !(opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)
4057 && !opts->x_optimize_size)
4058 opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4059
4060 /* If stack probes are required, the space used for large function
4061 arguments on the stack must also be probed, so enable
4062 -maccumulate-outgoing-args so this happens in the prologue. */
4063 if (TARGET_STACK_PROBE_P (opts->x_target_flags)
4064 && !(opts->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
4065 {
4066 if (opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)
4067 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
4068 "for correctness", prefix, suffix);
4069 opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4070 }
4071
4072 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
4073 {
4074 char *p;
4075 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
4076 p = strchr (internal_label_prefix, 'X');
4077 internal_label_prefix_len = p - internal_label_prefix;
4078 *p = '\0';
4079 }
4080
4081 /* When no scheduling description is available, disable the scheduler pass
4082 so it won't slow down compilation and make x87 code slower. */
4083 if (!TARGET_SCHEDULE)
4084 opts->x_flag_schedule_insns_after_reload = opts->x_flag_schedule_insns = 0;
4085
4086 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
4087 ix86_tune_cost->simultaneous_prefetches,
4088 opts->x_param_values,
4089 opts_set->x_param_values);
4090 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
4091 ix86_tune_cost->prefetch_block,
4092 opts->x_param_values,
4093 opts_set->x_param_values);
4094 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
4095 ix86_tune_cost->l1_cache_size,
4096 opts->x_param_values,
4097 opts_set->x_param_values);
4098 maybe_set_param_value (PARAM_L2_CACHE_SIZE,
4099 ix86_tune_cost->l2_cache_size,
4100 opts->x_param_values,
4101 opts_set->x_param_values);
4102
4103 /* Enable software prefetching at -O3 for CPUs where prefetching is helpful. */
4104 if (opts->x_flag_prefetch_loop_arrays < 0
4105 && HAVE_prefetch
4106 && (opts->x_optimize >= 3 || opts->x_flag_profile_use)
4107 && TARGET_SOFTWARE_PREFETCHING_BENEFICIAL)
4108 opts->x_flag_prefetch_loop_arrays = 1;
4109
4110 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
4111 can be opts->x_optimized to ap = __builtin_next_arg (0). */
4112 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && !opts->x_flag_split_stack)
4113 targetm.expand_builtin_va_start = NULL;
4114
4115 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
4116 {
4117 ix86_gen_leave = gen_leave_rex64;
4118 if (Pmode == DImode)
4119 {
4120 ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_di;
4121 ix86_gen_tls_local_dynamic_base_64
4122 = gen_tls_local_dynamic_base_64_di;
4123 }
4124 else
4125 {
4126 ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_si;
4127 ix86_gen_tls_local_dynamic_base_64
4128 = gen_tls_local_dynamic_base_64_si;
4129 }
4130 }
4131 else
4132 ix86_gen_leave = gen_leave;
4133
4134 if (Pmode == DImode)
4135 {
4136 ix86_gen_add3 = gen_adddi3;
4137 ix86_gen_sub3 = gen_subdi3;
4138 ix86_gen_sub3_carry = gen_subdi3_carry;
4139 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
4140 ix86_gen_andsp = gen_anddi3;
4141 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_di;
4142 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi;
4143 ix86_gen_probe_stack_range = gen_probe_stack_rangedi;
4144 ix86_gen_monitor = gen_sse3_monitor_di;
4145 }
4146 else
4147 {
4148 ix86_gen_add3 = gen_addsi3;
4149 ix86_gen_sub3 = gen_subsi3;
4150 ix86_gen_sub3_carry = gen_subsi3_carry;
4151 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
4152 ix86_gen_andsp = gen_andsi3;
4153 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_si;
4154 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi;
4155 ix86_gen_probe_stack_range = gen_probe_stack_rangesi;
4156 ix86_gen_monitor = gen_sse3_monitor_si;
4157 }
4158
4159 #ifdef USE_IX86_CLD
4160 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
4161 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
4162 opts->x_target_flags |= MASK_CLD & ~opts_set->x_target_flags;
4163 #endif
4164
4165 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && opts->x_flag_pic)
4166 {
4167 if (opts->x_flag_fentry > 0)
4168 sorry ("-mfentry isn%'t supported for 32-bit in combination "
4169 "with -fpic");
4170 opts->x_flag_fentry = 0;
4171 }
4172 else if (TARGET_SEH)
4173 {
4174 if (opts->x_flag_fentry == 0)
4175 sorry ("-mno-fentry isn%'t compatible with SEH");
4176 opts->x_flag_fentry = 1;
4177 }
4178 else if (opts->x_flag_fentry < 0)
4179 {
4180 #if defined(PROFILE_BEFORE_PROLOGUE)
4181 opts->x_flag_fentry = 1;
4182 #else
4183 opts->x_flag_fentry = 0;
4184 #endif
4185 }
4186
4187 /* When not opts->x_optimize for size, enable vzeroupper optimization for
4188 TARGET_AVX with -fexpensive-optimizations and split 32-byte
4189 AVX unaligned load/store. */
4190 if (!opts->x_optimize_size)
4191 {
4192 if (flag_expensive_optimizations
4193 && !(opts_set->x_target_flags & MASK_VZEROUPPER))
4194 opts->x_target_flags |= MASK_VZEROUPPER;
4195 if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_LOAD_OPTIMAL]
4196 && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_LOAD))
4197 opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD;
4198 if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_STORE_OPTIMAL]
4199 && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_STORE))
4200 opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_STORE;
4201 /* Enable 128-bit AVX instruction generation
4202 for the auto-vectorizer. */
4203 if (TARGET_AVX128_OPTIMAL
4204 && !(opts_set->x_target_flags & MASK_PREFER_AVX128))
4205 opts->x_target_flags |= MASK_PREFER_AVX128;
4206 }
4207
4208 if (opts->x_ix86_recip_name)
4209 {
4210 char *p = ASTRDUP (opts->x_ix86_recip_name);
4211 char *q;
4212 unsigned int mask, i;
4213 bool invert;
4214
4215 while ((q = strtok (p, ",")) != NULL)
4216 {
4217 p = NULL;
4218 if (*q == '!')
4219 {
4220 invert = true;
4221 q++;
4222 }
4223 else
4224 invert = false;
4225
4226 if (!strcmp (q, "default"))
4227 mask = RECIP_MASK_ALL;
4228 else
4229 {
4230 for (i = 0; i < ARRAY_SIZE (recip_options); i++)
4231 if (!strcmp (q, recip_options[i].string))
4232 {
4233 mask = recip_options[i].mask;
4234 break;
4235 }
4236
4237 if (i == ARRAY_SIZE (recip_options))
4238 {
4239 error ("unknown option for -mrecip=%s", q);
4240 invert = false;
4241 mask = RECIP_MASK_NONE;
4242 }
4243 }
4244
4245 opts->x_recip_mask_explicit |= mask;
4246 if (invert)
4247 opts->x_recip_mask &= ~mask;
4248 else
4249 opts->x_recip_mask |= mask;
4250 }
4251 }
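
/* Example of the syntax handled by the loop above: with

       -mrecip=all,!sqrt

   the first token sets every bit of RECIP_MASK_ALL and the second,
   because of the leading '!', clears RECIP_MASK_SQRT again, so
   reciprocal approximations are used everywhere except for scalar
   square roots.  Unknown tokens are diagnosed and contribute nothing.  */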
4252
4253 if (TARGET_RECIP_P (opts->x_target_flags))
4254 opts->x_recip_mask |= RECIP_MASK_ALL & ~opts->x_recip_mask_explicit;
4255 else if (opts_set->x_target_flags & MASK_RECIP)
4256 opts->x_recip_mask &= ~(RECIP_MASK_ALL & ~opts->x_recip_mask_explicit);
4257
4258 /* Default long double to 64-bit for 32-bit Bionic and to __float128
4259 for 64-bit Bionic. */
4260 if (TARGET_HAS_BIONIC
4261 && !(opts_set->x_target_flags
4262 & (MASK_LONG_DOUBLE_64 | MASK_LONG_DOUBLE_128)))
4263 opts->x_target_flags |= (TARGET_64BIT
4264 ? MASK_LONG_DOUBLE_128
4265 : MASK_LONG_DOUBLE_64);
4266
4267 /* Only one of them can be active. */
4268 gcc_assert ((opts->x_target_flags & MASK_LONG_DOUBLE_64) == 0
4269 || (opts->x_target_flags & MASK_LONG_DOUBLE_128) == 0);
4270
4271 /* Save the initial options in case the user does function specific
4272 options. */
4273 if (main_args_p)
4274 target_option_default_node = target_option_current_node
4275 = build_target_option_node (opts);
4276
4277 /* Handle the stack protector. */
4278 if (!opts_set->x_ix86_stack_protector_guard)
4279 opts->x_ix86_stack_protector_guard
4280 = TARGET_HAS_BIONIC ? SSP_GLOBAL : SSP_TLS;
4281
4282 /* Handle -mmemcpy-strategy= and -mmemset-strategy=. */
4283 if (opts->x_ix86_tune_memcpy_strategy)
4284 {
4285 char *str = xstrdup (opts->x_ix86_tune_memcpy_strategy);
4286 ix86_parse_stringop_strategy_string (str, false);
4287 free (str);
4288 }
4289
4290 if (opts->x_ix86_tune_memset_strategy)
4291 {
4292 char *str = xstrdup (opts->x_ix86_tune_memset_strategy);
4293 ix86_parse_stringop_strategy_string (str, true);
4294 free (str);
4295 }
4296 }
4297
4298 /* Implement the TARGET_OPTION_OVERRIDE hook. */
4299
4300 static void
4301 ix86_option_override (void)
4302 {
4303 opt_pass *pass_insert_vzeroupper = make_pass_insert_vzeroupper (g);
4304 static struct register_pass_info insert_vzeroupper_info
4305 = { pass_insert_vzeroupper, "reload",
4306 1, PASS_POS_INSERT_AFTER
4307 };
4308
4309 ix86_option_override_internal (true, &global_options, &global_options_set);
4310
4311
4312 /* This needs to be done at start up. It's convenient to do it here. */
4313 register_pass (&insert_vzeroupper_info);
4314 }
4315
4316 /* Update register usage after having seen the compiler flags. */
4317
4318 static void
4319 ix86_conditional_register_usage (void)
4320 {
4321 int i, c_mask;
4322 unsigned int j;
4323
4324 /* The PIC register, if it exists, is fixed. */
4325 j = PIC_OFFSET_TABLE_REGNUM;
4326 if (j != INVALID_REGNUM)
4327 fixed_regs[j] = call_used_regs[j] = 1;
4328
4329 /* For 32-bit targets, squash the REX registers. */
4330 if (! TARGET_64BIT)
4331 {
4332 for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
4333 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4334 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
4335 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4336 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
4337 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4338 }
4339
4340 /* See the definition of CALL_USED_REGISTERS in i386.h. */
4341 c_mask = (TARGET_64BIT_MS_ABI ? (1 << 3)
4342 : TARGET_64BIT ? (1 << 2)
4343 : (1 << 1));
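
/* In other words (see the CALL_USED_REGISTERS initializer in i386.h):
   entries for conditionally call-used registers encode one bit per ABI,
   and c_mask selects the bit for the ABI in effect: bit 1 for 32-bit,
   bit 2 for 64-bit SysV and bit 3 for 64-bit MS.  The loop below then
   reduces each multi-ABI entry to a plain 0/1 value.  */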
4344
4345 CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
4346
4347 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4348 {
4349 /* Set/reset conditionally defined registers from
4350 CALL_USED_REGISTERS initializer. */
4351 if (call_used_regs[i] > 1)
4352 call_used_regs[i] = !!(call_used_regs[i] & c_mask);
4353
4354 /* Calculate registers of CLOBBERED_REGS register set
4355 as call used registers from GENERAL_REGS register set. */
4356 if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
4357 && call_used_regs[i])
4358 SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
4359 }
4360
4361 /* If MMX is disabled, squash the registers. */
4362 if (! TARGET_MMX)
4363 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4364 if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i))
4365 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4366
4367 /* If SSE is disabled, squash the registers. */
4368 if (! TARGET_SSE)
4369 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4370 if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i))
4371 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4372
4373 /* If the FPU is disabled, squash the registers. */
4374 if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
4375 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4376 if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
4377 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4378
4379 /* If AVX512F is disabled, squash the registers. */
4380 if (! TARGET_AVX512F)
4381 {
4382 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
4383 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4384
4385 for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
4386 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4387 }
4388 }
4389
4390 \f
4391 /* Save the current options */
4392
4393 static void
4394 ix86_function_specific_save (struct cl_target_option *ptr,
4395 struct gcc_options *opts)
4396 {
4397 ptr->arch = ix86_arch;
4398 ptr->schedule = ix86_schedule;
4399 ptr->tune = ix86_tune;
4400 ptr->branch_cost = ix86_branch_cost;
4401 ptr->tune_defaulted = ix86_tune_defaulted;
4402 ptr->arch_specified = ix86_arch_specified;
4403 ptr->x_ix86_isa_flags_explicit = opts->x_ix86_isa_flags_explicit;
4404 ptr->x_ix86_target_flags_explicit = opts->x_ix86_target_flags_explicit;
4405 ptr->x_recip_mask_explicit = opts->x_recip_mask_explicit;
4406 ptr->x_ix86_arch_string = opts->x_ix86_arch_string;
4407 ptr->x_ix86_tune_string = opts->x_ix86_tune_string;
4408 ptr->x_ix86_cmodel = opts->x_ix86_cmodel;
4409 ptr->x_ix86_abi = opts->x_ix86_abi;
4410 ptr->x_ix86_asm_dialect = opts->x_ix86_asm_dialect;
4411 ptr->x_ix86_branch_cost = opts->x_ix86_branch_cost;
4412 ptr->x_ix86_dump_tunes = opts->x_ix86_dump_tunes;
4413 ptr->x_ix86_force_align_arg_pointer = opts->x_ix86_force_align_arg_pointer;
4414 ptr->x_ix86_force_drap = opts->x_ix86_force_drap;
4415 ptr->x_ix86_incoming_stack_boundary_arg = opts->x_ix86_incoming_stack_boundary_arg;
4416 ptr->x_ix86_pmode = opts->x_ix86_pmode;
4417 ptr->x_ix86_preferred_stack_boundary_arg = opts->x_ix86_preferred_stack_boundary_arg;
4418 ptr->x_ix86_recip_name = opts->x_ix86_recip_name;
4419 ptr->x_ix86_regparm = opts->x_ix86_regparm;
4420 ptr->x_ix86_section_threshold = opts->x_ix86_section_threshold;
4421 ptr->x_ix86_sse2avx = opts->x_ix86_sse2avx;
4422 ptr->x_ix86_stack_protector_guard = opts->x_ix86_stack_protector_guard;
4423 ptr->x_ix86_stringop_alg = opts->x_ix86_stringop_alg;
4424 ptr->x_ix86_tls_dialect = opts->x_ix86_tls_dialect;
4425 ptr->x_ix86_tune_ctrl_string = opts->x_ix86_tune_ctrl_string;
4426 ptr->x_ix86_tune_memcpy_strategy = opts->x_ix86_tune_memcpy_strategy;
4427 ptr->x_ix86_tune_memset_strategy = opts->x_ix86_tune_memset_strategy;
4428 ptr->x_ix86_tune_no_default = opts->x_ix86_tune_no_default;
4429 ptr->x_ix86_veclibabi_type = opts->x_ix86_veclibabi_type;
4430
4431 /* The fields are char but the variables are not; make sure the
4432 values fit in the fields. */
4433 gcc_assert (ptr->arch == ix86_arch);
4434 gcc_assert (ptr->schedule == ix86_schedule);
4435 gcc_assert (ptr->tune == ix86_tune);
4436 gcc_assert (ptr->branch_cost == ix86_branch_cost);
4437 }
4438
4439 /* Restore the current options */
4440
4441 static void
4442 ix86_function_specific_restore (struct gcc_options *opts,
4443 struct cl_target_option *ptr)
4444 {
4445 enum processor_type old_tune = ix86_tune;
4446 enum processor_type old_arch = ix86_arch;
4447 unsigned int ix86_arch_mask;
4448 int i;
4449
4450 /* We don't change -fPIC. */
4451 opts->x_flag_pic = flag_pic;
4452
4453 ix86_arch = (enum processor_type) ptr->arch;
4454 ix86_schedule = (enum attr_cpu) ptr->schedule;
4455 ix86_tune = (enum processor_type) ptr->tune;
4456 opts->x_ix86_branch_cost = ptr->branch_cost;
4457 ix86_tune_defaulted = ptr->tune_defaulted;
4458 ix86_arch_specified = ptr->arch_specified;
4459 opts->x_ix86_isa_flags_explicit = ptr->x_ix86_isa_flags_explicit;
4460 opts->x_ix86_target_flags_explicit = ptr->x_ix86_target_flags_explicit;
4461 opts->x_recip_mask_explicit = ptr->x_recip_mask_explicit;
4462 opts->x_ix86_arch_string = ptr->x_ix86_arch_string;
4463 opts->x_ix86_tune_string = ptr->x_ix86_tune_string;
4464 opts->x_ix86_cmodel = ptr->x_ix86_cmodel;
4465 opts->x_ix86_abi = ptr->x_ix86_abi;
4466 opts->x_ix86_asm_dialect = ptr->x_ix86_asm_dialect;
4467 opts->x_ix86_branch_cost = ptr->x_ix86_branch_cost;
4468 opts->x_ix86_dump_tunes = ptr->x_ix86_dump_tunes;
4469 opts->x_ix86_force_align_arg_pointer = ptr->x_ix86_force_align_arg_pointer;
4470 opts->x_ix86_force_drap = ptr->x_ix86_force_drap;
4471 opts->x_ix86_incoming_stack_boundary_arg = ptr->x_ix86_incoming_stack_boundary_arg;
4472 opts->x_ix86_pmode = ptr->x_ix86_pmode;
4473 opts->x_ix86_preferred_stack_boundary_arg = ptr->x_ix86_preferred_stack_boundary_arg;
4474 opts->x_ix86_recip_name = ptr->x_ix86_recip_name;
4475 opts->x_ix86_regparm = ptr->x_ix86_regparm;
4476 opts->x_ix86_section_threshold = ptr->x_ix86_section_threshold;
4477 opts->x_ix86_sse2avx = ptr->x_ix86_sse2avx;
4478 opts->x_ix86_stack_protector_guard = ptr->x_ix86_stack_protector_guard;
4479 opts->x_ix86_stringop_alg = ptr->x_ix86_stringop_alg;
4480 opts->x_ix86_tls_dialect = ptr->x_ix86_tls_dialect;
4481 opts->x_ix86_tune_ctrl_string = ptr->x_ix86_tune_ctrl_string;
4482 opts->x_ix86_tune_memcpy_strategy = ptr->x_ix86_tune_memcpy_strategy;
4483 opts->x_ix86_tune_memset_strategy = ptr->x_ix86_tune_memset_strategy;
4484 opts->x_ix86_tune_no_default = ptr->x_ix86_tune_no_default;
4485 opts->x_ix86_veclibabi_type = ptr->x_ix86_veclibabi_type;
4486
4487 /* Recreate the arch feature tests if the arch changed */
4488 if (old_arch != ix86_arch)
4489 {
4490 ix86_arch_mask = 1u << ix86_arch;
4491 for (i = 0; i < X86_ARCH_LAST; ++i)
4492 ix86_arch_features[i]
4493 = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
4494 }
4495
4496 /* Recreate the tune optimization tests */
4497 if (old_tune != ix86_tune)
4498 set_ix86_tune_features (ix86_tune, false);
4499 }
4500
4501 /* Print the current options */
4502
4503 static void
4504 ix86_function_specific_print (FILE *file, int indent,
4505 struct cl_target_option *ptr)
4506 {
4507 char *target_string
4508 = ix86_target_string (ptr->x_ix86_isa_flags, ptr->x_target_flags,
4509 NULL, NULL, ptr->x_ix86_fpmath, false);
4510
4511 gcc_assert (ptr->arch < PROCESSOR_max);
4512 fprintf (file, "%*sarch = %d (%s)\n",
4513 indent, "",
4514 ptr->arch, processor_target_table[ptr->arch].name);
4515
4516 gcc_assert (ptr->tune < PROCESSOR_max);
4517 fprintf (file, "%*stune = %d (%s)\n",
4518 indent, "",
4519 ptr->tune, processor_target_table[ptr->tune].name);
4520
4521 fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
4522
4523 if (target_string)
4524 {
4525 fprintf (file, "%*s%s\n", indent, "", target_string);
4526 free (target_string);
4527 }
4528 }
4529
4530 \f
4531 /* Inner function to process the attribute((target(...))): take an argument and
4532    set the current options from the argument.  If we have a list, recursively go
4533    over the list.  */
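/* For example, attribute((target("sse4.1,no-avx,arch=core2"))) is handled
   here one comma-separated token at a time: "sse4.1" as an ISA option,
   "no-avx" via the "no-" prefix, and "arch=core2" as a string option.  */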
4534
4535 static bool
4536 ix86_valid_target_attribute_inner_p (tree args, char *p_strings[],
4537 struct gcc_options *opts,
4538 struct gcc_options *opts_set,
4539 struct gcc_options *enum_opts_set)
4540 {
4541 char *next_optstr;
4542 bool ret = true;
4543
4544 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
4545 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
4546 #define IX86_ATTR_ENUM(S,O) { S, sizeof (S)-1, ix86_opt_enum, O, 0 }
4547 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
4548 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
4549
4550 enum ix86_opt_type
4551 {
4552 ix86_opt_unknown,
4553 ix86_opt_yes,
4554 ix86_opt_no,
4555 ix86_opt_str,
4556 ix86_opt_enum,
4557 ix86_opt_isa
4558 };
4559
4560 static const struct
4561 {
4562 const char *string;
4563 size_t len;
4564 enum ix86_opt_type type;
4565 int opt;
4566 int mask;
4567 } attrs[] = {
4568 /* isa options */
4569 IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
4570 IX86_ATTR_ISA ("abm", OPT_mabm),
4571 IX86_ATTR_ISA ("bmi", OPT_mbmi),
4572 IX86_ATTR_ISA ("bmi2", OPT_mbmi2),
4573 IX86_ATTR_ISA ("lzcnt", OPT_mlzcnt),
4574 IX86_ATTR_ISA ("tbm", OPT_mtbm),
4575 IX86_ATTR_ISA ("aes", OPT_maes),
4576 IX86_ATTR_ISA ("sha", OPT_msha),
4577 IX86_ATTR_ISA ("avx", OPT_mavx),
4578 IX86_ATTR_ISA ("avx2", OPT_mavx2),
4579 IX86_ATTR_ISA ("avx512f", OPT_mavx512f),
4580 IX86_ATTR_ISA ("avx512pf", OPT_mavx512pf),
4581 IX86_ATTR_ISA ("avx512er", OPT_mavx512er),
4582 IX86_ATTR_ISA ("avx512cd", OPT_mavx512cd),
4583 IX86_ATTR_ISA ("avx512dq", OPT_mavx512dq),
4584 IX86_ATTR_ISA ("avx512bw", OPT_mavx512bw),
4585 IX86_ATTR_ISA ("avx512vl", OPT_mavx512vl),
4586 IX86_ATTR_ISA ("mmx", OPT_mmmx),
4587 IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
4588 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
4589 IX86_ATTR_ISA ("sse", OPT_msse),
4590 IX86_ATTR_ISA ("sse2", OPT_msse2),
4591 IX86_ATTR_ISA ("sse3", OPT_msse3),
4592 IX86_ATTR_ISA ("sse4", OPT_msse4),
4593 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1),
4594 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2),
4595 IX86_ATTR_ISA ("sse4a", OPT_msse4a),
4596 IX86_ATTR_ISA ("ssse3", OPT_mssse3),
4597 IX86_ATTR_ISA ("fma4", OPT_mfma4),
4598 IX86_ATTR_ISA ("fma", OPT_mfma),
4599 IX86_ATTR_ISA ("xop", OPT_mxop),
4600 IX86_ATTR_ISA ("lwp", OPT_mlwp),
4601 IX86_ATTR_ISA ("fsgsbase", OPT_mfsgsbase),
4602 IX86_ATTR_ISA ("rdrnd", OPT_mrdrnd),
4603 IX86_ATTR_ISA ("f16c", OPT_mf16c),
4604 IX86_ATTR_ISA ("rtm", OPT_mrtm),
4605 IX86_ATTR_ISA ("hle", OPT_mhle),
4606 IX86_ATTR_ISA ("prfchw", OPT_mprfchw),
4607 IX86_ATTR_ISA ("rdseed", OPT_mrdseed),
4608 IX86_ATTR_ISA ("adx", OPT_madx),
4609 IX86_ATTR_ISA ("fxsr", OPT_mfxsr),
4610 IX86_ATTR_ISA ("xsave", OPT_mxsave),
4611 IX86_ATTR_ISA ("xsaveopt", OPT_mxsaveopt),
4612 IX86_ATTR_ISA ("prefetchwt1", OPT_mprefetchwt1),
4613 IX86_ATTR_ISA ("clflushopt", OPT_mclflushopt),
4614 IX86_ATTR_ISA ("xsavec", OPT_mxsavec),
4615 IX86_ATTR_ISA ("xsaves", OPT_mxsaves),
4616
4617 /* enum options */
4618 IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_),
4619
4620 /* string options */
4621 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
4622 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE),
4623
4624 /* flag options */
4625 IX86_ATTR_YES ("cld",
4626 OPT_mcld,
4627 MASK_CLD),
4628
4629 IX86_ATTR_NO ("fancy-math-387",
4630 OPT_mfancy_math_387,
4631 MASK_NO_FANCY_MATH_387),
4632
4633 IX86_ATTR_YES ("ieee-fp",
4634 OPT_mieee_fp,
4635 MASK_IEEE_FP),
4636
4637 IX86_ATTR_YES ("inline-all-stringops",
4638 OPT_minline_all_stringops,
4639 MASK_INLINE_ALL_STRINGOPS),
4640
4641 IX86_ATTR_YES ("inline-stringops-dynamically",
4642 OPT_minline_stringops_dynamically,
4643 MASK_INLINE_STRINGOPS_DYNAMICALLY),
4644
4645 IX86_ATTR_NO ("align-stringops",
4646 OPT_mno_align_stringops,
4647 MASK_NO_ALIGN_STRINGOPS),
4648
4649 IX86_ATTR_YES ("recip",
4650 OPT_mrecip,
4651 MASK_RECIP),
4652
4653 };
4654
4655 /* If this is a list, recurse to get the options. */
4656 if (TREE_CODE (args) == TREE_LIST)
4657 {
4658 bool ret = true;
4659
4660 for (; args; args = TREE_CHAIN (args))
4661 if (TREE_VALUE (args)
4662 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args),
4663 p_strings, opts, opts_set,
4664 enum_opts_set))
4665 ret = false;
4666
4667 return ret;
4668 }
4669
4670 else if (TREE_CODE (args) != STRING_CST)
4671 {
4672 error ("attribute %<target%> argument not a string");
4673 return false;
4674 }
4675
4676 /* Handle multiple arguments separated by commas. */
4677 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
4678
4679 while (next_optstr && *next_optstr != '\0')
4680 {
4681 char *p = next_optstr;
4682 char *orig_p = p;
4683 char *comma = strchr (next_optstr, ',');
4684 const char *opt_string;
4685 size_t len, opt_len;
4686 int opt;
4687 bool opt_set_p;
4688 char ch;
4689 unsigned i;
4690 enum ix86_opt_type type = ix86_opt_unknown;
4691 int mask = 0;
4692
4693 if (comma)
4694 {
4695 *comma = '\0';
4696 len = comma - next_optstr;
4697 next_optstr = comma + 1;
4698 }
4699 else
4700 {
4701 len = strlen (p);
4702 next_optstr = NULL;
4703 }
4704
4705 /* Recognize no-xxx. */
4706 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
4707 {
4708 opt_set_p = false;
4709 p += 3;
4710 len -= 3;
4711 }
4712 else
4713 opt_set_p = true;
4714
4715 /* Find the option. */
4716 ch = *p;
4717 opt = N_OPTS;
4718 for (i = 0; i < ARRAY_SIZE (attrs); i++)
4719 {
4720 type = attrs[i].type;
4721 opt_len = attrs[i].len;
4722 if (ch == attrs[i].string[0]
4723 && ((type != ix86_opt_str && type != ix86_opt_enum)
4724 ? len == opt_len
4725 : len > opt_len)
4726 && memcmp (p, attrs[i].string, opt_len) == 0)
4727 {
4728 opt = attrs[i].opt;
4729 mask = attrs[i].mask;
4730 opt_string = attrs[i].string;
4731 break;
4732 }
4733 }
4734
4735 /* Process the option. */
4736 if (opt == N_OPTS)
4737 {
4738 error ("attribute(target(\"%s\")) is unknown", orig_p);
4739 ret = false;
4740 }
4741
4742 else if (type == ix86_opt_isa)
4743 {
4744 struct cl_decoded_option decoded;
4745
4746 generate_option (opt, NULL, opt_set_p, CL_TARGET, &decoded);
4747 ix86_handle_option (opts, opts_set,
4748 &decoded, input_location);
4749 }
4750
4751 else if (type == ix86_opt_yes || type == ix86_opt_no)
4752 {
4753 if (type == ix86_opt_no)
4754 opt_set_p = !opt_set_p;
4755
4756 if (opt_set_p)
4757 opts->x_target_flags |= mask;
4758 else
4759 opts->x_target_flags &= ~mask;
4760 }
4761
4762 else if (type == ix86_opt_str)
4763 {
4764 if (p_strings[opt])
4765 {
4766 error ("option(\"%s\") was already specified", opt_string);
4767 ret = false;
4768 }
4769 else
4770 p_strings[opt] = xstrdup (p + opt_len);
4771 }
4772
4773 else if (type == ix86_opt_enum)
4774 {
4775 bool arg_ok;
4776 int value;
4777
4778 arg_ok = opt_enum_arg_to_value (opt, p + opt_len, &value, CL_TARGET);
4779 if (arg_ok)
4780 set_option (opts, enum_opts_set, opt, value,
4781 p + opt_len, DK_UNSPECIFIED, input_location,
4782 global_dc);
4783 else
4784 {
4785 error ("attribute(target(\"%s\")) is unknown", orig_p);
4786 ret = false;
4787 }
4788 }
4789
4790 else
4791 gcc_unreachable ();
4792 }
4793
4794 return ret;
4795 }
4796
4797 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
4798
4799 tree
4800 ix86_valid_target_attribute_tree (tree args,
4801 struct gcc_options *opts,
4802 struct gcc_options *opts_set)
4803 {
4804 const char *orig_arch_string = opts->x_ix86_arch_string;
4805 const char *orig_tune_string = opts->x_ix86_tune_string;
4806 enum fpmath_unit orig_fpmath_set = opts_set->x_ix86_fpmath;
4807 int orig_tune_defaulted = ix86_tune_defaulted;
4808 int orig_arch_specified = ix86_arch_specified;
4809 char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL };
4810 tree t = NULL_TREE;
4811 int i;
4812 struct cl_target_option *def
4813 = TREE_TARGET_OPTION (target_option_default_node);
4814 struct gcc_options enum_opts_set;
4815
4816 memset (&enum_opts_set, 0, sizeof (enum_opts_set));
4817
4818 /* Process each of the options on the chain. */
4819 if (! ix86_valid_target_attribute_inner_p (args, option_strings, opts,
4820 opts_set, &enum_opts_set))
4821 return error_mark_node;
4822
4823 /* If the changed options are different from the default, rerun
4824 ix86_option_override_internal, and then save the options away.
4825      The string options are attribute options, and will be undone
4826 when we copy the save structure. */
4827 if (opts->x_ix86_isa_flags != def->x_ix86_isa_flags
4828 || opts->x_target_flags != def->x_target_flags
4829 || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
4830 || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
4831 || enum_opts_set.x_ix86_fpmath)
4832 {
4833 /* If we are using the default tune= or arch=, undo the string assigned,
4834 and use the default. */
4835 if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
4836 opts->x_ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
4837 else if (!orig_arch_specified)
4838 opts->x_ix86_arch_string = NULL;
4839
4840 if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
4841 opts->x_ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
4842 else if (orig_tune_defaulted)
4843 opts->x_ix86_tune_string = NULL;
4844
4845 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
4846 if (enum_opts_set.x_ix86_fpmath)
4847 opts_set->x_ix86_fpmath = (enum fpmath_unit) 1;
4848 else if (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
4849 && TARGET_SSE_P (opts->x_ix86_isa_flags))
4850 {
4851 opts->x_ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
4852 opts_set->x_ix86_fpmath = (enum fpmath_unit) 1;
4853 }
4854
4855 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
4856 ix86_option_override_internal (false, opts, opts_set);
4857
4858 /* Add any builtin functions with the new isa if any. */
4859 ix86_add_new_builtins (opts->x_ix86_isa_flags);
4860
4861 /* Save the current options unless we are validating options for
4862 #pragma. */
4863 t = build_target_option_node (opts);
4864
4865 opts->x_ix86_arch_string = orig_arch_string;
4866 opts->x_ix86_tune_string = orig_tune_string;
4867 opts_set->x_ix86_fpmath = orig_fpmath_set;
4868
4869 /* Free up memory allocated to hold the strings */
4870 for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
4871 free (option_strings[i]);
4872 }
4873
4874 return t;
4875 }
4876
4877 /* Hook to validate attribute((target("string"))). */
4878
4879 static bool
4880 ix86_valid_target_attribute_p (tree fndecl,
4881 tree ARG_UNUSED (name),
4882 tree args,
4883 int ARG_UNUSED (flags))
4884 {
4885 struct gcc_options func_options;
4886 tree new_target, new_optimize;
4887 bool ret = true;
4888
4889 /* attribute((target("default"))) does nothing, beyond
4890 affecting multi-versioning. */
4891 if (TREE_VALUE (args)
4892 && TREE_CODE (TREE_VALUE (args)) == STRING_CST
4893 && TREE_CHAIN (args) == NULL_TREE
4894 && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0)
4895 return true;
4896
4897 tree old_optimize = build_optimization_node (&global_options);
4898
4899 /* Get the optimization options of the current function. */
4900 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
4901
4902 if (!func_optimize)
4903 func_optimize = old_optimize;
4904
4905 /* Init func_options. */
4906 memset (&func_options, 0, sizeof (func_options));
4907 init_options_struct (&func_options, NULL);
4908 lang_hooks.init_options_struct (&func_options);
4909
4910 cl_optimization_restore (&func_options,
4911 TREE_OPTIMIZATION (func_optimize));
4912
4913 /* Initialize func_options to the default before its target options can
4914 be set. */
4915 cl_target_option_restore (&func_options,
4916 TREE_TARGET_OPTION (target_option_default_node));
4917
4918 new_target = ix86_valid_target_attribute_tree (args, &func_options,
4919 &global_options_set);
4920
4921 new_optimize = build_optimization_node (&func_options);
4922
4923 if (new_target == error_mark_node)
4924 ret = false;
4925
4926 else if (fndecl && new_target)
4927 {
4928 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
4929
4930 if (old_optimize != new_optimize)
4931 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
4932 }
4933
4934 return ret;
4935 }
4936
4937 \f
4938 /* Hook to determine if one function can safely inline another. */
4939
4940 static bool
4941 ix86_can_inline_p (tree caller, tree callee)
4942 {
4943 bool ret = false;
4944 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
4945 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
4946
4947 /* If callee has no option attributes, then it is ok to inline. */
4948 if (!callee_tree)
4949 ret = true;
4950
4951   /* If caller has no option attributes, but callee does, then it is not ok to
4952 inline. */
4953 else if (!caller_tree)
4954 ret = false;
4955
4956 else
4957 {
4958 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
4959 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
4960
4961       /* Callee's isa options should be a subset of the caller's, i.e. an SSE4
4962	 function can inline an SSE2 function but an SSE2 function can't inline
4963	 an SSE4 function.  */
4964 if ((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
4965 != callee_opts->x_ix86_isa_flags)
4966 ret = false;
4967
4968 /* See if we have the same non-isa options. */
4969 else if (caller_opts->x_target_flags != callee_opts->x_target_flags)
4970 ret = false;
4971
4972 /* See if arch, tune, etc. are the same. */
4973 else if (caller_opts->arch != callee_opts->arch)
4974 ret = false;
4975
4976 else if (caller_opts->tune != callee_opts->tune)
4977 ret = false;
4978
4979 else if (caller_opts->x_ix86_fpmath != callee_opts->x_ix86_fpmath)
4980 ret = false;
4981
4982 else if (caller_opts->branch_cost != callee_opts->branch_cost)
4983 ret = false;
4984
4985 else
4986 ret = true;
4987 }
4988
4989 return ret;
4990 }
4991
4992 \f
4993 /* Remember the last target of ix86_set_current_function. */
4994 static GTY(()) tree ix86_previous_fndecl;
4995
4996 /* Invalidate ix86_previous_fndecl cache. */
4997 void
4998 ix86_reset_previous_fndecl (void)
4999 {
5000 ix86_previous_fndecl = NULL_TREE;
5001 }
5002
5003 /* Establish appropriate back-end context for processing the function
5004 FNDECL. The argument might be NULL to indicate processing at top
5005 level, outside of any function scope. */
5006 static void
5007 ix86_set_current_function (tree fndecl)
5008 {
5009 /* Only change the context if the function changes. This hook is called
5010 several times in the course of compiling a function, and we don't want to
5011 slow things down too much or call target_reinit when it isn't safe. */
5012 if (fndecl && fndecl != ix86_previous_fndecl)
5013 {
5014 tree old_tree = (ix86_previous_fndecl
5015 ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)
5016 : NULL_TREE);
5017
5018 tree new_tree = (fndecl
5019 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
5020 : NULL_TREE);
5021
5022 ix86_previous_fndecl = fndecl;
5023 if (old_tree == new_tree)
5024 ;
5025
5026 else if (new_tree)
5027 {
5028 cl_target_option_restore (&global_options,
5029 TREE_TARGET_OPTION (new_tree));
5030 if (TREE_TARGET_GLOBALS (new_tree))
5031 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
5032 else
5033 TREE_TARGET_GLOBALS (new_tree)
5034 = save_target_globals_default_opts ();
5035 }
5036
5037 else if (old_tree)
5038 {
5039 new_tree = target_option_current_node;
5040 cl_target_option_restore (&global_options,
5041 TREE_TARGET_OPTION (new_tree));
5042 if (TREE_TARGET_GLOBALS (new_tree))
5043 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
5044 else if (new_tree == target_option_default_node)
5045 restore_target_globals (&default_target_globals);
5046 else
5047 TREE_TARGET_GLOBALS (new_tree)
5048 = save_target_globals_default_opts ();
5049 }
5050 }
5051 }
5052
5053 \f
5054 /* Return true if this goes in large data/bss. */
5055
5056 static bool
5057 ix86_in_large_data_p (tree exp)
5058 {
5059 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
5060 return false;
5061
5062 /* Functions are never large data. */
5063 if (TREE_CODE (exp) == FUNCTION_DECL)
5064 return false;
5065
5066 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
5067 {
5068 const char *section = DECL_SECTION_NAME (exp);
5069 if (strcmp (section, ".ldata") == 0
5070 || strcmp (section, ".lbss") == 0)
5071 return true;
5072 return false;
5073 }
5074 else
5075 {
5076 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
5077
5078       /* If this is an incomplete type with size 0, then we can't put it
5079	 in data because it might be too big when completed.  Also,
5080	 int_size_in_bytes returns -1 if the size can vary or is larger than
5081	 an integer, in which case it is also safer to assume that it goes in
5082	 large data.  */
5083 if (size <= 0 || size > ix86_section_threshold)
5084 return true;
5085 }
5086
5087 return false;
5088 }
5089
5090 /* Switch to the appropriate section for output of DECL.
5091 DECL is either a `VAR_DECL' node or a constant of some sort.
5092 RELOC indicates whether forming the initial value of DECL requires
5093 link-time relocations. */
5094
5095 ATTRIBUTE_UNUSED static section *
5096 x86_64_elf_select_section (tree decl, int reloc,
5097 unsigned HOST_WIDE_INT align)
5098 {
5099 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5100 && ix86_in_large_data_p (decl))
5101 {
5102 const char *sname = NULL;
5103 unsigned int flags = SECTION_WRITE;
5104 switch (categorize_decl_for_section (decl, reloc))
5105 {
5106 case SECCAT_DATA:
5107 sname = ".ldata";
5108 break;
5109 case SECCAT_DATA_REL:
5110 sname = ".ldata.rel";
5111 break;
5112 case SECCAT_DATA_REL_LOCAL:
5113 sname = ".ldata.rel.local";
5114 break;
5115 case SECCAT_DATA_REL_RO:
5116 sname = ".ldata.rel.ro";
5117 break;
5118 case SECCAT_DATA_REL_RO_LOCAL:
5119 sname = ".ldata.rel.ro.local";
5120 break;
5121 case SECCAT_BSS:
5122 sname = ".lbss";
5123 flags |= SECTION_BSS;
5124 break;
5125 case SECCAT_RODATA:
5126 case SECCAT_RODATA_MERGE_STR:
5127 case SECCAT_RODATA_MERGE_STR_INIT:
5128 case SECCAT_RODATA_MERGE_CONST:
5129 sname = ".lrodata";
5130 flags = 0;
5131 break;
5132 case SECCAT_SRODATA:
5133 case SECCAT_SDATA:
5134 case SECCAT_SBSS:
5135 gcc_unreachable ();
5136 case SECCAT_TEXT:
5137 case SECCAT_TDATA:
5138 case SECCAT_TBSS:
5139 /* We don't split these for medium model. Place them into
5140	     default sections and hope for the best.  */
5141 break;
5142 }
5143 if (sname)
5144 {
5145 /* We might get called with string constants, but get_named_section
5146 doesn't like them as they are not DECLs. Also, we need to set
5147 flags in that case. */
5148 if (!DECL_P (decl))
5149 return get_section (sname, flags, NULL);
5150 return get_named_section (decl, sname, reloc);
5151 }
5152 }
5153 return default_elf_select_section (decl, reloc, align);
5154 }
5155
5156 /* Select a set of attributes for section NAME based on the properties
5157 of DECL and whether or not RELOC indicates that DECL's initializer
5158 might contain runtime relocations. */
5159
5160 static unsigned int ATTRIBUTE_UNUSED
5161 x86_64_elf_section_type_flags (tree decl, const char *name, int reloc)
5162 {
5163 unsigned int flags = default_section_type_flags (decl, name, reloc);
5164
5165 if (decl == NULL_TREE
5166 && (strcmp (name, ".ldata.rel.ro") == 0
5167 || strcmp (name, ".ldata.rel.ro.local") == 0))
5168 flags |= SECTION_RELRO;
5169
5170 if (strcmp (name, ".lbss") == 0
5171       || strncmp (name, ".lbss.", 6) == 0
5172       || strncmp (name, ".gnu.linkonce.lb.", 17) == 0)
5173 flags |= SECTION_BSS;
5174
5175 return flags;
5176 }
5177
5178 /* Build up a unique section name, expressed as a
5179 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
5180 RELOC indicates whether the initial value of EXP requires
5181 link-time relocations. */
5182
5183 static void ATTRIBUTE_UNUSED
5184 x86_64_elf_unique_section (tree decl, int reloc)
5185 {
5186 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5187 && ix86_in_large_data_p (decl))
5188 {
5189 const char *prefix = NULL;
5190 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
5191 bool one_only = DECL_COMDAT_GROUP (decl) && !HAVE_COMDAT_GROUP;
5192
5193 switch (categorize_decl_for_section (decl, reloc))
5194 {
5195 case SECCAT_DATA:
5196 case SECCAT_DATA_REL:
5197 case SECCAT_DATA_REL_LOCAL:
5198 case SECCAT_DATA_REL_RO:
5199 case SECCAT_DATA_REL_RO_LOCAL:
5200 prefix = one_only ? ".ld" : ".ldata";
5201 break;
5202 case SECCAT_BSS:
5203 prefix = one_only ? ".lb" : ".lbss";
5204 break;
5205 case SECCAT_RODATA:
5206 case SECCAT_RODATA_MERGE_STR:
5207 case SECCAT_RODATA_MERGE_STR_INIT:
5208 case SECCAT_RODATA_MERGE_CONST:
5209 prefix = one_only ? ".lr" : ".lrodata";
5210 break;
5211 case SECCAT_SRODATA:
5212 case SECCAT_SDATA:
5213 case SECCAT_SBSS:
5214 gcc_unreachable ();
5215 case SECCAT_TEXT:
5216 case SECCAT_TDATA:
5217 case SECCAT_TBSS:
5218 /* We don't split these for medium model. Place them into
5219	     default sections and hope for the best.  */
5220 break;
5221 }
5222 if (prefix)
5223 {
5224 const char *name, *linkonce;
5225 char *string;
5226
5227 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
5228 name = targetm.strip_name_encoding (name);
5229
5230 /* If we're using one_only, then there needs to be a .gnu.linkonce
5231 prefix to the section name. */
5232 linkonce = one_only ? ".gnu.linkonce" : "";
5233
5234 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
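	  /* E.g. a large-data variable "foo" ends up in ".ldata.foo", or in
	     ".gnu.linkonce.ld.foo" when a one-only copy is needed.  */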
5235
5236 set_decl_section_name (decl, string);
5237 return;
5238 }
5239 }
5240 default_unique_section (decl, reloc);
5241 }
5242
5243 #ifdef COMMON_ASM_OP
5244 /* This says how to output assembler code to declare an
5245 uninitialized external linkage data object.
5246
5247    For medium model x86-64 we need to use the .largecomm directive for
5248    large objects.  */
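/* For example, a 1 MiB object aligned to 32 bytes produces a line of the
   form ".largecomm name,1048576,32".  */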
5249 void
5250 x86_elf_aligned_common (FILE *file,
5251 const char *name, unsigned HOST_WIDE_INT size,
5252 int align)
5253 {
5254 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5255 && size > (unsigned int)ix86_section_threshold)
5256 fputs (".largecomm\t", file);
5257 else
5258 fputs (COMMON_ASM_OP, file);
5259 assemble_name (file, name);
5260 fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
5261 size, align / BITS_PER_UNIT);
5262 }
5263 #endif
5264
5265 /* Utility function for targets to use in implementing
5266 ASM_OUTPUT_ALIGNED_BSS. */
5267
5268 void
5269 x86_output_aligned_bss (FILE *file, tree decl, const char *name,
5270 unsigned HOST_WIDE_INT size, int align)
5271 {
5272 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5273 && size > (unsigned int)ix86_section_threshold)
5274 switch_to_section (get_named_section (decl, ".lbss", 0));
5275 else
5276 switch_to_section (bss_section);
5277 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
5278 #ifdef ASM_DECLARE_OBJECT_NAME
5279 last_assemble_variable_decl = decl;
5280 ASM_DECLARE_OBJECT_NAME (file, name, decl);
5281 #else
5282 /* Standard thing is just output label for the object. */
5283 ASM_OUTPUT_LABEL (file, name);
5284 #endif /* ASM_DECLARE_OBJECT_NAME */
5285 ASM_OUTPUT_SKIP (file, size ? size : 1);
5286 }
5287 \f
5288 /* Decide whether we must probe the stack before any space allocation
5289 on this target. It's essentially TARGET_STACK_PROBE except when
5290 -fstack-check causes the stack to be already probed differently. */
5291
5292 bool
5293 ix86_target_stack_probe (void)
5294 {
5295 /* Do not probe the stack twice if static stack checking is enabled. */
5296 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
5297 return false;
5298
5299 return TARGET_STACK_PROBE;
5300 }
5301 \f
5302 /* Decide whether we can make a sibling call to a function. DECL is the
5303 declaration of the function being targeted by the call and EXP is the
5304 CALL_EXPR representing the call. */
5305
5306 static bool
5307 ix86_function_ok_for_sibcall (tree decl, tree exp)
5308 {
5309 tree type, decl_or_type;
5310 rtx a, b;
5311
5312 /* If we are generating position-independent code, we cannot sibcall
5313 optimize any indirect call, or a direct call to a global function,
5314 as the PLT requires %ebx be live. (Darwin does not have a PLT.) */
5315 if (!TARGET_MACHO
5316 && !TARGET_64BIT
5317 && flag_pic
5318 && (!decl || !targetm.binds_local_p (decl)))
5319 return false;
5320
5321 /* If we need to align the outgoing stack, then sibcalling would
5322 unalign the stack, which may break the called function. */
5323 if (ix86_minimum_incoming_stack_boundary (true)
5324 < PREFERRED_STACK_BOUNDARY)
5325 return false;
5326
5327 if (decl)
5328 {
5329 decl_or_type = decl;
5330 type = TREE_TYPE (decl);
5331 }
5332 else
5333 {
5334 /* We're looking at the CALL_EXPR, we need the type of the function. */
5335 type = CALL_EXPR_FN (exp); /* pointer expression */
5336 type = TREE_TYPE (type); /* pointer type */
5337 type = TREE_TYPE (type); /* function type */
5338 decl_or_type = type;
5339 }
5340
5341   /* Check that the return value locations are the same.  For instance,
5342 if we are returning floats on the 80387 register stack, we cannot
5343 make a sibcall from a function that doesn't return a float to a
5344 function that does or, conversely, from a function that does return
5345 a float to a function that doesn't; the necessary stack adjustment
5346 would not be executed. This is also the place we notice
5347 differences in the return value ABI. Note that it is ok for one
5348 of the functions to have void return type as long as the return
5349 value of the other is passed in a register. */
5350 a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
5351 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
5352 cfun->decl, false);
5353 if (STACK_REG_P (a) || STACK_REG_P (b))
5354 {
5355 if (!rtx_equal_p (a, b))
5356 return false;
5357 }
5358 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
5359 ;
5360 else if (!rtx_equal_p (a, b))
5361 return false;
5362
5363 if (TARGET_64BIT)
5364 {
5365 /* The SYSV ABI has more call-clobbered registers;
5366 disallow sibcalls from MS to SYSV. */
5367 if (cfun->machine->call_abi == MS_ABI
5368 && ix86_function_type_abi (type) == SYSV_ABI)
5369 return false;
5370 }
5371 else
5372 {
5373 /* If this call is indirect, we'll need to be able to use a
5374 call-clobbered register for the address of the target function.
5375 Make sure that all such registers are not used for passing
5376 parameters. Note that DLLIMPORT functions are indirect. */
5377 if (!decl
5378 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
5379 {
5380 if (ix86_function_regparm (type, NULL) >= 3)
5381 {
5382 /* ??? Need to count the actual number of registers to be used,
5383 not the possible number of registers. Fix later. */
5384 return false;
5385 }
5386 }
5387 }
5388
5389 /* Otherwise okay. That also includes certain types of indirect calls. */
5390 return true;
5391 }
5392
5393 /* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
5394 and "sseregparm" calling convention attributes;
5395 arguments as in struct attribute_spec.handler. */
5396
5397 static tree
5398 ix86_handle_cconv_attribute (tree *node, tree name,
5399 tree args,
5400 int,
5401 bool *no_add_attrs)
5402 {
5403 if (TREE_CODE (*node) != FUNCTION_TYPE
5404 && TREE_CODE (*node) != METHOD_TYPE
5405 && TREE_CODE (*node) != FIELD_DECL
5406 && TREE_CODE (*node) != TYPE_DECL)
5407 {
5408 warning (OPT_Wattributes, "%qE attribute only applies to functions",
5409 name);
5410 *no_add_attrs = true;
5411 return NULL_TREE;
5412 }
5413
5414   /* Can combine regparm with all attributes but fastcall and thiscall.  */
5415 if (is_attribute_p ("regparm", name))
5416 {
5417 tree cst;
5418
5419 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5420 {
5421 error ("fastcall and regparm attributes are not compatible");
5422 }
5423
5424 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5425 {
5426 	  error ("regparm and thiscall attributes are not compatible");
5427 }
5428
5429 cst = TREE_VALUE (args);
5430 if (TREE_CODE (cst) != INTEGER_CST)
5431 {
5432 warning (OPT_Wattributes,
5433 "%qE attribute requires an integer constant argument",
5434 name);
5435 *no_add_attrs = true;
5436 }
5437 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
5438 {
5439 warning (OPT_Wattributes, "argument to %qE attribute larger than %d",
5440 name, REGPARM_MAX);
5441 *no_add_attrs = true;
5442 }
5443
5444 return NULL_TREE;
5445 }
5446
5447 if (TARGET_64BIT)
5448 {
5449 /* Do not warn when emulating the MS ABI. */
5450 if ((TREE_CODE (*node) != FUNCTION_TYPE
5451 && TREE_CODE (*node) != METHOD_TYPE)
5452 || ix86_function_type_abi (*node) != MS_ABI)
5453 warning (OPT_Wattributes, "%qE attribute ignored",
5454 name);
5455 *no_add_attrs = true;
5456 return NULL_TREE;
5457 }
5458
5459 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
5460 if (is_attribute_p ("fastcall", name))
5461 {
5462 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5463 {
5464 error ("fastcall and cdecl attributes are not compatible");
5465 }
5466 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5467 {
5468 error ("fastcall and stdcall attributes are not compatible");
5469 }
5470 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
5471 {
5472 error ("fastcall and regparm attributes are not compatible");
5473 }
5474 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5475 {
5476 error ("fastcall and thiscall attributes are not compatible");
5477 }
5478 }
5479
5480 /* Can combine stdcall with fastcall (redundant), regparm and
5481 sseregparm. */
5482 else if (is_attribute_p ("stdcall", name))
5483 {
5484 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5485 {
5486 error ("stdcall and cdecl attributes are not compatible");
5487 }
5488 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5489 {
5490 error ("stdcall and fastcall attributes are not compatible");
5491 }
5492 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5493 {
5494 error ("stdcall and thiscall attributes are not compatible");
5495 }
5496 }
5497
5498 /* Can combine cdecl with regparm and sseregparm. */
5499 else if (is_attribute_p ("cdecl", name))
5500 {
5501 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5502 {
5503 error ("stdcall and cdecl attributes are not compatible");
5504 }
5505 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5506 {
5507 error ("fastcall and cdecl attributes are not compatible");
5508 }
5509 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5510 {
5511 error ("cdecl and thiscall attributes are not compatible");
5512 }
5513 }
5514 else if (is_attribute_p ("thiscall", name))
5515 {
5516 if (TREE_CODE (*node) != METHOD_TYPE && pedantic)
5517     warning (OPT_Wattributes, "%qE attribute is used for non-class method",
5518 name);
5519 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5520 {
5521 error ("stdcall and thiscall attributes are not compatible");
5522 }
5523 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5524 {
5525 error ("fastcall and thiscall attributes are not compatible");
5526 }
5527 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5528 {
5529 error ("cdecl and thiscall attributes are not compatible");
5530 }
5531 }
5532
5533 /* Can combine sseregparm with all attributes. */
5534
5535 return NULL_TREE;
5536 }
5537
5538 /* The transactional memory builtins are implicitly regparm or fastcall
5539 depending on the ABI. Override the generic do-nothing attribute that
5540 these builtins were declared with, and replace it with one of the two
5541 attributes that we expect elsewhere. */
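/* For instance, on 32-bit Unix-like targets the placeholder becomes
   regparm(2), while on 32-bit Windows (where CHECK_STACK_LIMIT is
   positive) it becomes fastcall instead.  */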
5542
5543 static tree
5544 ix86_handle_tm_regparm_attribute (tree *node, tree, tree,
5545 int flags, bool *no_add_attrs)
5546 {
5547 tree alt;
5548
5549 /* In no case do we want to add the placeholder attribute. */
5550 *no_add_attrs = true;
5551
5552 /* The 64-bit ABI is unchanged for transactional memory. */
5553 if (TARGET_64BIT)
5554 return NULL_TREE;
5555
5556 /* ??? Is there a better way to validate 32-bit windows? We have
5557 cfun->machine->call_abi, but that seems to be set only for 64-bit. */
5558 if (CHECK_STACK_LIMIT > 0)
5559 alt = tree_cons (get_identifier ("fastcall"), NULL, NULL);
5560 else
5561 {
5562 alt = tree_cons (NULL, build_int_cst (NULL, 2), NULL);
5563 alt = tree_cons (get_identifier ("regparm"), alt, NULL);
5564 }
5565 decl_attributes (node, alt, flags);
5566
5567 return NULL_TREE;
5568 }
5569
5570 /* This function determines from TYPE the calling-convention. */
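/* For example, a 32-bit function declared with __attribute__ ((stdcall))
   yields IX86_CALLCVT_STDCALL, while in 64-bit mode every function is
   reported as IX86_CALLCVT_CDECL.  */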
5571
5572 unsigned int
5573 ix86_get_callcvt (const_tree type)
5574 {
5575 unsigned int ret = 0;
5576 bool is_stdarg;
5577 tree attrs;
5578
5579 if (TARGET_64BIT)
5580 return IX86_CALLCVT_CDECL;
5581
5582 attrs = TYPE_ATTRIBUTES (type);
5583 if (attrs != NULL_TREE)
5584 {
5585 if (lookup_attribute ("cdecl", attrs))
5586 ret |= IX86_CALLCVT_CDECL;
5587 else if (lookup_attribute ("stdcall", attrs))
5588 ret |= IX86_CALLCVT_STDCALL;
5589 else if (lookup_attribute ("fastcall", attrs))
5590 ret |= IX86_CALLCVT_FASTCALL;
5591 else if (lookup_attribute ("thiscall", attrs))
5592 ret |= IX86_CALLCVT_THISCALL;
5593
5594       /* Regparm isn't allowed for thiscall and fastcall.  */
5595 if ((ret & (IX86_CALLCVT_THISCALL | IX86_CALLCVT_FASTCALL)) == 0)
5596 {
5597 if (lookup_attribute ("regparm", attrs))
5598 ret |= IX86_CALLCVT_REGPARM;
5599 if (lookup_attribute ("sseregparm", attrs))
5600 ret |= IX86_CALLCVT_SSEREGPARM;
5601 }
5602
5603 if (IX86_BASE_CALLCVT(ret) != 0)
5604 return ret;
5605 }
5606
5607 is_stdarg = stdarg_p (type);
5608 if (TARGET_RTD && !is_stdarg)
5609 return IX86_CALLCVT_STDCALL | ret;
5610
5611 if (ret != 0
5612 || is_stdarg
5613 || TREE_CODE (type) != METHOD_TYPE
5614 || ix86_function_type_abi (type) != MS_ABI)
5615 return IX86_CALLCVT_CDECL | ret;
5616
5617 return IX86_CALLCVT_THISCALL;
5618 }
5619
5620 /* Return 0 if the attributes for two types are incompatible, 1 if they
5621 are compatible, and 2 if they are nearly compatible (which causes a
5622 warning to be generated). */
5623
5624 static int
5625 ix86_comp_type_attributes (const_tree type1, const_tree type2)
5626 {
5627 unsigned int ccvt1, ccvt2;
5628
5629 if (TREE_CODE (type1) != FUNCTION_TYPE
5630 && TREE_CODE (type1) != METHOD_TYPE)
5631 return 1;
5632
5633 ccvt1 = ix86_get_callcvt (type1);
5634 ccvt2 = ix86_get_callcvt (type2);
5635 if (ccvt1 != ccvt2)
5636 return 0;
5637 if (ix86_function_regparm (type1, NULL)
5638 != ix86_function_regparm (type2, NULL))
5639 return 0;
5640
5641 return 1;
5642 }
5643 \f
5644 /* Return the regparm value for a function with the indicated TYPE and DECL.
5645 DECL may be NULL when calling function indirectly
5646 or considering a libcall. */
5647
5648 static int
5649 ix86_function_regparm (const_tree type, const_tree decl)
5650 {
5651 tree attr;
5652 int regparm;
5653 unsigned int ccvt;
5654
5655 if (TARGET_64BIT)
5656 return (ix86_function_type_abi (type) == SYSV_ABI
5657 ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
5658 ccvt = ix86_get_callcvt (type);
5659 regparm = ix86_regparm;
5660
5661 if ((ccvt & IX86_CALLCVT_REGPARM) != 0)
5662 {
5663 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
5664 if (attr)
5665 {
5666 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
5667 return regparm;
5668 }
5669 }
5670 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
5671 return 2;
5672 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
5673 return 1;
5674
5675 /* Use register calling convention for local functions when possible. */
5676 if (decl
5677 && TREE_CODE (decl) == FUNCTION_DECL
5678 /* Caller and callee must agree on the calling convention, so
5679	 checking just the current 'optimize' setting here would mean that with
5680	 __attribute__((optimize (...))) the caller could use the regparm convention
5681	 and the callee not, or vice versa.  Instead look at whether the callee
5682 is optimized or not. */
5683 && opt_for_fn (decl, optimize)
5684 && !(profile_flag && !flag_fentry))
5685 {
5686 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5687 cgraph_local_info *i = cgraph_node::local_info (CONST_CAST_TREE (decl));
5688 if (i && i->local && i->can_change_signature)
5689 {
5690 int local_regparm, globals = 0, regno;
5691
5692 /* Make sure no regparm register is taken by a
5693 fixed register variable. */
5694 for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
5695 if (fixed_regs[local_regparm])
5696 break;
5697
5698 /* We don't want to use regparm(3) for nested functions as
5699 these use a static chain pointer in the third argument. */
5700 if (local_regparm == 3 && DECL_STATIC_CHAIN (decl))
5701 local_regparm = 2;
5702
5703 /* In 32-bit mode save a register for the split stack. */
5704 if (!TARGET_64BIT && local_regparm == 3 && flag_split_stack)
5705 local_regparm = 2;
5706
5707 /* Each fixed register usage increases register pressure,
5708	 so fewer registers should be used for argument passing.
5709	 This functionality can be overridden by an explicit
5710 regparm value. */
5711 for (regno = AX_REG; regno <= DI_REG; regno++)
5712 if (fixed_regs[regno])
5713 globals++;
5714
5715 local_regparm
5716 = globals < local_regparm ? local_regparm - globals : 0;
5717
5718 if (local_regparm > regparm)
5719 regparm = local_regparm;
5720 }
5721 }
5722
5723 return regparm;
5724 }
5725
5726 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
5727 DFmode (2) arguments in SSE registers for a function with the
5728 indicated TYPE and DECL. DECL may be NULL when calling function
5729 indirectly or considering a libcall. Otherwise return 0. */
5730
5731 static int
5732 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
5733 {
5734 gcc_assert (!TARGET_64BIT);
5735
5736 /* Use SSE registers to pass SFmode and DFmode arguments if requested
5737 by the sseregparm attribute. */
5738 if (TARGET_SSEREGPARM
5739 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
5740 {
5741 if (!TARGET_SSE)
5742 {
5743 if (warn)
5744 {
5745 if (decl)
5746 error ("calling %qD with attribute sseregparm without "
5747 "SSE/SSE2 enabled", decl);
5748 else
5749 error ("calling %qT with attribute sseregparm without "
5750 "SSE/SSE2 enabled", type);
5751 }
5752 return 0;
5753 }
5754
5755 return 2;
5756 }
5757
5758 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
5759 (and DFmode for SSE2) arguments in SSE registers. */
5760 if (decl && TARGET_SSE_MATH && optimize
5761 && !(profile_flag && !flag_fentry))
5762 {
5763 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5764 cgraph_local_info *i = cgraph_node::local_info (CONST_CAST_TREE(decl));
5765 if (i && i->local && i->can_change_signature)
5766 return TARGET_SSE2 ? 2 : 1;
5767 }
5768
5769 return 0;
5770 }
5771
5772 /* Return true if EAX is live at the start of the function. Used by
5773 ix86_expand_prologue to determine if we need special help before
5774 calling allocate_stack_worker. */
5775
5776 static bool
5777 ix86_eax_live_at_start_p (void)
5778 {
5779 /* Cheat. Don't bother working forward from ix86_function_regparm
5780 to the function type to whether an actual argument is located in
5781 eax. Instead just look at cfg info, which is still close enough
5782 to correct at this point. This gives false positives for broken
5783 functions that might use uninitialized data that happens to be
5784 allocated in eax, but who cares? */
5785 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 0);
5786 }
5787
5788 static bool
5789 ix86_keep_aggregate_return_pointer (tree fntype)
5790 {
5791 tree attr;
5792
5793 if (!TARGET_64BIT)
5794 {
5795 attr = lookup_attribute ("callee_pop_aggregate_return",
5796 TYPE_ATTRIBUTES (fntype));
5797 if (attr)
5798 return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0);
5799
5800       /* For the 32-bit MS ABI the default is to keep the aggregate
5801	 return pointer.  */
5802 if (ix86_function_type_abi (fntype) == MS_ABI)
5803 return true;
5804 }
5805 return KEEP_AGGREGATE_RETURN_POINTER != 0;
5806 }
5807
5808 /* Value is the number of bytes of arguments automatically
5809 popped when returning from a subroutine call.
5810 FUNDECL is the declaration node of the function (as a tree),
5811 FUNTYPE is the data type of the function (as a tree),
5812 or for a library call it is an identifier node for the subroutine name.
5813 SIZE is the number of bytes of arguments passed on the stack.
5814
5815 On the 80386, the RTD insn may be used to pop them if the number
5816 of args is fixed, but if the number is variable then the caller
5817 must pop them all. RTD can't be used for library calls now
5818 because the library is compiled with the Unix compiler.
5819 Use of RTD is a selectable option, since it is incompatible with
5820 standard Unix calling sequences. If the option is not selected,
5821 the caller must always pop the args.
5822
5823 The attribute stdcall is equivalent to RTD on a per module basis. */
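/* For example, a 32-bit stdcall function taking two ints returns with
   "ret $8", popping its 8 bytes of stack arguments, while a cdecl
   function uses a plain "ret" and leaves the popping to its caller.  */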
5824
5825 static int
5826 ix86_return_pops_args (tree fundecl, tree funtype, int size)
5827 {
5828 unsigned int ccvt;
5829
5830 /* None of the 64-bit ABIs pop arguments. */
5831 if (TARGET_64BIT)
5832 return 0;
5833
5834 ccvt = ix86_get_callcvt (funtype);
5835
5836 if ((ccvt & (IX86_CALLCVT_STDCALL | IX86_CALLCVT_FASTCALL
5837 | IX86_CALLCVT_THISCALL)) != 0
5838 && ! stdarg_p (funtype))
5839 return size;
5840
5841 /* Lose any fake structure return argument if it is passed on the stack. */
5842 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
5843 && !ix86_keep_aggregate_return_pointer (funtype))
5844 {
5845 int nregs = ix86_function_regparm (funtype, fundecl);
5846 if (nregs == 0)
5847 return GET_MODE_SIZE (Pmode);
5848 }
5849
5850 return 0;
5851 }
5852
5853 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
5854
5855 static bool
5856 ix86_legitimate_combined_insn (rtx_insn *insn)
5857 {
5858 /* Check operand constraints in case hard registers were propagated
5859 into insn pattern. This check prevents combine pass from
5860 generating insn patterns with invalid hard register operands.
5861 These invalid insns can eventually confuse reload to error out
5862 with a spill failure. See also PRs 46829 and 46843. */
5863 if ((INSN_CODE (insn) = recog (PATTERN (insn), insn, 0)) >= 0)
5864 {
5865 int i;
5866
5867 extract_insn (insn);
5868 preprocess_constraints (insn);
5869
5870 int n_operands = recog_data.n_operands;
5871 int n_alternatives = recog_data.n_alternatives;
5872 for (i = 0; i < n_operands; i++)
5873 {
5874 rtx op = recog_data.operand[i];
5875 enum machine_mode mode = GET_MODE (op);
5876 const operand_alternative *op_alt;
5877 int offset = 0;
5878 bool win;
5879 int j;
5880
5881 /* For pre-AVX disallow unaligned loads/stores where the
5882 instructions don't support it. */
5883 if (!TARGET_AVX
5884 && VECTOR_MODE_P (GET_MODE (op))
5885 && misaligned_operand (op, GET_MODE (op)))
5886 {
5887 int min_align = get_attr_ssememalign (insn);
5888 if (min_align == 0)
5889 return false;
5890 }
5891
5892 /* A unary operator may be accepted by the predicate, but it
5893 is irrelevant for matching constraints. */
5894 if (UNARY_P (op))
5895 op = XEXP (op, 0);
5896
5897 if (GET_CODE (op) == SUBREG)
5898 {
5899 if (REG_P (SUBREG_REG (op))
5900 && REGNO (SUBREG_REG (op)) < FIRST_PSEUDO_REGISTER)
5901 offset = subreg_regno_offset (REGNO (SUBREG_REG (op)),
5902 GET_MODE (SUBREG_REG (op)),
5903 SUBREG_BYTE (op),
5904 GET_MODE (op));
5905 op = SUBREG_REG (op);
5906 }
5907
5908 if (!(REG_P (op) && HARD_REGISTER_P (op)))
5909 continue;
5910
5911 op_alt = recog_op_alt;
5912
5913 /* Operand has no constraints, anything is OK. */
5914 win = !n_alternatives;
5915
5916 alternative_mask preferred = get_preferred_alternatives (insn);
5917 for (j = 0; j < n_alternatives; j++, op_alt += n_operands)
5918 {
5919 if (!TEST_BIT (preferred, j))
5920 continue;
5921 if (op_alt[i].anything_ok
5922 || (op_alt[i].matches != -1
5923 && operands_match_p
5924 (recog_data.operand[i],
5925 recog_data.operand[op_alt[i].matches]))
5926 || reg_fits_class_p (op, op_alt[i].cl, offset, mode))
5927 {
5928 win = true;
5929 break;
5930 }
5931 }
5932
5933 if (!win)
5934 return false;
5935 }
5936 }
5937
5938 return true;
5939 }
5940 \f
5941 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
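/* AddressSanitizer maps an application address A to the shadow byte at
   (A >> 3) + this offset, so each value below must leave the shadow
   region addressable under the corresponding ABI.  */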
5942
5943 static unsigned HOST_WIDE_INT
5944 ix86_asan_shadow_offset (void)
5945 {
5946 return TARGET_LP64 ? (TARGET_MACHO ? (HOST_WIDE_INT_1 << 44)
5947 : HOST_WIDE_INT_C (0x7fff8000))
5948 : (HOST_WIDE_INT_1 << 29);
5949 }
5950 \f
5951 /* Argument support functions. */
5952
5953 /* Return true when register may be used to pass function parameters. */
5954 bool
5955 ix86_function_arg_regno_p (int regno)
5956 {
5957 int i;
5958 const int *parm_regs;
5959
5960 if (!TARGET_64BIT)
5961 {
5962 if (TARGET_MACHO)
5963 return (regno < REGPARM_MAX
5964 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
5965 else
5966 return (regno < REGPARM_MAX
5967 || (TARGET_MMX && MMX_REGNO_P (regno)
5968 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
5969 || (TARGET_SSE && SSE_REGNO_P (regno)
5970 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
5971 }
5972
5973 if (TARGET_SSE && SSE_REGNO_P (regno)
5974 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
5975 return true;
5976
5977   /* TODO: The function should depend on the current function's ABI, but
5978      builtins.c would then need updating.  Therefore we use the
5979      default ABI.  */
5980
5981 /* RAX is used as hidden argument to va_arg functions. */
5982 if (ix86_abi == SYSV_ABI && regno == AX_REG)
5983 return true;
5984
5985 if (ix86_abi == MS_ABI)
5986 parm_regs = x86_64_ms_abi_int_parameter_registers;
5987 else
5988 parm_regs = x86_64_int_parameter_registers;
5989 for (i = 0; i < (ix86_abi == MS_ABI
5990 ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
5991 if (regno == parm_regs[i])
5992 return true;
5993 return false;
5994 }
5995
5996 /* Return true if we do not know how to pass TYPE solely in registers.  */
5997
5998 static bool
5999 ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
6000 {
6001 if (must_pass_in_stack_var_size_or_pad (mode, type))
6002 return true;
6003
6004 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
6005 The layout_type routine is crafty and tries to trick us into passing
6006 currently unsupported vector types on the stack by using TImode. */
6007 return (!TARGET_64BIT && mode == TImode
6008 && type && TREE_CODE (type) != VECTOR_TYPE);
6009 }
6010
6011 /* Return the size, in bytes, of the area reserved for arguments passed
6012    in registers for the function represented by FNDECL, which depends on
6013    the ABI in use.  */
6014 int
6015 ix86_reg_parm_stack_space (const_tree fndecl)
6016 {
6017 enum calling_abi call_abi = SYSV_ABI;
6018 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
6019 call_abi = ix86_function_abi (fndecl);
6020 else
6021 call_abi = ix86_function_type_abi (fndecl);
6022 if (TARGET_64BIT && call_abi == MS_ABI)
6023 return 32;
6024 return 0;
6025 }
6026
6027 /* Returns value SYSV_ABI, MS_ABI dependent on fntype, specifying the
6028 call abi used. */
6029 enum calling_abi
6030 ix86_function_type_abi (const_tree fntype)
6031 {
6032 if (fntype != NULL_TREE && TYPE_ATTRIBUTES (fntype) != NULL_TREE)
6033 {
6034 enum calling_abi abi = ix86_abi;
6035 if (abi == SYSV_ABI)
6036 {
6037 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
6038 abi = MS_ABI;
6039 }
6040 else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
6041 abi = SYSV_ABI;
6042 return abi;
6043 }
6044 return ix86_abi;
6045 }
6046
6047 /* We add this as a workaround so that the libc_has_function
6048    hook can be used from i386.md.  */
6049 bool
6050 ix86_libc_has_function (enum function_class fn_class)
6051 {
6052 return targetm.libc_has_function (fn_class);
6053 }
6054
6055 static bool
6056 ix86_function_ms_hook_prologue (const_tree fn)
6057 {
6058 if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
6059 {
6060 if (decl_function_context (fn) != NULL_TREE)
6061 error_at (DECL_SOURCE_LOCATION (fn),
6062 "ms_hook_prologue is not compatible with nested function");
6063 else
6064 return true;
6065 }
6066 return false;
6067 }
6068
6069 static enum calling_abi
6070 ix86_function_abi (const_tree fndecl)
6071 {
6072 if (! fndecl)
6073 return ix86_abi;
6074 return ix86_function_type_abi (TREE_TYPE (fndecl));
6075 }
6076
6077 /* Return SYSV_ABI or MS_ABI, depending on cfun, specifying the
6078    call ABI used.  */
6079 enum calling_abi
6080 ix86_cfun_abi (void)
6081 {
6082 if (! cfun)
6083 return ix86_abi;
6084 return cfun->machine->call_abi;
6085 }
6086
6087 /* Write the extra assembler code needed to declare a function properly. */
6088
6089 void
6090 ix86_asm_output_function_label (FILE *asm_out_file, const char *fname,
6091 tree decl)
6092 {
6093 bool is_ms_hook = ix86_function_ms_hook_prologue (decl);
6094
6095 if (is_ms_hook)
6096 {
6097 int i, filler_count = (TARGET_64BIT ? 32 : 16);
6098 unsigned int filler_cc = 0xcccccccc;
6099
6100 for (i = 0; i < filler_count; i += 4)
6101 fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc);
6102 }
6103
6104 #ifdef SUBTARGET_ASM_UNWIND_INIT
6105 SUBTARGET_ASM_UNWIND_INIT (asm_out_file);
6106 #endif
6107
6108 ASM_OUTPUT_LABEL (asm_out_file, fname);
6109
6110 /* Output magic byte marker, if hot-patch attribute is set. */
6111 if (is_ms_hook)
6112 {
6113 if (TARGET_64BIT)
6114 {
6115 /* leaq [%rsp + 0], %rsp */
6116 asm_fprintf (asm_out_file, ASM_BYTE
6117 "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n");
6118 }
6119 else
6120 {
6121 /* movl.s %edi, %edi
6122 push %ebp
6123 movl.s %esp, %ebp */
6124 asm_fprintf (asm_out_file, ASM_BYTE
6125 "0x8b, 0xff, 0x55, 0x8b, 0xec\n");
6126 }
6127 }
6128 }
6129
6130 /* regclass.c */
6131 extern void init_regs (void);
6132
6133 /* Implementation of the call ABI switching target hook.  The call register
6134    sets specific to FNDECL are selected.  See also
6135 ix86_conditional_register_usage for more details. */
6136 void
6137 ix86_call_abi_override (const_tree fndecl)
6138 {
6139 if (fndecl == NULL_TREE)
6140 cfun->machine->call_abi = ix86_abi;
6141 else
6142 cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
6143 }
6144
6145 /* The 64-bit MS and SYSV ABIs have different sets of call-used registers.
6146    Avoid the expensive re-initialization of init_regs each time we switch
6147    function context, since it is needed only during RTL expansion.  */
6148 static void
6149 ix86_maybe_switch_abi (void)
6150 {
6151 if (TARGET_64BIT &&
6152 call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI))
6153 reinit_regs ();
6154 }
6155
6156 /* Return true if a pseudo register should be created and used to hold
6157    the GOT address for PIC code.  */
6158 static bool
6159 ix86_use_pseudo_pic_reg (void)
6160 {
6161 if ((TARGET_64BIT
6162 && (ix86_cmodel == CM_SMALL_PIC
6163 || TARGET_PECOFF))
6164 || !flag_pic)
6165 return false;
6166 return true;
6167 }
6168
6169 /* Create and initialize PIC register if required. */
6170 static void
6171 ix86_init_pic_reg (void)
6172 {
6173 edge entry_edge;
6174 rtx_insn *seq;
6175
6176 if (!ix86_use_pseudo_pic_reg ())
6177 return;
6178
6179 start_sequence ();
6180
6181 if (TARGET_64BIT)
6182 {
6183 if (ix86_cmodel == CM_LARGE_PIC)
6184 {
6185 rtx_code_label *label;
6186 rtx tmp_reg;
6187
6188 gcc_assert (Pmode == DImode);
6189 label = gen_label_rtx ();
6190 emit_label (label);
6191 LABEL_PRESERVE_P (label) = 1;
6192 tmp_reg = gen_rtx_REG (Pmode, R11_REG);
6193 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
6194 emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx,
6195 label));
6196 emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
6197 emit_insn (ix86_gen_add3 (pic_offset_table_rtx,
6198 pic_offset_table_rtx, tmp_reg));
6199 }
6200 else
6201 emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
6202 }
6203 else
6204 {
6205 rtx insn = emit_insn (gen_set_got (pic_offset_table_rtx));
6206 RTX_FRAME_RELATED_P (insn) = 1;
6207 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
6208 }
6209
6210 seq = get_insns ();
6211 end_sequence ();
6212
6213 entry_edge = single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun));
6214 insert_insn_on_edge (seq, entry_edge);
6215 commit_one_edge_insertion (entry_edge);
6216 }
6217
6218 /* Initialize a variable CUM of type CUMULATIVE_ARGS
6219 for a call to a function whose data type is FNTYPE.
6220 For a library call, FNTYPE is 0. */
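/* As an illustration of the setup below: for the 64-bit SysV ABI this
   yields cum->nregs == X86_64_REGPARM_MAX (6 integer registers: rdi, rsi,
   rdx, rcx, r8, r9) and cum->sse_nregs == X86_64_SSE_REGPARM_MAX (8,
   xmm0-xmm7); the 64-bit MS ABI gets 4 of each, and 32-bit code starts
   from ix86_regparm and the fastcall/thiscall/regparm attributes.  */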
6221
6222 void
6223 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
6224 tree fntype, /* tree ptr for function decl */
6225 rtx libname, /* SYMBOL_REF of library name or 0 */
6226 tree fndecl,
6227 int caller)
6228 {
6229 struct cgraph_local_info *i;
6230
6231 memset (cum, 0, sizeof (*cum));
6232
6233 if (fndecl)
6234 {
6235 i = cgraph_node::local_info (fndecl);
6236 cum->call_abi = ix86_function_abi (fndecl);
6237 }
6238 else
6239 {
6240 i = NULL;
6241 cum->call_abi = ix86_function_type_abi (fntype);
6242 }
6243
6244 cum->caller = caller;
6245
6246 /* Set up the number of registers to use for passing arguments. */
6247 cum->nregs = ix86_regparm;
6248 if (TARGET_64BIT)
6249 {
6250 cum->nregs = (cum->call_abi == SYSV_ABI
6251 ? X86_64_REGPARM_MAX
6252 : X86_64_MS_REGPARM_MAX);
6253 }
6254 if (TARGET_SSE)
6255 {
6256 cum->sse_nregs = SSE_REGPARM_MAX;
6257 if (TARGET_64BIT)
6258 {
6259 cum->sse_nregs = (cum->call_abi == SYSV_ABI
6260 ? X86_64_SSE_REGPARM_MAX
6261 : X86_64_MS_SSE_REGPARM_MAX);
6262 }
6263 }
6264 if (TARGET_MMX)
6265 cum->mmx_nregs = MMX_REGPARM_MAX;
6266 cum->warn_avx512f = true;
6267 cum->warn_avx = true;
6268 cum->warn_sse = true;
6269 cum->warn_mmx = true;
6270
6271 /* Because the type might not match between caller and callee, we need to
6272 use the actual type of the function for local calls.
6273 FIXME: cgraph_analyze can be told to actually record if function uses
6274 va_start so for local functions maybe_vaarg can be made aggressive
6275 helping K&R code.
6276 FIXME: once the type system is fixed, we won't need this code anymore. */
6277 if (i && i->local && i->can_change_signature)
6278 fntype = TREE_TYPE (fndecl);
6279 cum->maybe_vaarg = (fntype
6280 ? (!prototype_p (fntype) || stdarg_p (fntype))
6281 : !libname);
6282
6283 if (!TARGET_64BIT)
6284 {
6285 /* If there are variable arguments, then we won't pass anything
6286 in registers in 32-bit mode. */
6287 if (stdarg_p (fntype))
6288 {
6289 cum->nregs = 0;
6290 cum->sse_nregs = 0;
6291 cum->mmx_nregs = 0;
6292 cum->warn_avx512f = false;
6293 cum->warn_avx = false;
6294 cum->warn_sse = false;
6295 cum->warn_mmx = false;
6296 return;
6297 }
6298
6299 /* Use ecx and edx registers if function has fastcall attribute,
6300 else look for regparm information. */
6301 if (fntype)
6302 {
6303 unsigned int ccvt = ix86_get_callcvt (fntype);
6304 if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
6305 {
6306 cum->nregs = 1;
6307 cum->fastcall = 1; /* Same first register as in fastcall. */
6308 }
6309 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
6310 {
6311 cum->nregs = 2;
6312 cum->fastcall = 1;
6313 }
6314 else
6315 cum->nregs = ix86_function_regparm (fntype, fndecl);
6316 }
6317
6318 /* Set up the number of SSE registers used for passing SFmode
6319 and DFmode arguments. Warn for mismatching ABI. */
6320 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
6321 }
6322 }
6323
6324 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
6325 But in the case of vector types, it is some vector mode.
6326
6327 When we have only some of our vector isa extensions enabled, then there
6328 are some modes for which vector_mode_supported_p is false. For these
6329 modes, the generic vector support in gcc will choose some non-vector mode
6330 in order to implement the type. By computing the natural mode, we'll
6331 select the proper ABI location for the operand and not depend on whatever
6332 the middle-end decides to do with these vector types.
6333
6334 The middle end can't deal with vector types larger than 16 bytes. In this
6335 case, we return the original mode and warn about the ABI change if CUM isn't
6336 NULL.
6337 
6338 If IN_RETURN is true, warn about the ABI change if the vector mode isn't
6339 available for the function return value. */
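/* As an illustration: for a type such as

     typedef int v4si __attribute__ ((vector_size (16)));

   TYPE_MODE is V4SImode when SSE is enabled, but without SSE the middle
   end falls back to a non-vector mode for the type; the loop below still
   computes V4SImode so the psABI argument location stays stable, and one
   of the -Wpsabi warnings is emitted to flag the difference.  */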
6340
6341 static enum machine_mode
6342 type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum,
6343 bool in_return)
6344 {
6345 enum machine_mode mode = TYPE_MODE (type);
6346
6347 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
6348 {
6349 HOST_WIDE_INT size = int_size_in_bytes (type);
6350 if ((size == 8 || size == 16 || size == 32 || size == 64)
6351 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
6352 && TYPE_VECTOR_SUBPARTS (type) > 1)
6353 {
6354 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
6355
6356 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
6357 mode = MIN_MODE_VECTOR_FLOAT;
6358 else
6359 mode = MIN_MODE_VECTOR_INT;
6360
6361 /* Get the mode which has this inner mode and number of units. */
6362 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
6363 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
6364 && GET_MODE_INNER (mode) == innermode)
6365 {
6366 if (size == 64 && !TARGET_AVX512F)
6367 {
6368 static bool warnedavx512f;
6369 static bool warnedavx512f_ret;
6370
6371 if (cum && cum->warn_avx512f && !warnedavx512f)
6372 {
6373 if (warning (OPT_Wpsabi, "AVX512F vector argument "
6374 "without AVX512F enabled changes the ABI"))
6375 warnedavx512f = true;
6376 }
6377 else if (in_return && !warnedavx512f_ret)
6378 {
6379 if (warning (OPT_Wpsabi, "AVX512F vector return "
6380 "without AVX512F enabled changes the ABI"))
6381 warnedavx512f_ret = true;
6382 }
6383
6384 return TYPE_MODE (type);
6385 }
6386 else if (size == 32 && !TARGET_AVX)
6387 {
6388 static bool warnedavx;
6389 static bool warnedavx_ret;
6390
6391 if (cum && cum->warn_avx && !warnedavx)
6392 {
6393 if (warning (OPT_Wpsabi, "AVX vector argument "
6394 "without AVX enabled changes the ABI"))
6395 warnedavx = true;
6396 }
6397 else if (in_return && !warnedavx_ret)
6398 {
6399 if (warning (OPT_Wpsabi, "AVX vector return "
6400 "without AVX enabled changes the ABI"))
6401 warnedavx_ret = true;
6402 }
6403
6404 return TYPE_MODE (type);
6405 }
6406 else if (((size == 8 && TARGET_64BIT) || size == 16)
6407 && !TARGET_SSE)
6408 {
6409 static bool warnedsse;
6410 static bool warnedsse_ret;
6411
6412 if (cum && cum->warn_sse && !warnedsse)
6413 {
6414 if (warning (OPT_Wpsabi, "SSE vector argument "
6415 "without SSE enabled changes the ABI"))
6416 warnedsse = true;
6417 }
6418 else if (!TARGET_64BIT && in_return && !warnedsse_ret)
6419 {
6420 if (warning (OPT_Wpsabi, "SSE vector return "
6421 "without SSE enabled changes the ABI"))
6422 warnedsse_ret = true;
6423 }
6424 }
6425 else if ((size == 8 && !TARGET_64BIT) && !TARGET_MMX)
6426 {
6427 static bool warnedmmx;
6428 static bool warnedmmx_ret;
6429
6430 if (cum && cum->warn_mmx && !warnedmmx)
6431 {
6432 if (warning (OPT_Wpsabi, "MMX vector argument "
6433 "without MMX enabled changes the ABI"))
6434 warnedmmx = true;
6435 }
6436 else if (in_return && !warnedmmx_ret)
6437 {
6438 if (warning (OPT_Wpsabi, "MMX vector return "
6439 "without MMX enabled changes the ABI"))
6440 warnedmmx_ret = true;
6441 }
6442 }
6443 return mode;
6444 }
6445
6446 gcc_unreachable ();
6447 }
6448 }
6449
6450 return mode;
6451 }
6452
6453 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
6454 this may not agree with the mode that the type system has chosen for the
6455 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
6456 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
6457
6458 static rtx
6459 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
6460 unsigned int regno)
6461 {
6462 rtx tmp;
6463
6464 if (orig_mode != BLKmode)
6465 tmp = gen_rtx_REG (orig_mode, regno);
6466 else
6467 {
6468 tmp = gen_rtx_REG (mode, regno);
6469 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
6470 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
6471 }
6472
6473 return tmp;
6474 }
6475
6476 /* x86-64 register passing implementation. See the x86-64 psABI for details.
6477 The goal of this code is to classify each eightbyte of the incoming argument
6478 by register class and assign registers accordingly. */
6479
6480 /* Return the union class of CLASS1 and CLASS2.
6481 See the x86-64 PS ABI for details. */
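/* As an illustration: for

     union u { long l; double d; };

   both members occupy the first eightbyte, so their classes (INTEGER and
   SSEDF) get merged; rule #4 below yields INTEGER, and the union is
   passed in a general purpose register.  */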
6482
6483 static enum x86_64_reg_class
6484 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
6485 {
6486 /* Rule #1: If both classes are equal, this is the resulting class. */
6487 if (class1 == class2)
6488 return class1;
6489
6490 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
6491 the other class. */
6492 if (class1 == X86_64_NO_CLASS)
6493 return class2;
6494 if (class2 == X86_64_NO_CLASS)
6495 return class1;
6496
6497 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
6498 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
6499 return X86_64_MEMORY_CLASS;
6500
6501 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
6502 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
6503 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
6504 return X86_64_INTEGERSI_CLASS;
6505 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
6506 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
6507 return X86_64_INTEGER_CLASS;
6508
6509 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
6510 MEMORY is used. */
6511 if (class1 == X86_64_X87_CLASS
6512 || class1 == X86_64_X87UP_CLASS
6513 || class1 == X86_64_COMPLEX_X87_CLASS
6514 || class2 == X86_64_X87_CLASS
6515 || class2 == X86_64_X87UP_CLASS
6516 || class2 == X86_64_COMPLEX_X87_CLASS)
6517 return X86_64_MEMORY_CLASS;
6518
6519 /* Rule #6: Otherwise class SSE is used. */
6520 return X86_64_SSE_CLASS;
6521 }
6522
6523 /* Classify the argument of type TYPE and mode MODE.
6524 CLASSES will be filled by the register class used to pass each word
6525 of the operand. The number of words is returned. In case the parameter
6526 should be passed in memory, 0 is returned. As a special case for zero
6527 sized containers, classes[0] will be NO_CLASS and 1 is returned.
6528
6529 BIT_OFFSET is used internally for handling records and specifies the
6530 offset in bits modulo 512 to avoid overflow cases.
6531
6532 See the x86-64 PS ABI for details.
6533 */
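/* As an illustration: for

     struct s { double d; int i; };

   the struct covers two eightbytes; the first is classified as SSE (the
   double) and the second as INTEGER (the int), so the value travels in
   one SSE register and one general purpose register.  A return value of
   0 from this function would instead mean "pass in memory".  */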
6534
6535 static int
6536 classify_argument (enum machine_mode mode, const_tree type,
6537 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
6538 {
6539 HOST_WIDE_INT bytes =
6540 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
6541 int words
6542 = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6543
6544 /* Variable sized entities are always passed/returned in memory. */
6545 if (bytes < 0)
6546 return 0;
6547
6548 if (mode != VOIDmode
6549 && targetm.calls.must_pass_in_stack (mode, type))
6550 return 0;
6551
6552 if (type && AGGREGATE_TYPE_P (type))
6553 {
6554 int i;
6555 tree field;
6556 enum x86_64_reg_class subclasses[MAX_CLASSES];
6557
6558 /* On x86-64 we pass structures larger than 64 bytes on the stack. */
6559 if (bytes > 64)
6560 return 0;
6561
6562 for (i = 0; i < words; i++)
6563 classes[i] = X86_64_NO_CLASS;
6564
6565 /* Zero-sized arrays or structures are NO_CLASS. We return 0 to
6566 signal the memory class, so handle it as a special case. */
6567 if (!words)
6568 {
6569 classes[0] = X86_64_NO_CLASS;
6570 return 1;
6571 }
6572
6573 /* Classify each field of record and merge classes. */
6574 switch (TREE_CODE (type))
6575 {
6576 case RECORD_TYPE:
6577 /* And now merge the fields of structure. */
6578 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6579 {
6580 if (TREE_CODE (field) == FIELD_DECL)
6581 {
6582 int num;
6583
6584 if (TREE_TYPE (field) == error_mark_node)
6585 continue;
6586
6587 /* Bitfields are always classified as integer. Handle them
6588 early, since later code would consider them to be
6589 misaligned integers. */
6590 if (DECL_BIT_FIELD (field))
6591 {
6592 for (i = (int_bit_position (field)
6593 + (bit_offset % 64)) / 8 / 8;
6594 i < ((int_bit_position (field) + (bit_offset % 64))
6595 + tree_to_shwi (DECL_SIZE (field))
6596 + 63) / 8 / 8; i++)
6597 classes[i] =
6598 merge_classes (X86_64_INTEGER_CLASS,
6599 classes[i]);
6600 }
6601 else
6602 {
6603 int pos;
6604
6605 type = TREE_TYPE (field);
6606
6607 /* Flexible array member is ignored. */
6608 if (TYPE_MODE (type) == BLKmode
6609 && TREE_CODE (type) == ARRAY_TYPE
6610 && TYPE_SIZE (type) == NULL_TREE
6611 && TYPE_DOMAIN (type) != NULL_TREE
6612 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
6613 == NULL_TREE))
6614 {
6615 static bool warned;
6616
6617 if (!warned && warn_psabi)
6618 {
6619 warned = true;
6620 inform (input_location,
6621 "the ABI of passing struct with"
6622 " a flexible array member has"
6623 " changed in GCC 4.4");
6624 }
6625 continue;
6626 }
6627 num = classify_argument (TYPE_MODE (type), type,
6628 subclasses,
6629 (int_bit_position (field)
6630 + bit_offset) % 512);
6631 if (!num)
6632 return 0;
6633 pos = (int_bit_position (field)
6634 + (bit_offset % 64)) / 8 / 8;
6635 for (i = 0; i < num && (i + pos) < words; i++)
6636 classes[i + pos] =
6637 merge_classes (subclasses[i], classes[i + pos]);
6638 }
6639 }
6640 }
6641 break;
6642
6643 case ARRAY_TYPE:
6644 /* Arrays are handled as small records. */
6645 {
6646 int num;
6647 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
6648 TREE_TYPE (type), subclasses, bit_offset);
6649 if (!num)
6650 return 0;
6651
6652 /* The partial classes are now full classes. */
6653 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
6654 subclasses[0] = X86_64_SSE_CLASS;
6655 if (subclasses[0] == X86_64_INTEGERSI_CLASS
6656 && !((bit_offset % 64) == 0 && bytes == 4))
6657 subclasses[0] = X86_64_INTEGER_CLASS;
6658
6659 for (i = 0; i < words; i++)
6660 classes[i] = subclasses[i % num];
6661
6662 break;
6663 }
6664 case UNION_TYPE:
6665 case QUAL_UNION_TYPE:
6666 /* Unions are similar to RECORD_TYPE, but the offset is always 0. */
6667 
6668 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6669 {
6670 if (TREE_CODE (field) == FIELD_DECL)
6671 {
6672 int num;
6673
6674 if (TREE_TYPE (field) == error_mark_node)
6675 continue;
6676
6677 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
6678 TREE_TYPE (field), subclasses,
6679 bit_offset);
6680 if (!num)
6681 return 0;
6682 for (i = 0; i < num && i < words; i++)
6683 classes[i] = merge_classes (subclasses[i], classes[i]);
6684 }
6685 }
6686 break;
6687
6688 default:
6689 gcc_unreachable ();
6690 }
6691
6692 if (words > 2)
6693 {
6694 /* When the size exceeds 16 bytes, if the first class isn't
6695 X86_64_SSE_CLASS or any of the remaining ones isn't
6696 X86_64_SSEUP_CLASS, everything should be passed in
6697 memory. */
6698 if (classes[0] != X86_64_SSE_CLASS)
6699 return 0;
6700
6701 for (i = 1; i < words; i++)
6702 if (classes[i] != X86_64_SSEUP_CLASS)
6703 return 0;
6704 }
6705
6706 /* Final merger cleanup. */
6707 for (i = 0; i < words; i++)
6708 {
6709 /* If one class is MEMORY, everything should be passed in
6710 memory. */
6711 if (classes[i] == X86_64_MEMORY_CLASS)
6712 return 0;
6713
6714 /* X86_64_SSEUP_CLASS should always be preceded by
6715 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
6716 if (classes[i] == X86_64_SSEUP_CLASS
6717 && classes[i - 1] != X86_64_SSE_CLASS
6718 && classes[i - 1] != X86_64_SSEUP_CLASS)
6719 {
6720 /* The first one should never be X86_64_SSEUP_CLASS. */
6721 gcc_assert (i != 0);
6722 classes[i] = X86_64_SSE_CLASS;
6723 }
6724
6725 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
6726 everything should be passed in memory. */
6727 if (classes[i] == X86_64_X87UP_CLASS
6728 && (classes[i - 1] != X86_64_X87_CLASS))
6729 {
6730 static bool warned;
6731
6732 /* The first one should never be X86_64_X87UP_CLASS. */
6733 gcc_assert (i != 0);
6734 if (!warned && warn_psabi)
6735 {
6736 warned = true;
6737 inform (input_location,
6738 "the ABI of passing union with long double"
6739 " has changed in GCC 4.4");
6740 }
6741 return 0;
6742 }
6743 }
6744 return words;
6745 }
6746
6747 /* Compute the alignment needed. We align all types to natural boundaries,
6748 with the exception of XFmode, which is aligned to 128 bits below. */
6749 if (mode != VOIDmode && mode != BLKmode)
6750 {
6751 int mode_alignment = GET_MODE_BITSIZE (mode);
6752
6753 if (mode == XFmode)
6754 mode_alignment = 128;
6755 else if (mode == XCmode)
6756 mode_alignment = 256;
6757 if (COMPLEX_MODE_P (mode))
6758 mode_alignment /= 2;
6759 /* Misaligned fields are always returned in memory. */
6760 if (bit_offset % mode_alignment)
6761 return 0;
6762 }
6763
6764 /* For V1xx modes, just use the base mode. */
6765 if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
6766 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
6767 mode = GET_MODE_INNER (mode);
6768
6769 /* Classification of atomic types. */
6770 switch (mode)
6771 {
6772 case SDmode:
6773 case DDmode:
6774 classes[0] = X86_64_SSE_CLASS;
6775 return 1;
6776 case TDmode:
6777 classes[0] = X86_64_SSE_CLASS;
6778 classes[1] = X86_64_SSEUP_CLASS;
6779 return 2;
6780 case DImode:
6781 case SImode:
6782 case HImode:
6783 case QImode:
6784 case CSImode:
6785 case CHImode:
6786 case CQImode:
6787 {
6788 int size = bit_offset + (int) GET_MODE_BITSIZE (mode);
6789
6790 /* Analyze last 128 bits only. */
6791 size = (size - 1) & 0x7f;
6792
6793 if (size < 32)
6794 {
6795 classes[0] = X86_64_INTEGERSI_CLASS;
6796 return 1;
6797 }
6798 else if (size < 64)
6799 {
6800 classes[0] = X86_64_INTEGER_CLASS;
6801 return 1;
6802 }
6803 else if (size < 64+32)
6804 {
6805 classes[0] = X86_64_INTEGER_CLASS;
6806 classes[1] = X86_64_INTEGERSI_CLASS;
6807 return 2;
6808 }
6809 else if (size < 64+64)
6810 {
6811 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
6812 return 2;
6813 }
6814 else
6815 gcc_unreachable ();
6816 }
6817 case CDImode:
6818 case TImode:
6819 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
6820 return 2;
6821 case COImode:
6822 case OImode:
6823 /* OImode shouldn't be used directly. */
6824 gcc_unreachable ();
6825 case CTImode:
6826 return 0;
6827 case SFmode:
6828 if (!(bit_offset % 64))
6829 classes[0] = X86_64_SSESF_CLASS;
6830 else
6831 classes[0] = X86_64_SSE_CLASS;
6832 return 1;
6833 case DFmode:
6834 classes[0] = X86_64_SSEDF_CLASS;
6835 return 1;
6836 case XFmode:
6837 classes[0] = X86_64_X87_CLASS;
6838 classes[1] = X86_64_X87UP_CLASS;
6839 return 2;
6840 case TFmode:
6841 classes[0] = X86_64_SSE_CLASS;
6842 classes[1] = X86_64_SSEUP_CLASS;
6843 return 2;
6844 case SCmode:
6845 classes[0] = X86_64_SSE_CLASS;
6846 if (!(bit_offset % 64))
6847 return 1;
6848 else
6849 {
6850 static bool warned;
6851
6852 if (!warned && warn_psabi)
6853 {
6854 warned = true;
6855 inform (input_location,
6856 "the ABI of passing structure with complex float"
6857 " member has changed in GCC 4.4");
6858 }
6859 classes[1] = X86_64_SSESF_CLASS;
6860 return 2;
6861 }
6862 case DCmode:
6863 classes[0] = X86_64_SSEDF_CLASS;
6864 classes[1] = X86_64_SSEDF_CLASS;
6865 return 2;
6866 case XCmode:
6867 classes[0] = X86_64_COMPLEX_X87_CLASS;
6868 return 1;
6869 case TCmode:
6870 /* This mode is larger than 16 bytes. */
6871 return 0;
6872 case V8SFmode:
6873 case V8SImode:
6874 case V32QImode:
6875 case V16HImode:
6876 case V4DFmode:
6877 case V4DImode:
6878 classes[0] = X86_64_SSE_CLASS;
6879 classes[1] = X86_64_SSEUP_CLASS;
6880 classes[2] = X86_64_SSEUP_CLASS;
6881 classes[3] = X86_64_SSEUP_CLASS;
6882 return 4;
6883 case V8DFmode:
6884 case V16SFmode:
6885 case V8DImode:
6886 case V16SImode:
6887 case V32HImode:
6888 case V64QImode:
6889 classes[0] = X86_64_SSE_CLASS;
6890 classes[1] = X86_64_SSEUP_CLASS;
6891 classes[2] = X86_64_SSEUP_CLASS;
6892 classes[3] = X86_64_SSEUP_CLASS;
6893 classes[4] = X86_64_SSEUP_CLASS;
6894 classes[5] = X86_64_SSEUP_CLASS;
6895 classes[6] = X86_64_SSEUP_CLASS;
6896 classes[7] = X86_64_SSEUP_CLASS;
6897 return 8;
6898 case V4SFmode:
6899 case V4SImode:
6900 case V16QImode:
6901 case V8HImode:
6902 case V2DFmode:
6903 case V2DImode:
6904 classes[0] = X86_64_SSE_CLASS;
6905 classes[1] = X86_64_SSEUP_CLASS;
6906 return 2;
6907 case V1TImode:
6908 case V1DImode:
6909 case V2SFmode:
6910 case V2SImode:
6911 case V4HImode:
6912 case V8QImode:
6913 classes[0] = X86_64_SSE_CLASS;
6914 return 1;
6915 case BLKmode:
6916 case VOIDmode:
6917 return 0;
6918 default:
6919 gcc_assert (VECTOR_MODE_P (mode));
6920
6921 if (bytes > 16)
6922 return 0;
6923
6924 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
6925
6926 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
6927 classes[0] = X86_64_INTEGERSI_CLASS;
6928 else
6929 classes[0] = X86_64_INTEGER_CLASS;
6930 classes[1] = X86_64_INTEGER_CLASS;
6931 return 1 + (bytes > 8);
6932 }
6933 }
6934
6935 /* Examine the argument and set the number of registers required in each
6936 class. Return true iff the parameter should be passed in memory. */
6937
6938 static bool
6939 examine_argument (enum machine_mode mode, const_tree type, int in_return,
6940 int *int_nregs, int *sse_nregs)
6941 {
6942 enum x86_64_reg_class regclass[MAX_CLASSES];
6943 int n = classify_argument (mode, type, regclass, 0);
6944
6945 *int_nregs = 0;
6946 *sse_nregs = 0;
6947
6948 if (!n)
6949 return true;
6950 for (n--; n >= 0; n--)
6951 switch (regclass[n])
6952 {
6953 case X86_64_INTEGER_CLASS:
6954 case X86_64_INTEGERSI_CLASS:
6955 (*int_nregs)++;
6956 break;
6957 case X86_64_SSE_CLASS:
6958 case X86_64_SSESF_CLASS:
6959 case X86_64_SSEDF_CLASS:
6960 (*sse_nregs)++;
6961 break;
6962 case X86_64_NO_CLASS:
6963 case X86_64_SSEUP_CLASS:
6964 break;
6965 case X86_64_X87_CLASS:
6966 case X86_64_X87UP_CLASS:
6967 case X86_64_COMPLEX_X87_CLASS:
6968 if (!in_return)
6969 return true;
6970 break;
6971 case X86_64_MEMORY_CLASS:
6972 gcc_unreachable ();
6973 }
6974
6975 return false;
6976 }
6977
6978 /* Construct container for the argument used by GCC interface. See
6979 FUNCTION_ARG for the detailed description. */
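/* As an illustration: a _Complex double value classifies as two SSEDF
   eightbytes, so none of the simple cases below apply and the function
   builds a PARALLEL of two DFmode registers, at byte offsets 0 and 8
   (%xmm0 and %xmm1 when used for a return value).  */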
6980
6981 static rtx
6982 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
6983 const_tree type, int in_return, int nintregs, int nsseregs,
6984 const int *intreg, int sse_regno)
6985 {
6986 /* The following variables hold the static issued_error state. */
6987 static bool issued_sse_arg_error;
6988 static bool issued_sse_ret_error;
6989 static bool issued_x87_ret_error;
6990
6991 enum machine_mode tmpmode;
6992 int bytes =
6993 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
6994 enum x86_64_reg_class regclass[MAX_CLASSES];
6995 int n;
6996 int i;
6997 int nexps = 0;
6998 int needed_sseregs, needed_intregs;
6999 rtx exp[MAX_CLASSES];
7000 rtx ret;
7001
7002 n = classify_argument (mode, type, regclass, 0);
7003 if (!n)
7004 return NULL;
7005 if (examine_argument (mode, type, in_return, &needed_intregs,
7006 &needed_sseregs))
7007 return NULL;
7008 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
7009 return NULL;
7010
7011 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
7012 some less clueful developer tries to use floating-point anyway. */
7013 if (needed_sseregs && !TARGET_SSE)
7014 {
7015 if (in_return)
7016 {
7017 if (!issued_sse_ret_error)
7018 {
7019 error ("SSE register return with SSE disabled");
7020 issued_sse_ret_error = true;
7021 }
7022 }
7023 else if (!issued_sse_arg_error)
7024 {
7025 error ("SSE register argument with SSE disabled");
7026 issued_sse_arg_error = true;
7027 }
7028 return NULL;
7029 }
7030
7031 /* Likewise, error if the ABI requires us to return values in the
7032 x87 registers and the user specified -mno-80387. */
7033 if (!TARGET_FLOAT_RETURNS_IN_80387 && in_return)
7034 for (i = 0; i < n; i++)
7035 if (regclass[i] == X86_64_X87_CLASS
7036 || regclass[i] == X86_64_X87UP_CLASS
7037 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
7038 {
7039 if (!issued_x87_ret_error)
7040 {
7041 error ("x87 register return with x87 disabled");
7042 issued_x87_ret_error = true;
7043 }
7044 return NULL;
7045 }
7046
7047 /* First construct simple cases. Avoid SCmode, since we want to use
7048 a single register to pass this type. */
7049 if (n == 1 && mode != SCmode)
7050 switch (regclass[0])
7051 {
7052 case X86_64_INTEGER_CLASS:
7053 case X86_64_INTEGERSI_CLASS:
7054 return gen_rtx_REG (mode, intreg[0]);
7055 case X86_64_SSE_CLASS:
7056 case X86_64_SSESF_CLASS:
7057 case X86_64_SSEDF_CLASS:
7058 if (mode != BLKmode)
7059 return gen_reg_or_parallel (mode, orig_mode,
7060 SSE_REGNO (sse_regno));
7061 break;
7062 case X86_64_X87_CLASS:
7063 case X86_64_COMPLEX_X87_CLASS:
7064 return gen_rtx_REG (mode, FIRST_STACK_REG);
7065 case X86_64_NO_CLASS:
7066 /* Zero sized array, struct or class. */
7067 return NULL;
7068 default:
7069 gcc_unreachable ();
7070 }
7071 if (n == 2
7072 && regclass[0] == X86_64_SSE_CLASS
7073 && regclass[1] == X86_64_SSEUP_CLASS
7074 && mode != BLKmode)
7075 return gen_reg_or_parallel (mode, orig_mode,
7076 SSE_REGNO (sse_regno));
7077 if (n == 4
7078 && regclass[0] == X86_64_SSE_CLASS
7079 && regclass[1] == X86_64_SSEUP_CLASS
7080 && regclass[2] == X86_64_SSEUP_CLASS
7081 && regclass[3] == X86_64_SSEUP_CLASS
7082 && mode != BLKmode)
7083 return gen_reg_or_parallel (mode, orig_mode,
7084 SSE_REGNO (sse_regno));
7085 if (n == 8
7086 && regclass[0] == X86_64_SSE_CLASS
7087 && regclass[1] == X86_64_SSEUP_CLASS
7088 && regclass[2] == X86_64_SSEUP_CLASS
7089 && regclass[3] == X86_64_SSEUP_CLASS
7090 && regclass[4] == X86_64_SSEUP_CLASS
7091 && regclass[5] == X86_64_SSEUP_CLASS
7092 && regclass[6] == X86_64_SSEUP_CLASS
7093 && regclass[7] == X86_64_SSEUP_CLASS
7094 && mode != BLKmode)
7095 return gen_reg_or_parallel (mode, orig_mode,
7096 SSE_REGNO (sse_regno));
7097 if (n == 2
7098 && regclass[0] == X86_64_X87_CLASS
7099 && regclass[1] == X86_64_X87UP_CLASS)
7100 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
7101
7102 if (n == 2
7103 && regclass[0] == X86_64_INTEGER_CLASS
7104 && regclass[1] == X86_64_INTEGER_CLASS
7105 && (mode == CDImode || mode == TImode)
7106 && intreg[0] + 1 == intreg[1])
7107 return gen_rtx_REG (mode, intreg[0]);
7108
7109 /* Otherwise figure out the entries of the PARALLEL. */
7110 for (i = 0; i < n; i++)
7111 {
7112 int pos;
7113
7114 switch (regclass[i])
7115 {
7116 case X86_64_NO_CLASS:
7117 break;
7118 case X86_64_INTEGER_CLASS:
7119 case X86_64_INTEGERSI_CLASS:
7120 /* Merge TImodes on aligned occasions here too. */
7121 if (i * 8 + 8 > bytes)
7122 tmpmode
7123 = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
7124 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
7125 tmpmode = SImode;
7126 else
7127 tmpmode = DImode;
7128 /* We've requested 24 bytes for which we
7129 don't have a mode. Use DImode. */
7130 if (tmpmode == BLKmode)
7131 tmpmode = DImode;
7132 exp [nexps++]
7133 = gen_rtx_EXPR_LIST (VOIDmode,
7134 gen_rtx_REG (tmpmode, *intreg),
7135 GEN_INT (i*8));
7136 intreg++;
7137 break;
7138 case X86_64_SSESF_CLASS:
7139 exp [nexps++]
7140 = gen_rtx_EXPR_LIST (VOIDmode,
7141 gen_rtx_REG (SFmode,
7142 SSE_REGNO (sse_regno)),
7143 GEN_INT (i*8));
7144 sse_regno++;
7145 break;
7146 case X86_64_SSEDF_CLASS:
7147 exp [nexps++]
7148 = gen_rtx_EXPR_LIST (VOIDmode,
7149 gen_rtx_REG (DFmode,
7150 SSE_REGNO (sse_regno)),
7151 GEN_INT (i*8));
7152 sse_regno++;
7153 break;
7154 case X86_64_SSE_CLASS:
7155 pos = i;
7156 switch (n)
7157 {
7158 case 1:
7159 tmpmode = DImode;
7160 break;
7161 case 2:
7162 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
7163 {
7164 tmpmode = TImode;
7165 i++;
7166 }
7167 else
7168 tmpmode = DImode;
7169 break;
7170 case 4:
7171 gcc_assert (i == 0
7172 && regclass[1] == X86_64_SSEUP_CLASS
7173 && regclass[2] == X86_64_SSEUP_CLASS
7174 && regclass[3] == X86_64_SSEUP_CLASS);
7175 tmpmode = OImode;
7176 i += 3;
7177 break;
7178 case 8:
7179 gcc_assert (i == 0
7180 && regclass[1] == X86_64_SSEUP_CLASS
7181 && regclass[2] == X86_64_SSEUP_CLASS
7182 && regclass[3] == X86_64_SSEUP_CLASS
7183 && regclass[4] == X86_64_SSEUP_CLASS
7184 && regclass[5] == X86_64_SSEUP_CLASS
7185 && regclass[6] == X86_64_SSEUP_CLASS
7186 && regclass[7] == X86_64_SSEUP_CLASS);
7187 tmpmode = XImode;
7188 i += 7;
7189 break;
7190 default:
7191 gcc_unreachable ();
7192 }
7193 exp [nexps++]
7194 = gen_rtx_EXPR_LIST (VOIDmode,
7195 gen_rtx_REG (tmpmode,
7196 SSE_REGNO (sse_regno)),
7197 GEN_INT (pos*8));
7198 sse_regno++;
7199 break;
7200 default:
7201 gcc_unreachable ();
7202 }
7203 }
7204
7205 /* Empty aligned struct, union or class. */
7206 if (nexps == 0)
7207 return NULL;
7208
7209 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
7210 for (i = 0; i < nexps; i++)
7211 XVECEXP (ret, 0, i) = exp [i];
7212 return ret;
7213 }
7214
7215 /* Update the data in CUM to advance over an argument of mode MODE
7216 and data type TYPE. (TYPE is null for libcalls where that information
7217 may not be available.) */
7218
7219 static void
7220 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7221 const_tree type, HOST_WIDE_INT bytes,
7222 HOST_WIDE_INT words)
7223 {
7224 switch (mode)
7225 {
7226 default:
7227 break;
7228
7229 case BLKmode:
7230 if (bytes < 0)
7231 break;
7232 /* FALLTHRU */
7233
7234 case DImode:
7235 case SImode:
7236 case HImode:
7237 case QImode:
7238 cum->words += words;
7239 cum->nregs -= words;
7240 cum->regno += words;
7241
7242 if (cum->nregs <= 0)
7243 {
7244 cum->nregs = 0;
7245 cum->regno = 0;
7246 }
7247 break;
7248
7249 case OImode:
7250 /* OImode shouldn't be used directly. */
7251 gcc_unreachable ();
7252
7253 case DFmode:
7254 if (cum->float_in_sse < 2)
7255 break;
7256 case SFmode:
7257 if (cum->float_in_sse < 1)
7258 break;
7259 /* FALLTHRU */
7260
7261 case V8SFmode:
7262 case V8SImode:
7263 case V64QImode:
7264 case V32HImode:
7265 case V16SImode:
7266 case V8DImode:
7267 case V16SFmode:
7268 case V8DFmode:
7269 case V32QImode:
7270 case V16HImode:
7271 case V4DFmode:
7272 case V4DImode:
7273 case TImode:
7274 case V16QImode:
7275 case V8HImode:
7276 case V4SImode:
7277 case V2DImode:
7278 case V4SFmode:
7279 case V2DFmode:
7280 if (!type || !AGGREGATE_TYPE_P (type))
7281 {
7282 cum->sse_words += words;
7283 cum->sse_nregs -= 1;
7284 cum->sse_regno += 1;
7285 if (cum->sse_nregs <= 0)
7286 {
7287 cum->sse_nregs = 0;
7288 cum->sse_regno = 0;
7289 }
7290 }
7291 break;
7292
7293 case V8QImode:
7294 case V4HImode:
7295 case V2SImode:
7296 case V2SFmode:
7297 case V1TImode:
7298 case V1DImode:
7299 if (!type || !AGGREGATE_TYPE_P (type))
7300 {
7301 cum->mmx_words += words;
7302 cum->mmx_nregs -= 1;
7303 cum->mmx_regno += 1;
7304 if (cum->mmx_nregs <= 0)
7305 {
7306 cum->mmx_nregs = 0;
7307 cum->mmx_regno = 0;
7308 }
7309 }
7310 break;
7311 }
7312 }
7313
7314 static void
7315 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7316 const_tree type, HOST_WIDE_INT words, bool named)
7317 {
7318 int int_nregs, sse_nregs;
7319
7320 /* Unnamed 512-bit and 256-bit vector mode parameters are passed on the stack. */
7321 if (!named && (VALID_AVX512F_REG_MODE (mode)
7322 || VALID_AVX256_REG_MODE (mode)))
7323 return;
7324
7325 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
7326 && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
7327 {
7328 cum->nregs -= int_nregs;
7329 cum->sse_nregs -= sse_nregs;
7330 cum->regno += int_nregs;
7331 cum->sse_regno += sse_nregs;
7332 }
7333 else
7334 {
7335 int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
7336 cum->words = (cum->words + align - 1) & ~(align - 1);
7337 cum->words += words;
7338 }
7339 }
7340
7341 static void
7342 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
7343 HOST_WIDE_INT words)
7344 {
7345 /* Otherwise, this should be passed indirectly. */
7346 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
7347
7348 cum->words += words;
7349 if (cum->nregs > 0)
7350 {
7351 cum->nregs -= 1;
7352 cum->regno += 1;
7353 }
7354 }
7355
7356 /* Update the data in CUM to advance over an argument of mode MODE and
7357 data type TYPE. (TYPE is null for libcalls where that information
7358 may not be available.) */
7359
7360 static void
7361 ix86_function_arg_advance (cumulative_args_t cum_v, enum machine_mode mode,
7362 const_tree type, bool named)
7363 {
7364 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7365 HOST_WIDE_INT bytes, words;
7366
7367 if (mode == BLKmode)
7368 bytes = int_size_in_bytes (type);
7369 else
7370 bytes = GET_MODE_SIZE (mode);
7371 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7372
7373 if (type)
7374 mode = type_natural_mode (type, NULL, false);
7375
7376 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7377 function_arg_advance_ms_64 (cum, bytes, words);
7378 else if (TARGET_64BIT)
7379 function_arg_advance_64 (cum, mode, type, words, named);
7380 else
7381 function_arg_advance_32 (cum, mode, type, bytes, words);
7382 }
7383
7384 /* Define where to put the arguments to a function.
7385 Value is zero to push the argument on the stack,
7386 or a hard register in which to store the argument.
7387
7388 MODE is the argument's machine mode.
7389 TYPE is the data type of the argument (as a tree).
7390 This is null for libcalls where that information may
7391 not be available.
7392 CUM is a variable of type CUMULATIVE_ARGS which gives info about
7393 the preceding args and about the function being called.
7394 NAMED is nonzero if this argument is a named parameter
7395 (otherwise it is an extra parameter matching an ellipsis). */
7396
7397 static rtx
7398 function_arg_32 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
7399 enum machine_mode orig_mode, const_tree type,
7400 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
7401 {
7402 /* Avoid the AL settings for the Unix64 ABI. */
7403 if (mode == VOIDmode)
7404 return constm1_rtx;
7405
7406 switch (mode)
7407 {
7408 default:
7409 break;
7410
7411 case BLKmode:
7412 if (bytes < 0)
7413 break;
7414 /* FALLTHRU */
7415 case DImode:
7416 case SImode:
7417 case HImode:
7418 case QImode:
7419 if (words <= cum->nregs)
7420 {
7421 int regno = cum->regno;
7422
7423 /* Fastcall allocates the first two DWORD (SImode) or
7424 smaller arguments to ECX and EDX if they aren't
7425 aggregate types. */
7426 if (cum->fastcall)
7427 {
7428 if (mode == BLKmode
7429 || mode == DImode
7430 || (type && AGGREGATE_TYPE_P (type)))
7431 break;
7432
7433 /* ECX not EAX is the first allocated register. */
7434 if (regno == AX_REG)
7435 regno = CX_REG;
7436 }
7437 return gen_rtx_REG (mode, regno);
7438 }
7439 break;
7440
7441 case DFmode:
7442 if (cum->float_in_sse < 2)
7443 break;
7444 case SFmode:
7445 if (cum->float_in_sse < 1)
7446 break;
7447 /* FALLTHRU */
7448 case TImode:
7449 /* In 32bit, we pass TImode in xmm registers. */
7450 case V16QImode:
7451 case V8HImode:
7452 case V4SImode:
7453 case V2DImode:
7454 case V4SFmode:
7455 case V2DFmode:
7456 if (!type || !AGGREGATE_TYPE_P (type))
7457 {
7458 if (cum->sse_nregs)
7459 return gen_reg_or_parallel (mode, orig_mode,
7460 cum->sse_regno + FIRST_SSE_REG);
7461 }
7462 break;
7463
7464 case OImode:
7465 case XImode:
7466 /* OImode and XImode shouldn't be used directly. */
7467 gcc_unreachable ();
7468
7469 case V64QImode:
7470 case V32HImode:
7471 case V16SImode:
7472 case V8DImode:
7473 case V16SFmode:
7474 case V8DFmode:
7475 case V8SFmode:
7476 case V8SImode:
7477 case V32QImode:
7478 case V16HImode:
7479 case V4DFmode:
7480 case V4DImode:
7481 if (!type || !AGGREGATE_TYPE_P (type))
7482 {
7483 if (cum->sse_nregs)
7484 return gen_reg_or_parallel (mode, orig_mode,
7485 cum->sse_regno + FIRST_SSE_REG);
7486 }
7487 break;
7488
7489 case V8QImode:
7490 case V4HImode:
7491 case V2SImode:
7492 case V2SFmode:
7493 case V1TImode:
7494 case V1DImode:
7495 if (!type || !AGGREGATE_TYPE_P (type))
7496 {
7497 if (cum->mmx_nregs)
7498 return gen_reg_or_parallel (mode, orig_mode,
7499 cum->mmx_regno + FIRST_MMX_REG);
7500 }
7501 break;
7502 }
7503
7504 return NULL_RTX;
7505 }
7506
7507 static rtx
7508 function_arg_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
7509 enum machine_mode orig_mode, const_tree type, bool named)
7510 {
7511 /* Handle a hidden AL argument containing number of registers
7512 for varargs x86-64 functions. */
7513 if (mode == VOIDmode)
7514 return GEN_INT (cum->maybe_vaarg
7515 ? (cum->sse_nregs < 0
7516 ? X86_64_SSE_REGPARM_MAX
7517 : cum->sse_regno)
7518 : -1);
7519
7520 switch (mode)
7521 {
7522 default:
7523 break;
7524
7525 case V8SFmode:
7526 case V8SImode:
7527 case V32QImode:
7528 case V16HImode:
7529 case V4DFmode:
7530 case V4DImode:
7531 case V16SFmode:
7532 case V16SImode:
7533 case V64QImode:
7534 case V32HImode:
7535 case V8DFmode:
7536 case V8DImode:
7537 /* Unnamed 256-bit and 512-bit vector mode parameters are passed on the stack. */
7538 if (!named)
7539 return NULL;
7540 break;
7541 }
7542
7543 return construct_container (mode, orig_mode, type, 0, cum->nregs,
7544 cum->sse_nregs,
7545 &x86_64_int_parameter_registers [cum->regno],
7546 cum->sse_regno);
7547 }
7548
7549 static rtx
7550 function_arg_ms_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
7551 enum machine_mode orig_mode, bool named,
7552 HOST_WIDE_INT bytes)
7553 {
7554 unsigned int regno;
7555
7556 /* We need to add a clobber for MS_ABI->SYSV ABI calls in expand_call.
7557 We use the value -2 to specify that the current function call uses the MS ABI. */
7558 if (mode == VOIDmode)
7559 return GEN_INT (-2);
7560
7561 /* If we've run out of registers, it goes on the stack. */
7562 if (cum->nregs == 0)
7563 return NULL_RTX;
7564
7565 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
7566
7567 /* Only floating point modes are passed in anything but integer regs. */
7568 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
7569 {
7570 if (named)
7571 regno = cum->regno + FIRST_SSE_REG;
7572 else
7573 {
7574 rtx t1, t2;
7575
7576 /* Unnamed floating parameters are passed in both the
7577 SSE and integer registers. */
7578 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
7579 t2 = gen_rtx_REG (mode, regno);
7580 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
7581 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
7582 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
7583 }
7584 }
7585 /* Handle aggregate types passed in registers. */
7586 if (orig_mode == BLKmode)
7587 {
7588 if (bytes > 0 && bytes <= 8)
7589 mode = (bytes > 4 ? DImode : SImode);
7590 if (mode == BLKmode)
7591 mode = DImode;
7592 }
7593
7594 return gen_reg_or_parallel (mode, orig_mode, regno);
7595 }
7596
7597 /* Return where to put the arguments to a function.
7598 Return zero to push the argument on the stack, or a hard register in which to store the argument.
7599
7600 MODE is the argument's machine mode. TYPE is the data type of the
7601 argument. It is null for libcalls where that information may not be
7602 available. CUM gives information about the preceding args and about
7603 the function being called. NAMED is nonzero if this argument is a
7604 named parameter (otherwise it is an extra parameter matching an
7605 ellipsis). */
7606
7607 static rtx
7608 ix86_function_arg (cumulative_args_t cum_v, enum machine_mode omode,
7609 const_tree type, bool named)
7610 {
7611 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7612 enum machine_mode mode = omode;
7613 HOST_WIDE_INT bytes, words;
7614 rtx arg;
7615
7616 if (mode == BLKmode)
7617 bytes = int_size_in_bytes (type);
7618 else
7619 bytes = GET_MODE_SIZE (mode);
7620 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7621
7622 /* To simplify the code below, represent vector types with a vector mode
7623 even if MMX/SSE are not active. */
7624 if (type && TREE_CODE (type) == VECTOR_TYPE)
7625 mode = type_natural_mode (type, cum, false);
7626
7627 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7628 arg = function_arg_ms_64 (cum, mode, omode, named, bytes);
7629 else if (TARGET_64BIT)
7630 arg = function_arg_64 (cum, mode, omode, type, named);
7631 else
7632 arg = function_arg_32 (cum, mode, omode, type, bytes, words);
7633
7634 return arg;
7635 }
7636
7637 /* A C expression that indicates when an argument must be passed by
7638 reference. If nonzero for an argument, a copy of that argument is
7639 made in memory and a pointer to the argument is passed instead of
7640 the argument itself. The pointer is passed in whatever way is
7641 appropriate for passing a pointer to that type. */
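/* As an illustration: under the 64-bit MS ABI the code below passes any
   array, and any aggregate whose size is not 1, 2, 4 or 8 bytes (e.g. a
   3-byte or 16-byte struct), by reference; on the 64-bit SysV side only
   variable-sized types are forced through this path.  */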
7642
7643 static bool
7644 ix86_pass_by_reference (cumulative_args_t cum_v, enum machine_mode mode,
7645 const_tree type, bool)
7646 {
7647 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7648
7649 /* See Windows x64 Software Convention. */
7650 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7651 {
7652 int msize = (int) GET_MODE_SIZE (mode);
7653 if (type)
7654 {
7655 /* Arrays are passed by reference. */
7656 if (TREE_CODE (type) == ARRAY_TYPE)
7657 return true;
7658
7659 if (AGGREGATE_TYPE_P (type))
7660 {
7661 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
7662 are passed by reference. */
7663 msize = int_size_in_bytes (type);
7664 }
7665 }
7666
7667 /* __m128 is passed by reference. */
7668 switch (msize) {
7669 case 1: case 2: case 4: case 8:
7670 break;
7671 default:
7672 return true;
7673 }
7674 }
7675 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
7676 return true;
7677
7678 return false;
7679 }
7680
7681 /* Return true when TYPE should be 128bit aligned for 32bit argument
7682 passing ABI. XXX: This function is obsolete and is only used for
7683 checking psABI compatibility with previous versions of GCC. */
7684
7685 static bool
7686 ix86_compat_aligned_value_p (const_tree type)
7687 {
7688 enum machine_mode mode = TYPE_MODE (type);
7689 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
7690 || mode == TDmode
7691 || mode == TFmode
7692 || mode == TCmode)
7693 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
7694 return true;
7695 if (TYPE_ALIGN (type) < 128)
7696 return false;
7697
7698 if (AGGREGATE_TYPE_P (type))
7699 {
7700 /* Walk the aggregates recursively. */
7701 switch (TREE_CODE (type))
7702 {
7703 case RECORD_TYPE:
7704 case UNION_TYPE:
7705 case QUAL_UNION_TYPE:
7706 {
7707 tree field;
7708
7709 /* Walk all the structure fields. */
7710 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
7711 {
7712 if (TREE_CODE (field) == FIELD_DECL
7713 && ix86_compat_aligned_value_p (TREE_TYPE (field)))
7714 return true;
7715 }
7716 break;
7717 }
7718
7719 case ARRAY_TYPE:
7720 /* Just for use if some languages pass arrays by value. */
7721 if (ix86_compat_aligned_value_p (TREE_TYPE (type)))
7722 return true;
7723 break;
7724
7725 default:
7726 gcc_unreachable ();
7727 }
7728 }
7729 return false;
7730 }
7731
7732 /* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
7733 XXX: This function is obsolete and is only used for checking psABI
7734 compatibility with previous versions of GCC. */
7735
7736 static unsigned int
7737 ix86_compat_function_arg_boundary (enum machine_mode mode,
7738 const_tree type, unsigned int align)
7739 {
7740 /* In 32bit, only _Decimal128 and __float128 are aligned to their
7741 natural boundaries. */
7742 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
7743 {
7744 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
7745 make an exception for SSE modes since these require 128bit
7746 alignment.
7747
7748 The handling here differs from field_alignment. ICC aligns MMX
7749 arguments to 4 byte boundaries, while structure fields are aligned
7750 to 8 byte boundaries. */
7751 if (!type)
7752 {
7753 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
7754 align = PARM_BOUNDARY;
7755 }
7756 else
7757 {
7758 if (!ix86_compat_aligned_value_p (type))
7759 align = PARM_BOUNDARY;
7760 }
7761 }
7762 if (align > BIGGEST_ALIGNMENT)
7763 align = BIGGEST_ALIGNMENT;
7764 return align;
7765 }
7766
7767 /* Return true when TYPE should be 128bit aligned for 32bit argument
7768 passing ABI. */
7769
7770 static bool
7771 ix86_contains_aligned_value_p (const_tree type)
7772 {
7773 enum machine_mode mode = TYPE_MODE (type);
7774
7775 if (mode == XFmode || mode == XCmode)
7776 return false;
7777
7778 if (TYPE_ALIGN (type) < 128)
7779 return false;
7780
7781 if (AGGREGATE_TYPE_P (type))
7782 {
7783 /* Walk the aggregates recursively. */
7784 switch (TREE_CODE (type))
7785 {
7786 case RECORD_TYPE:
7787 case UNION_TYPE:
7788 case QUAL_UNION_TYPE:
7789 {
7790 tree field;
7791
7792 /* Walk all the structure fields. */
7793 for (field = TYPE_FIELDS (type);
7794 field;
7795 field = DECL_CHAIN (field))
7796 {
7797 if (TREE_CODE (field) == FIELD_DECL
7798 && ix86_contains_aligned_value_p (TREE_TYPE (field)))
7799 return true;
7800 }
7801 break;
7802 }
7803
7804 case ARRAY_TYPE:
7805 /* Just for use if some languages pass arrays by value. */
7806 if (ix86_contains_aligned_value_p (TREE_TYPE (type)))
7807 return true;
7808 break;
7809
7810 default:
7811 gcc_unreachable ();
7812 }
7813 }
7814 else
7815 return TYPE_ALIGN (type) >= 128;
7816
7817 return false;
7818 }
7819
7820 /* Gives the alignment boundary, in bits, of an argument with the
7821 specified mode and type. */
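/* As an illustration: with the 32-bit ABI a double argument ends up with
   PARM_BOUNDARY (32-bit) alignment, while a 16-byte aligned value such as
   __m128 keeps its 128-bit alignment; the warn_psabi note below flags
   arguments whose alignment differs from the pre-GCC 4.6 behaviour.  */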
7822
7823 static unsigned int
7824 ix86_function_arg_boundary (enum machine_mode mode, const_tree type)
7825 {
7826 unsigned int align;
7827 if (type)
7828 {
7829 /* Since the main variant type is used for the call, convert the type
7830 to its main variant. */
7831 type = TYPE_MAIN_VARIANT (type);
7832 align = TYPE_ALIGN (type);
7833 }
7834 else
7835 align = GET_MODE_ALIGNMENT (mode);
7836 if (align < PARM_BOUNDARY)
7837 align = PARM_BOUNDARY;
7838 else
7839 {
7840 static bool warned;
7841 unsigned int saved_align = align;
7842
7843 if (!TARGET_64BIT)
7844 {
7845 /* i386 ABI defines XFmode arguments to be 4 byte aligned. */
7846 if (!type)
7847 {
7848 if (mode == XFmode || mode == XCmode)
7849 align = PARM_BOUNDARY;
7850 }
7851 else if (!ix86_contains_aligned_value_p (type))
7852 align = PARM_BOUNDARY;
7853
7854 if (align < 128)
7855 align = PARM_BOUNDARY;
7856 }
7857
7858 if (warn_psabi
7859 && !warned
7860 && align != ix86_compat_function_arg_boundary (mode, type,
7861 saved_align))
7862 {
7863 warned = true;
7864 inform (input_location,
7865 "The ABI for passing parameters with %d-byte"
7866 " alignment has changed in GCC 4.6",
7867 align / BITS_PER_UNIT);
7868 }
7869 }
7870
7871 return align;
7872 }
7873
7874 /* Return true if N is a possible register number of function value. */
7875
7876 static bool
7877 ix86_function_value_regno_p (const unsigned int regno)
7878 {
7879 switch (regno)
7880 {
7881 case AX_REG:
7882 return true;
7883 case DX_REG:
7884 return (!TARGET_64BIT || ix86_abi != MS_ABI);
7885 case DI_REG:
7886 case SI_REG:
7887 return TARGET_64BIT && ix86_abi != MS_ABI;
7888
7889 /* Complex values are returned in %st(0)/%st(1) pair. */
7890 case ST0_REG:
7891 case ST1_REG:
7892 /* TODO: The function should depend on current function ABI but
7893 builtins.c would need updating then. Therefore we use the
7894 default ABI. */
7895 if (TARGET_64BIT && ix86_abi == MS_ABI)
7896 return false;
7897 return TARGET_FLOAT_RETURNS_IN_80387;
7898
7899 /* Complex values are returned in %xmm0/%xmm1 pair. */
7900 case XMM0_REG:
7901 case XMM1_REG:
7902 return TARGET_SSE;
7903
7904 case MM0_REG:
7905 if (TARGET_MACHO || TARGET_64BIT)
7906 return false;
7907 return TARGET_MMX;
7908 }
7909
7910 return false;
7911 }
7912
7913 /* Define how to find the value returned by a function.
7914 VALTYPE is the data type of the value (as a tree).
7915 If the precise function being called is known, FUNC is its FUNCTION_DECL;
7916 otherwise, FUNC is 0. */
7917
7918 static rtx
7919 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
7920 const_tree fntype, const_tree fn)
7921 {
7922 unsigned int regno;
7923
7924 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
7925 we normally prevent this case when mmx is not available. However
7926 some ABIs may require the result to be returned like DImode. */
7927 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
7928 regno = FIRST_MMX_REG;
7929
7930 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
7931 we prevent this case when sse is not available. However some ABIs
7932 may require the result to be returned like integer TImode. */
7933 else if (mode == TImode
7934 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
7935 regno = FIRST_SSE_REG;
7936
7937 /* 32-byte vector modes in %ymm0. */
7938 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
7939 regno = FIRST_SSE_REG;
7940
7941 /* 64-byte vector modes in %zmm0. */
7942 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
7943 regno = FIRST_SSE_REG;
7944
7945 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
7946 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
7947 regno = FIRST_FLOAT_REG;
7948 else
7949 /* Most things go in %eax. */
7950 regno = AX_REG;
7951
7952 /* Override FP return register with %xmm0 for local functions when
7953 SSE math is enabled or for functions with sseregparm attribute. */
7954 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
7955 {
7956 int sse_level = ix86_function_sseregparm (fntype, fn, false);
7957 if ((sse_level >= 1 && mode == SFmode)
7958 || (sse_level == 2 && mode == DFmode))
7959 regno = FIRST_SSE_REG;
7960 }
7961
7962 /* OImode shouldn't be used directly. */
7963 gcc_assert (mode != OImode);
7964
7965 return gen_rtx_REG (orig_mode, regno);
7966 }
7967
7968 static rtx
7969 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
7970 const_tree valtype)
7971 {
7972 rtx ret;
7973
7974 /* Handle libcalls, which don't provide a type node. */
7975 if (valtype == NULL)
7976 {
7977 unsigned int regno;
7978
7979 switch (mode)
7980 {
7981 case SFmode:
7982 case SCmode:
7983 case DFmode:
7984 case DCmode:
7985 case TFmode:
7986 case SDmode:
7987 case DDmode:
7988 case TDmode:
7989 regno = FIRST_SSE_REG;
7990 break;
7991 case XFmode:
7992 case XCmode:
7993 regno = FIRST_FLOAT_REG;
7994 break;
7995 case TCmode:
7996 return NULL;
7997 default:
7998 regno = AX_REG;
7999 }
8000
8001 return gen_rtx_REG (mode, regno);
8002 }
8003 else if (POINTER_TYPE_P (valtype))
8004 {
8005 /* Pointers are always returned in word_mode. */
8006 mode = word_mode;
8007 }
8008
8009 ret = construct_container (mode, orig_mode, valtype, 1,
8010 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
8011 x86_64_int_return_registers, 0);
8012
8013 /* For zero-sized structures, construct_container returns NULL, but we
8014 need to keep the rest of the compiler happy by returning a meaningful value. */
8015 if (!ret)
8016 ret = gen_rtx_REG (orig_mode, AX_REG);
8017
8018 return ret;
8019 }
8020
8021 static rtx
8022 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode,
8023 const_tree valtype)
8024 {
8025 unsigned int regno = AX_REG;
8026
8027 if (TARGET_SSE)
8028 {
8029 switch (GET_MODE_SIZE (mode))
8030 {
8031 case 16:
8032 if (valtype != NULL_TREE
8033 && !VECTOR_INTEGER_TYPE_P (valtype)
8034 && !INTEGRAL_TYPE_P (valtype)
8035 && !VECTOR_FLOAT_TYPE_P (valtype))
8036 break;
8037 
8038 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
8039 && !COMPLEX_MODE_P (mode))
8040 regno = FIRST_SSE_REG;
8041 break;
8042 case 8:
8043 case 4:
8044 if (mode == SFmode || mode == DFmode)
8045 regno = FIRST_SSE_REG;
8046 break;
8047 default:
8048 break;
8049 }
8050 }
8051 return gen_rtx_REG (orig_mode, regno);
8052 }
8053
8054 static rtx
8055 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
8056 enum machine_mode orig_mode, enum machine_mode mode)
8057 {
8058 const_tree fn, fntype;
8059
8060 fn = NULL_TREE;
8061 if (fntype_or_decl && DECL_P (fntype_or_decl))
8062 fn = fntype_or_decl;
8063 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
8064
8065 if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
8066 return function_value_ms_64 (orig_mode, mode, valtype);
8067 else if (TARGET_64BIT)
8068 return function_value_64 (orig_mode, mode, valtype);
8069 else
8070 return function_value_32 (orig_mode, mode, fntype, fn);
8071 }
8072
8073 static rtx
8074 ix86_function_value (const_tree valtype, const_tree fntype_or_decl, bool)
8075 {
8076 enum machine_mode mode, orig_mode;
8077
8078 orig_mode = TYPE_MODE (valtype);
8079 mode = type_natural_mode (valtype, NULL, true);
8080 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
8081 }
8082
8083 /* Pointer function arguments and return values are promoted to
8084 word_mode. */
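/* As an illustration: with -mx32, Pmode is SImode but word_mode is
   DImode, so pointer arguments and return values are widened to 64 bits
   here and zero-extended (POINTERS_EXTEND_UNSIGNED); everything else
   falls through to the default promotion rules.  */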
8085
8086 static enum machine_mode
8087 ix86_promote_function_mode (const_tree type, enum machine_mode mode,
8088 int *punsignedp, const_tree fntype,
8089 int for_return)
8090 {
8091 if (type != NULL_TREE && POINTER_TYPE_P (type))
8092 {
8093 *punsignedp = POINTERS_EXTEND_UNSIGNED;
8094 return word_mode;
8095 }
8096 return default_promote_function_mode (type, mode, punsignedp, fntype,
8097 for_return);
8098 }
8099
8100 /* Return true if a structure, union or array with MODE containing FIELD
8101 should be accessed using BLKmode. */
8102
8103 static bool
8104 ix86_member_type_forces_blk (const_tree field, enum machine_mode mode)
8105 {
8106 /* Union with XFmode must be in BLKmode. */
8107 return (mode == XFmode
8108 && (TREE_CODE (DECL_FIELD_CONTEXT (field)) == UNION_TYPE
8109 || TREE_CODE (DECL_FIELD_CONTEXT (field)) == QUAL_UNION_TYPE));
8110 }
8111
8112 rtx
8113 ix86_libcall_value (enum machine_mode mode)
8114 {
8115 return ix86_function_value_1 (NULL, NULL, mode, mode);
8116 }
8117
8118 /* Return true iff type is returned in memory. */
8119
8120 static bool
8121 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
8122 {
8123 #ifdef SUBTARGET_RETURN_IN_MEMORY
8124 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
8125 #else
8126 const enum machine_mode mode = type_natural_mode (type, NULL, true);
8127 HOST_WIDE_INT size;
8128
8129 if (TARGET_64BIT)
8130 {
8131 if (ix86_function_type_abi (fntype) == MS_ABI)
8132 {
8133 size = int_size_in_bytes (type);
8134
8135 /* __m128 is returned in xmm0. */
8136 if ((!type || VECTOR_INTEGER_TYPE_P (type)
8137 || INTEGRAL_TYPE_P (type)
8138 || VECTOR_FLOAT_TYPE_P (type))
8139 && (SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
8140 && !COMPLEX_MODE_P (mode)
8141 && (GET_MODE_SIZE (mode) == 16 || size == 16))
8142 return false;
8143
8144 /* Otherwise, the size must be exactly 1, 2, 4 or 8 bytes. */
8145 return size != 1 && size != 2 && size != 4 && size != 8;
8146 }
8147 else
8148 {
8149 int needed_intregs, needed_sseregs;
8150
8151 return examine_argument (mode, type, 1,
8152 &needed_intregs, &needed_sseregs);
8153 }
8154 }
8155 else
8156 {
8157 if (mode == BLKmode)
8158 return true;
8159
8160 size = int_size_in_bytes (type);
8161
8162 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
8163 return false;
8164
8165 if (VECTOR_MODE_P (mode) || mode == TImode)
8166 {
8167 /* User-created vectors small enough to fit in EAX. */
8168 if (size < 8)
8169 return false;
8170
8171 /* Unless the ABI prescribes otherwise,
8172 MMX/3dNow values are returned in MM0 if available. */
8173
8174 if (size == 8)
8175 return TARGET_VECT8_RETURNS || !TARGET_MMX;
8176
8177 /* SSE values are returned in XMM0 if available. */
8178 if (size == 16)
8179 return !TARGET_SSE;
8180
8181 /* AVX values are returned in YMM0 if available. */
8182 if (size == 32)
8183 return !TARGET_AVX;
8184
8185 /* AVX512F values are returned in ZMM0 if available. */
8186 if (size == 64)
8187 return !TARGET_AVX512F;
8188 }
8189
8190 if (mode == XFmode)
8191 return false;
8192
8193 if (size > 12)
8194 return true;
8195
8196 /* OImode shouldn't be used directly. */
8197 gcc_assert (mode != OImode);
8198
8199 return false;
8200 }
8201 #endif
8202 }
8203
8204 \f
8205 /* Create the va_list data type. */
8206
8207 /* Returns the calling-convention-specific va_list data type.
8208 The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI. */
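/* As an illustration, the record built below corresponds to the layout
   required by the x86-64 psABI:

     typedef struct {
       unsigned int gp_offset;
       unsigned int fp_offset;
       void *overflow_arg_area;
       void *reg_save_area;
     } __va_list_tag;

   and the builtin va_list type is a one-element array of this record.  */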
8209
8210 static tree
8211 ix86_build_builtin_va_list_abi (enum calling_abi abi)
8212 {
8213 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
8214
8215 /* For i386 we use a plain pointer to the argument area. */
8216 if (!TARGET_64BIT || abi == MS_ABI)
8217 return build_pointer_type (char_type_node);
8218
8219 record = lang_hooks.types.make_type (RECORD_TYPE);
8220 type_decl = build_decl (BUILTINS_LOCATION,
8221 TYPE_DECL, get_identifier ("__va_list_tag"), record);
8222
8223 f_gpr = build_decl (BUILTINS_LOCATION,
8224 FIELD_DECL, get_identifier ("gp_offset"),
8225 unsigned_type_node);
8226 f_fpr = build_decl (BUILTINS_LOCATION,
8227 FIELD_DECL, get_identifier ("fp_offset"),
8228 unsigned_type_node);
8229 f_ovf = build_decl (BUILTINS_LOCATION,
8230 FIELD_DECL, get_identifier ("overflow_arg_area"),
8231 ptr_type_node);
8232 f_sav = build_decl (BUILTINS_LOCATION,
8233 FIELD_DECL, get_identifier ("reg_save_area"),
8234 ptr_type_node);
8235
8236 va_list_gpr_counter_field = f_gpr;
8237 va_list_fpr_counter_field = f_fpr;
8238
8239 DECL_FIELD_CONTEXT (f_gpr) = record;
8240 DECL_FIELD_CONTEXT (f_fpr) = record;
8241 DECL_FIELD_CONTEXT (f_ovf) = record;
8242 DECL_FIELD_CONTEXT (f_sav) = record;
8243
8244 TYPE_STUB_DECL (record) = type_decl;
8245 TYPE_NAME (record) = type_decl;
8246 TYPE_FIELDS (record) = f_gpr;
8247 DECL_CHAIN (f_gpr) = f_fpr;
8248 DECL_CHAIN (f_fpr) = f_ovf;
8249 DECL_CHAIN (f_ovf) = f_sav;
8250
8251 layout_type (record);
8252
8253 /* The correct type is an array type of one element. */
8254 return build_array_type (record, build_index_type (size_zero_node));
8255 }
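
/* For reference, the SYSV_ABI record built above corresponds roughly to
   the familiar C declaration (a sketch only, not generated code):

       typedef struct __va_list_tag
       {
         unsigned int gp_offset;     -- offset into reg_save_area for GPRs
         unsigned int fp_offset;     -- offset into reg_save_area for XMMs
         void *overflow_arg_area;    -- next stack-passed argument
         void *reg_save_area;        -- start of the register save area
       } __va_list_tag;
       typedef __va_list_tag __builtin_va_list[1];
*/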
8256
8257 /* Set up the builtin va_list data type and, for 64-bit, the additional
8258 calling convention specific va_list data types. */
8259
8260 static tree
8261 ix86_build_builtin_va_list (void)
8262 {
8263 tree ret = ix86_build_builtin_va_list_abi (ix86_abi);
8264
8265 /* Initialize abi specific va_list builtin types. */
8266 if (TARGET_64BIT)
8267 {
8268 tree t;
8269 if (ix86_abi == MS_ABI)
8270 {
8271 t = ix86_build_builtin_va_list_abi (SYSV_ABI);
8272 if (TREE_CODE (t) != RECORD_TYPE)
8273 t = build_variant_type_copy (t);
8274 sysv_va_list_type_node = t;
8275 }
8276 else
8277 {
8278 t = ret;
8279 if (TREE_CODE (t) != RECORD_TYPE)
8280 t = build_variant_type_copy (t);
8281 sysv_va_list_type_node = t;
8282 }
8283 if (ix86_abi != MS_ABI)
8284 {
8285 t = ix86_build_builtin_va_list_abi (MS_ABI);
8286 if (TREE_CODE (t) != RECORD_TYPE)
8287 t = build_variant_type_copy (t);
8288 ms_va_list_type_node = t;
8289 }
8290 else
8291 {
8292 t = ret;
8293 if (TREE_CODE (t) != RECORD_TYPE)
8294 t = build_variant_type_copy (t);
8295 ms_va_list_type_node = t;
8296 }
8297 }
8298
8299 return ret;
8300 }
8301
8302 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
8303
8304 static void
8305 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
8306 {
8307 rtx save_area, mem;
8308 alias_set_type set;
8309 int i, max;
8310
8311 /* GPR size of varargs save area. */
8312 if (cfun->va_list_gpr_size)
8313 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
8314 else
8315 ix86_varargs_gpr_size = 0;
8316
8317 /* FPR size of varargs save area. We don't need it if we don't pass
8318 anything in SSE registers. */
8319 if (TARGET_SSE && cfun->va_list_fpr_size)
8320 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
8321 else
8322 ix86_varargs_fpr_size = 0;
8323
8324 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
8325 return;
8326
8327 save_area = frame_pointer_rtx;
8328 set = get_varargs_alias_set ();
8329
8330 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
8331 if (max > X86_64_REGPARM_MAX)
8332 max = X86_64_REGPARM_MAX;
8333
8334 for (i = cum->regno; i < max; i++)
8335 {
8336 mem = gen_rtx_MEM (word_mode,
8337 plus_constant (Pmode, save_area, i * UNITS_PER_WORD));
8338 MEM_NOTRAP_P (mem) = 1;
8339 set_mem_alias_set (mem, set);
8340 emit_move_insn (mem,
8341 gen_rtx_REG (word_mode,
8342 x86_64_int_parameter_registers[i]));
8343 }
8344
8345 if (ix86_varargs_fpr_size)
8346 {
8347 enum machine_mode smode;
8348 rtx_code_label *label;
8349 rtx test;
8350
8351 /* Now emit code to save SSE registers. The AX parameter contains the number
8352 of SSE parameter registers used to call this function, though all we
8353 actually check here is the zero/non-zero status. */
8354
8355 label = gen_label_rtx ();
8356 test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
8357 emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
8358 label));
8359
8360 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
8361 we used movdqa (i.e. TImode) instead? Perhaps even better would
8362 be if we could determine the real mode of the data, via a hook
8363 into pass_stdarg. Ignore all that for now. */
8364 smode = V4SFmode;
8365 if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
8366 crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);
8367
8368 max = cum->sse_regno + cfun->va_list_fpr_size / 16;
8369 if (max > X86_64_SSE_REGPARM_MAX)
8370 max = X86_64_SSE_REGPARM_MAX;
8371
8372 for (i = cum->sse_regno; i < max; ++i)
8373 {
8374 mem = plus_constant (Pmode, save_area,
8375 i * 16 + ix86_varargs_gpr_size);
8376 mem = gen_rtx_MEM (smode, mem);
8377 MEM_NOTRAP_P (mem) = 1;
8378 set_mem_alias_set (mem, set);
8379 set_mem_align (mem, GET_MODE_ALIGNMENT (smode));
8380
8381 emit_move_insn (mem, gen_rtx_REG (smode, SSE_REGNO (i)));
8382 }
8383
8384 emit_label (label);
8385 }
8386 }
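
/* Layout of the register save area written above, relative to its base
   (a sketch; either part may be omitted when va_list_gpr_size or
   va_list_fpr_size says it is unused):

       [  0 ..  47]   rdi rsi rdx rcx r8 r9      (X86_64_REGPARM_MAX * 8)
       [ 48 .. 175]   xmm0 .. xmm7               (X86_64_SSE_REGPARM_MAX * 16)
*/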
8387
8388 static void
8389 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
8390 {
8391 alias_set_type set = get_varargs_alias_set ();
8392 int i;
8393
8394 /* Reset to zero, as there might be a SysV va_arg used
8395 before. */
8396 ix86_varargs_gpr_size = 0;
8397 ix86_varargs_fpr_size = 0;
8398
8399 for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
8400 {
8401 rtx reg, mem;
8402
8403 mem = gen_rtx_MEM (Pmode,
8404 plus_constant (Pmode, virtual_incoming_args_rtx,
8405 i * UNITS_PER_WORD));
8406 MEM_NOTRAP_P (mem) = 1;
8407 set_mem_alias_set (mem, set);
8408
8409 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
8410 emit_move_insn (mem, reg);
8411 }
8412 }
8413
8414 static void
8415 ix86_setup_incoming_varargs (cumulative_args_t cum_v, enum machine_mode mode,
8416 tree type, int *, int no_rtl)
8417 {
8418 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8419 CUMULATIVE_ARGS next_cum;
8420 tree fntype;
8421
8422 /* This argument doesn't appear to be used anymore, which is good,
8423 because the old code here didn't suppress rtl generation. */
8424 gcc_assert (!no_rtl);
8425
8426 if (!TARGET_64BIT)
8427 return;
8428
8429 fntype = TREE_TYPE (current_function_decl);
8430
8431 /* For varargs, we do not want to skip the dummy va_dcl argument.
8432 For stdargs, we do want to skip the last named argument. */
8433 next_cum = *cum;
8434 if (stdarg_p (fntype))
8435 ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
8436 true);
8437
8438 if (cum->call_abi == MS_ABI)
8439 setup_incoming_varargs_ms_64 (&next_cum);
8440 else
8441 setup_incoming_varargs_64 (&next_cum);
8442 }
8443
8444 /* Return true if TYPE is a va_list of kind char *. */
8445
8446 static bool
8447 is_va_list_char_pointer (tree type)
8448 {
8449 tree canonic;
8450
8451 /* For 32-bit it is always true. */
8452 if (!TARGET_64BIT)
8453 return true;
8454 canonic = ix86_canonical_va_list_type (type);
8455 return (canonic == ms_va_list_type_node
8456 || (ix86_abi == MS_ABI && canonic == va_list_type_node));
8457 }
8458
8459 /* Implement va_start. */
8460
8461 static void
8462 ix86_va_start (tree valist, rtx nextarg)
8463 {
8464 HOST_WIDE_INT words, n_gpr, n_fpr;
8465 tree f_gpr, f_fpr, f_ovf, f_sav;
8466 tree gpr, fpr, ovf, sav, t;
8467 tree type;
8468 rtx ovf_rtx;
8469
8470 if (flag_split_stack
8471 && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8472 {
8473 unsigned int scratch_regno;
8474
8475 /* When we are splitting the stack, we can't refer to the stack
8476 arguments using internal_arg_pointer, because they may be on
8477 the old stack. The split stack prologue will arrange to
8478 leave a pointer to the old stack arguments in a scratch
8479 register, which we here copy to a pseudo-register. The split
8480 stack prologue can't set the pseudo-register directly because
8481 it (the prologue) runs before any registers have been saved. */
8482
8483 scratch_regno = split_stack_prologue_scratch_regno ();
8484 if (scratch_regno != INVALID_REGNUM)
8485 {
8486 rtx reg;
8487 rtx_insn *seq;
8488
8489 reg = gen_reg_rtx (Pmode);
8490 cfun->machine->split_stack_varargs_pointer = reg;
8491
8492 start_sequence ();
8493 emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
8494 seq = get_insns ();
8495 end_sequence ();
8496
8497 push_topmost_sequence ();
8498 emit_insn_after (seq, entry_of_function ());
8499 pop_topmost_sequence ();
8500 }
8501 }
8502
8503 /* Only 64-bit targets need something special. */
8504 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
8505 {
8506 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8507 std_expand_builtin_va_start (valist, nextarg);
8508 else
8509 {
8510 rtx va_r, next;
8511
8512 va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
8513 next = expand_binop (ptr_mode, add_optab,
8514 cfun->machine->split_stack_varargs_pointer,
8515 crtl->args.arg_offset_rtx,
8516 NULL_RTX, 0, OPTAB_LIB_WIDEN);
8517 convert_move (va_r, next, 0);
8518 }
8519 return;
8520 }
8521
8522 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
8523 f_fpr = DECL_CHAIN (f_gpr);
8524 f_ovf = DECL_CHAIN (f_fpr);
8525 f_sav = DECL_CHAIN (f_ovf);
8526
8527 valist = build_simple_mem_ref (valist);
8528 TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
8529 /* The following should be folded into the MEM_REF offset. */
8530 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
8531 f_gpr, NULL_TREE);
8532 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
8533 f_fpr, NULL_TREE);
8534 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
8535 f_ovf, NULL_TREE);
8536 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
8537 f_sav, NULL_TREE);
8538
8539 /* Count number of gp and fp argument registers used. */
8540 words = crtl->args.info.words;
8541 n_gpr = crtl->args.info.regno;
8542 n_fpr = crtl->args.info.sse_regno;
8543
8544 if (cfun->va_list_gpr_size)
8545 {
8546 type = TREE_TYPE (gpr);
8547 t = build2 (MODIFY_EXPR, type,
8548 gpr, build_int_cst (type, n_gpr * 8));
8549 TREE_SIDE_EFFECTS (t) = 1;
8550 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8551 }
8552
8553 if (TARGET_SSE && cfun->va_list_fpr_size)
8554 {
8555 type = TREE_TYPE (fpr);
8556 t = build2 (MODIFY_EXPR, type, fpr,
8557 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
8558 TREE_SIDE_EFFECTS (t) = 1;
8559 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8560 }
8561
8562 /* Find the overflow area. */
8563 type = TREE_TYPE (ovf);
8564 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8565 ovf_rtx = crtl->args.internal_arg_pointer;
8566 else
8567 ovf_rtx = cfun->machine->split_stack_varargs_pointer;
8568 t = make_tree (type, ovf_rtx);
8569 if (words != 0)
8570 t = fold_build_pointer_plus_hwi (t, words * UNITS_PER_WORD);
8571 t = build2 (MODIFY_EXPR, type, ovf, t);
8572 TREE_SIDE_EFFECTS (t) = 1;
8573 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8574
8575 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
8576 {
8577 /* Find the register save area.
8578 The function prologue saves it right above the stack frame. */
8579 type = TREE_TYPE (sav);
8580 t = make_tree (type, frame_pointer_rtx);
8581 if (!ix86_varargs_gpr_size)
8582 t = fold_build_pointer_plus_hwi (t, -8 * X86_64_REGPARM_MAX);
8583 t = build2 (MODIFY_EXPR, type, sav, t);
8584 TREE_SIDE_EFFECTS (t) = 1;
8585 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8586 }
8587 }
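
/* Net effect of the assignments above for the common SysV case, written as
   C-like pseudo code (a sketch; split-stack and char * va_lists take the
   early-return path instead):

       ap->gp_offset = <named GPR args> * 8;
       ap->fp_offset = X86_64_REGPARM_MAX * 8 + <named SSE args> * 16;
       ap->overflow_arg_area = <incoming arg pointer> + <named stack words> * 8;
       ap->reg_save_area = <frame pointer>;   -- biased down by 8 * X86_64_REGPARM_MAX
                                                 when no GPRs are saved
*/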
8588
8589 /* Implement va_arg. */
8590
8591 static tree
8592 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
8593 gimple_seq *post_p)
8594 {
8595 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
8596 tree f_gpr, f_fpr, f_ovf, f_sav;
8597 tree gpr, fpr, ovf, sav, t;
8598 int size, rsize;
8599 tree lab_false, lab_over = NULL_TREE;
8600 tree addr, t2;
8601 rtx container;
8602 int indirect_p = 0;
8603 tree ptrtype;
8604 enum machine_mode nat_mode;
8605 unsigned int arg_boundary;
8606
8607 /* Only 64-bit targets need something special. */
8608 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
8609 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
8610
8611 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
8612 f_fpr = DECL_CHAIN (f_gpr);
8613 f_ovf = DECL_CHAIN (f_fpr);
8614 f_sav = DECL_CHAIN (f_ovf);
8615
8616 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
8617 build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE);
8618 valist = build_va_arg_indirect_ref (valist);
8619 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
8620 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
8621 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
8622
8623 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
8624 if (indirect_p)
8625 type = build_pointer_type (type);
8626 size = int_size_in_bytes (type);
8627 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
8628
8629 nat_mode = type_natural_mode (type, NULL, false);
8630 switch (nat_mode)
8631 {
8632 case V8SFmode:
8633 case V8SImode:
8634 case V32QImode:
8635 case V16HImode:
8636 case V4DFmode:
8637 case V4DImode:
8638 case V16SFmode:
8639 case V16SImode:
8640 case V64QImode:
8641 case V32HImode:
8642 case V8DFmode:
8643 case V8DImode:
8644 /* Unnamed 256-bit and 512-bit vector mode parameters are passed on the stack. */
8645 if (!TARGET_64BIT_MS_ABI)
8646 {
8647 container = NULL;
8648 break;
8649 }
8650
8651 default:
8652 container = construct_container (nat_mode, TYPE_MODE (type),
8653 type, 0, X86_64_REGPARM_MAX,
8654 X86_64_SSE_REGPARM_MAX, intreg,
8655 0);
8656 break;
8657 }
8658
8659 /* Pull the value out of the saved registers. */
8660
8661 addr = create_tmp_var (ptr_type_node, "addr");
8662
8663 if (container)
8664 {
8665 int needed_intregs, needed_sseregs;
8666 bool need_temp;
8667 tree int_addr, sse_addr;
8668
8669 lab_false = create_artificial_label (UNKNOWN_LOCATION);
8670 lab_over = create_artificial_label (UNKNOWN_LOCATION);
8671
8672 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
8673
8674 need_temp = (!REG_P (container)
8675 && ((needed_intregs && TYPE_ALIGN (type) > 64)
8676 || TYPE_ALIGN (type) > 128));
8677
8678 /* In case we are passing a structure, verify that it is a consecutive
8679 block in the register save area. If not, we need to do moves. */
8680 if (!need_temp && !REG_P (container))
8681 {
8682 /* Verify that all registers are strictly consecutive */
8683 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
8684 {
8685 int i;
8686
8687 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
8688 {
8689 rtx slot = XVECEXP (container, 0, i);
8690 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
8691 || INTVAL (XEXP (slot, 1)) != i * 16)
8692 need_temp = 1;
8693 }
8694 }
8695 else
8696 {
8697 int i;
8698
8699 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
8700 {
8701 rtx slot = XVECEXP (container, 0, i);
8702 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
8703 || INTVAL (XEXP (slot, 1)) != i * 8)
8704 need_temp = 1;
8705 }
8706 }
8707 }
8708 if (!need_temp)
8709 {
8710 int_addr = addr;
8711 sse_addr = addr;
8712 }
8713 else
8714 {
8715 int_addr = create_tmp_var (ptr_type_node, "int_addr");
8716 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
8717 }
8718
8719 /* First ensure that we fit completely in registers. */
8720 if (needed_intregs)
8721 {
8722 t = build_int_cst (TREE_TYPE (gpr),
8723 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
8724 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
8725 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
8726 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
8727 gimplify_and_add (t, pre_p);
8728 }
8729 if (needed_sseregs)
8730 {
8731 t = build_int_cst (TREE_TYPE (fpr),
8732 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
8733 + X86_64_REGPARM_MAX * 8);
8734 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
8735 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
8736 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
8737 gimplify_and_add (t, pre_p);
8738 }
8739
8740 /* Compute index to start of area used for integer regs. */
8741 if (needed_intregs)
8742 {
8743 /* int_addr = gpr + sav; */
8744 t = fold_build_pointer_plus (sav, gpr);
8745 gimplify_assign (int_addr, t, pre_p);
8746 }
8747 if (needed_sseregs)
8748 {
8749 /* sse_addr = fpr + sav; */
8750 t = fold_build_pointer_plus (sav, fpr);
8751 gimplify_assign (sse_addr, t, pre_p);
8752 }
8753 if (need_temp)
8754 {
8755 int i, prev_size = 0;
8756 tree temp = create_tmp_var (type, "va_arg_tmp");
8757
8758 /* addr = &temp; */
8759 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
8760 gimplify_assign (addr, t, pre_p);
8761
8762 for (i = 0; i < XVECLEN (container, 0); i++)
8763 {
8764 rtx slot = XVECEXP (container, 0, i);
8765 rtx reg = XEXP (slot, 0);
8766 enum machine_mode mode = GET_MODE (reg);
8767 tree piece_type;
8768 tree addr_type;
8769 tree daddr_type;
8770 tree src_addr, src;
8771 int src_offset;
8772 tree dest_addr, dest;
8773 int cur_size = GET_MODE_SIZE (mode);
8774
8775 gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
8776 prev_size = INTVAL (XEXP (slot, 1));
8777 if (prev_size + cur_size > size)
8778 {
8779 cur_size = size - prev_size;
8780 mode = mode_for_size (cur_size * BITS_PER_UNIT, MODE_INT, 1);
8781 if (mode == BLKmode)
8782 mode = QImode;
8783 }
8784 piece_type = lang_hooks.types.type_for_mode (mode, 1);
8785 if (mode == GET_MODE (reg))
8786 addr_type = build_pointer_type (piece_type);
8787 else
8788 addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
8789 true);
8790 daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
8791 true);
8792
8793 if (SSE_REGNO_P (REGNO (reg)))
8794 {
8795 src_addr = sse_addr;
8796 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
8797 }
8798 else
8799 {
8800 src_addr = int_addr;
8801 src_offset = REGNO (reg) * 8;
8802 }
8803 src_addr = fold_convert (addr_type, src_addr);
8804 src_addr = fold_build_pointer_plus_hwi (src_addr, src_offset);
8805
8806 dest_addr = fold_convert (daddr_type, addr);
8807 dest_addr = fold_build_pointer_plus_hwi (dest_addr, prev_size);
8808 if (cur_size == GET_MODE_SIZE (mode))
8809 {
8810 src = build_va_arg_indirect_ref (src_addr);
8811 dest = build_va_arg_indirect_ref (dest_addr);
8812
8813 gimplify_assign (dest, src, pre_p);
8814 }
8815 else
8816 {
8817 tree copy
8818 = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
8819 3, dest_addr, src_addr,
8820 size_int (cur_size));
8821 gimplify_and_add (copy, pre_p);
8822 }
8823 prev_size += cur_size;
8824 }
8825 }
8826
8827 if (needed_intregs)
8828 {
8829 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
8830 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
8831 gimplify_assign (gpr, t, pre_p);
8832 }
8833
8834 if (needed_sseregs)
8835 {
8836 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
8837 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
8838 gimplify_assign (fpr, t, pre_p);
8839 }
8840
8841 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
8842
8843 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
8844 }
8845
8846 /* ... otherwise out of the overflow area. */
8847
8848 /* When we align a parameter on the stack for the caller, if its
8849 alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be
8850 aligned at MAX_SUPPORTED_STACK_ALIGNMENT. Match the callee here
8851 with the caller. */
8852 arg_boundary = ix86_function_arg_boundary (VOIDmode, type);
8853 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
8854 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
8855
8856 /* Care for on-stack alignment if needed. */
8857 if (arg_boundary <= 64 || size == 0)
8858 t = ovf;
8859 else
8860 {
8861 HOST_WIDE_INT align = arg_boundary / 8;
8862 t = fold_build_pointer_plus_hwi (ovf, align - 1);
8863 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
8864 build_int_cst (TREE_TYPE (t), -align));
8865 }
8866
8867 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
8868 gimplify_assign (addr, t, pre_p);
8869
8870 t = fold_build_pointer_plus_hwi (t, rsize * UNITS_PER_WORD);
8871 gimplify_assign (unshare_expr (ovf), t, pre_p);
8872
8873 if (container)
8874 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
8875
8876 ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
8877 addr = fold_convert (ptrtype, addr);
8878
8879 if (indirect_p)
8880 addr = build_va_arg_indirect_ref (addr);
8881 return build_va_arg_indirect_ref (addr);
8882 }
8883 \f
8884 /* Return true if OPNUM's MEM should be matched
8885 in movabs* patterns. */
8886
8887 bool
8888 ix86_check_movabs (rtx insn, int opnum)
8889 {
8890 rtx set, mem;
8891
8892 set = PATTERN (insn);
8893 if (GET_CODE (set) == PARALLEL)
8894 set = XVECEXP (set, 0, 0);
8895 gcc_assert (GET_CODE (set) == SET);
8896 mem = XEXP (set, opnum);
8897 while (GET_CODE (mem) == SUBREG)
8898 mem = SUBREG_REG (mem);
8899 gcc_assert (MEM_P (mem));
8900 return volatile_ok || !MEM_VOLATILE_P (mem);
8901 }
8902 \f
8903 /* Initialize the table of extra 80387 mathematical constants. */
8904
8905 static void
8906 init_ext_80387_constants (void)
8907 {
8908 static const char * cst[5] =
8909 {
8910 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
8911 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
8912 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
8913 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
8914 "3.1415926535897932385128089594061862044", /* 4: fldpi */
8915 };
8916 int i;
8917
8918 for (i = 0; i < 5; i++)
8919 {
8920 real_from_string (&ext_80387_constants_table[i], cst[i]);
8921 /* Ensure each constant is rounded to XFmode precision. */
8922 real_convert (&ext_80387_constants_table[i],
8923 XFmode, &ext_80387_constants_table[i]);
8924 }
8925
8926 ext_80387_constants_init = 1;
8927 }
8928
8929 /* Return non-zero if the constant is something that
8930 can be loaded with a special instruction. */
8931
8932 int
8933 standard_80387_constant_p (rtx x)
8934 {
8935 enum machine_mode mode = GET_MODE (x);
8936
8937 REAL_VALUE_TYPE r;
8938
8939 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
8940 return -1;
8941
8942 if (x == CONST0_RTX (mode))
8943 return 1;
8944 if (x == CONST1_RTX (mode))
8945 return 2;
8946
8947 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8948
8949 /* For XFmode constants, try to find a special 80387 instruction when
8950 optimizing for size or on those CPUs that benefit from them. */
8951 if (mode == XFmode
8952 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
8953 {
8954 int i;
8955
8956 if (! ext_80387_constants_init)
8957 init_ext_80387_constants ();
8958
8959 for (i = 0; i < 5; i++)
8960 if (real_identical (&r, &ext_80387_constants_table[i]))
8961 return i + 3;
8962 }
8963
8964 /* Load of the constant -0.0 or -1.0 will be split as
8965 fldz;fchs or fld1;fchs sequence. */
8966 if (real_isnegzero (&r))
8967 return 8;
8968 if (real_identical (&r, &dconstm1))
8969 return 9;
8970
8971 return 0;
8972 }
8973
8974 /* Return the opcode of the special instruction to be used to load
8975 the constant X. */
8976
8977 const char *
8978 standard_80387_constant_opcode (rtx x)
8979 {
8980 switch (standard_80387_constant_p (x))
8981 {
8982 case 1:
8983 return "fldz";
8984 case 2:
8985 return "fld1";
8986 case 3:
8987 return "fldlg2";
8988 case 4:
8989 return "fldln2";
8990 case 5:
8991 return "fldl2e";
8992 case 6:
8993 return "fldl2t";
8994 case 7:
8995 return "fldpi";
8996 case 8:
8997 case 9:
8998 return "#";
8999 default:
9000 gcc_unreachable ();
9001 }
9002 }
9003
9004 /* Return the CONST_DOUBLE representing the 80387 constant that is
9005 loaded by the specified special instruction. The argument IDX
9006 matches the return value from standard_80387_constant_p. */
9007
9008 rtx
9009 standard_80387_constant_rtx (int idx)
9010 {
9011 int i;
9012
9013 if (! ext_80387_constants_init)
9014 init_ext_80387_constants ();
9015
9016 switch (idx)
9017 {
9018 case 3:
9019 case 4:
9020 case 5:
9021 case 6:
9022 case 7:
9023 i = idx - 3;
9024 break;
9025
9026 default:
9027 gcc_unreachable ();
9028 }
9029
9030 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
9031 XFmode);
9032 }
9033
9034 /* Return 1 if X is all 0s and 2 if X is all 1s
9035 in a supported SSE/AVX vector mode. */
9036
9037 int
9038 standard_sse_constant_p (rtx x)
9039 {
9040 enum machine_mode mode = GET_MODE (x);
9041
9042 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
9043 return 1;
9044 if (vector_all_ones_operand (x, mode))
9045 switch (mode)
9046 {
9047 case V16QImode:
9048 case V8HImode:
9049 case V4SImode:
9050 case V2DImode:
9051 if (TARGET_SSE2)
9052 return 2;
9053 case V32QImode:
9054 case V16HImode:
9055 case V8SImode:
9056 case V4DImode:
9057 if (TARGET_AVX2)
9058 return 2;
9059 case V64QImode:
9060 case V32HImode:
9061 case V16SImode:
9062 case V8DImode:
9063 if (TARGET_AVX512F)
9064 return 2;
9065 default:
9066 break;
9067 }
9068
9069 return 0;
9070 }
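
/* Examples of the classification above (a sketch): CONST0_RTX (V4SFmode)
   yields 1 and can be loaded with a single xorps/vxorps, while an all-ones
   V8SImode vector yields 2 on AVX2 targets and is loaded with vpcmpeqd.
   Anything else yields 0. */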
9071
9072 /* Return the opcode of the special instruction to be used to load
9073 the constant X. */
9074
9075 const char *
9076 standard_sse_constant_opcode (rtx_insn *insn, rtx x)
9077 {
9078 switch (standard_sse_constant_p (x))
9079 {
9080 case 1:
9081 switch (get_attr_mode (insn))
9082 {
9083 case MODE_XI:
9084 return "vpxord\t%g0, %g0, %g0";
9085 case MODE_V16SF:
9086 return TARGET_AVX512DQ ? "vxorps\t%g0, %g0, %g0"
9087 : "vpxord\t%g0, %g0, %g0";
9088 case MODE_V8DF:
9089 return TARGET_AVX512DQ ? "vxorpd\t%g0, %g0, %g0"
9090 : "vpxorq\t%g0, %g0, %g0";
9091 case MODE_TI:
9092 return TARGET_AVX512VL ? "vpxord\t%t0, %t0, %t0"
9093 : "%vpxor\t%0, %d0";
9094 case MODE_V2DF:
9095 return "%vxorpd\t%0, %d0";
9096 case MODE_V4SF:
9097 return "%vxorps\t%0, %d0";
9098
9099 case MODE_OI:
9100 return TARGET_AVX512VL ? "vpxord\t%x0, %x0, %x0"
9101 : "vpxor\t%x0, %x0, %x0";
9102 case MODE_V4DF:
9103 return "vxorpd\t%x0, %x0, %x0";
9104 case MODE_V8SF:
9105 return "vxorps\t%x0, %x0, %x0";
9106
9107 default:
9108 break;
9109 }
9110
9111 case 2:
9112 if (TARGET_AVX512VL
9113 || get_attr_mode (insn) == MODE_XI
9114 || get_attr_mode (insn) == MODE_V8DF
9115 || get_attr_mode (insn) == MODE_V16SF)
9116 return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
9117 if (TARGET_AVX)
9118 return "vpcmpeqd\t%0, %0, %0";
9119 else
9120 return "pcmpeqd\t%0, %0";
9121
9122 default:
9123 break;
9124 }
9125 gcc_unreachable ();
9126 }
9127
9128 /* Return true if OP contains a symbol reference. */
9129
9130 bool
9131 symbolic_reference_mentioned_p (rtx op)
9132 {
9133 const char *fmt;
9134 int i;
9135
9136 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
9137 return true;
9138
9139 fmt = GET_RTX_FORMAT (GET_CODE (op));
9140 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
9141 {
9142 if (fmt[i] == 'E')
9143 {
9144 int j;
9145
9146 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
9147 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
9148 return true;
9149 }
9150
9151 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
9152 return true;
9153 }
9154
9155 return false;
9156 }
9157
9158 /* Return true if it is appropriate to emit `ret' instructions in the
9159 body of a function. Do this only if the epilogue is simple, needing a
9160 couple of insns. Prior to reloading, we can't tell how many registers
9161 must be saved, so return false then. Return false if there is no frame
9162 marker to de-allocate. */
9163
9164 bool
9165 ix86_can_use_return_insn_p (void)
9166 {
9167 struct ix86_frame frame;
9168
9169 if (! reload_completed || frame_pointer_needed)
9170 return 0;
9171
9172 /* Don't allow more than 32k pop, since that's all we can do
9173 with one instruction. */
9174 if (crtl->args.pops_args && crtl->args.size >= 32768)
9175 return 0;
9176
9177 ix86_compute_frame_layout (&frame);
9178 return (frame.stack_pointer_offset == UNITS_PER_WORD
9179 && (frame.nregs + frame.nsseregs) == 0);
9180 }
9181 \f
9182 /* Value should be nonzero if functions must have frame pointers.
9183 Zero means the frame pointer need not be set up (and parms may
9184 be accessed via the stack pointer) in functions that seem suitable. */
9185
9186 static bool
9187 ix86_frame_pointer_required (void)
9188 {
9189 /* If we accessed previous frames, then the generated code expects
9190 to be able to access the saved ebp value in our frame. */
9191 if (cfun->machine->accesses_prev_frame)
9192 return true;
9193
9194 /* Several x86 OSes need a frame pointer for other reasons,
9195 usually pertaining to setjmp. */
9196 if (SUBTARGET_FRAME_POINTER_REQUIRED)
9197 return true;
9198
9199 /* For older 32-bit runtimes setjmp requires valid frame-pointer. */
9200 if (TARGET_32BIT_MS_ABI && cfun->calls_setjmp)
9201 return true;
9202
9203 /* For Win64 SEH, very large frames need a frame pointer, as the maximum
9204 stack allocation is 4GB. */
9205 if (TARGET_64BIT_MS_ABI && get_frame_size () > SEH_MAX_FRAME_SIZE)
9206 return true;
9207
9208 /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
9209 turns off the frame pointer by default. Turn it back on now if
9210 we've not got a leaf function. */
9211 if (TARGET_OMIT_LEAF_FRAME_POINTER
9212 && (!crtl->is_leaf
9213 || ix86_current_function_calls_tls_descriptor))
9214 return true;
9215
9216 if (crtl->profile && !flag_fentry)
9217 return true;
9218
9219 return false;
9220 }
9221
9222 /* Record that the current function accesses previous call frames. */
9223
9224 void
9225 ix86_setup_frame_addresses (void)
9226 {
9227 cfun->machine->accesses_prev_frame = 1;
9228 }
9229 \f
9230 #ifndef USE_HIDDEN_LINKONCE
9231 # if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
9232 # define USE_HIDDEN_LINKONCE 1
9233 # else
9234 # define USE_HIDDEN_LINKONCE 0
9235 # endif
9236 #endif
9237
9238 static int pic_labels_used;
9239
9240 /* Fills in the label name that should be used for a pc thunk for
9241 the given register. */
9242
9243 static void
9244 get_pc_thunk_name (char name[32], unsigned int regno)
9245 {
9246 gcc_assert (!TARGET_64BIT);
9247
9248 if (USE_HIDDEN_LINKONCE)
9249 sprintf (name, "__x86.get_pc_thunk.%s", reg_names[regno]);
9250 else
9251 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
9252 }
9253
9254
9255 /* This function generates the pc thunks used by -fpic code: each thunk
9256 loads its register with the return address of the caller and then returns. */
9257
9258 static void
9259 ix86_code_end (void)
9260 {
9261 rtx xops[2];
9262 int regno;
9263
9264 for (regno = AX_REG; regno <= SP_REG; regno++)
9265 {
9266 char name[32];
9267 tree decl;
9268
9269 if (!(pic_labels_used & (1 << regno)))
9270 continue;
9271
9272 get_pc_thunk_name (name, regno);
9273
9274 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
9275 get_identifier (name),
9276 build_function_type_list (void_type_node, NULL_TREE));
9277 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
9278 NULL_TREE, void_type_node);
9279 TREE_PUBLIC (decl) = 1;
9280 TREE_STATIC (decl) = 1;
9281 DECL_IGNORED_P (decl) = 1;
9282
9283 #if TARGET_MACHO
9284 if (TARGET_MACHO)
9285 {
9286 switch_to_section (darwin_sections[text_coal_section]);
9287 fputs ("\t.weak_definition\t", asm_out_file);
9288 assemble_name (asm_out_file, name);
9289 fputs ("\n\t.private_extern\t", asm_out_file);
9290 assemble_name (asm_out_file, name);
9291 putc ('\n', asm_out_file);
9292 ASM_OUTPUT_LABEL (asm_out_file, name);
9293 DECL_WEAK (decl) = 1;
9294 }
9295 else
9296 #endif
9297 if (USE_HIDDEN_LINKONCE)
9298 {
9299 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
9300
9301 targetm.asm_out.unique_section (decl, 0);
9302 switch_to_section (get_named_section (decl, NULL, 0));
9303
9304 targetm.asm_out.globalize_label (asm_out_file, name);
9305 fputs ("\t.hidden\t", asm_out_file);
9306 assemble_name (asm_out_file, name);
9307 putc ('\n', asm_out_file);
9308 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
9309 }
9310 else
9311 {
9312 switch_to_section (text_section);
9313 ASM_OUTPUT_LABEL (asm_out_file, name);
9314 }
9315
9316 DECL_INITIAL (decl) = make_node (BLOCK);
9317 current_function_decl = decl;
9318 init_function_start (decl);
9319 first_function_block_is_cold = false;
9320 /* Make sure unwind info is emitted for the thunk if needed. */
9321 final_start_function (emit_barrier (), asm_out_file, 1);
9322
9323 /* Pad stack IP move with 4 instructions (two NOPs count
9324 as one instruction). */
9325 if (TARGET_PAD_SHORT_FUNCTION)
9326 {
9327 int i = 8;
9328
9329 while (i--)
9330 fputs ("\tnop\n", asm_out_file);
9331 }
9332
9333 xops[0] = gen_rtx_REG (Pmode, regno);
9334 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
9335 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
9336 fputs ("\tret\n", asm_out_file);
9337 final_end_function ();
9338 init_insn_lengths ();
9339 free_after_compilation (cfun);
9340 set_cfun (NULL);
9341 current_function_decl = NULL;
9342 }
9343
9344 if (flag_split_stack)
9345 file_end_indicate_split_stack ();
9346 }
9347
9348 /* Emit code for the SET_GOT patterns. */
9349
9350 const char *
9351 output_set_got (rtx dest, rtx label)
9352 {
9353 rtx xops[3];
9354
9355 xops[0] = dest;
9356
9357 if (TARGET_VXWORKS_RTP && flag_pic)
9358 {
9359 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
9360 xops[2] = gen_rtx_MEM (Pmode,
9361 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
9362 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
9363
9364 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
9365 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
9366 an unadorned address. */
9367 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
9368 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
9369 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
9370 return "";
9371 }
9372
9373 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
9374
9375 if (!flag_pic)
9376 {
9377 if (TARGET_MACHO)
9378 /* We don't need a pic base, we're not producing pic. */
9379 gcc_unreachable ();
9380
9381 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
9382 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
9383 targetm.asm_out.internal_label (asm_out_file, "L",
9384 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
9385 }
9386 else
9387 {
9388 char name[32];
9389 get_pc_thunk_name (name, REGNO (dest));
9390 pic_labels_used |= 1 << REGNO (dest);
9391
9392 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
9393 xops[2] = gen_rtx_MEM (QImode, xops[2]);
9394 output_asm_insn ("call\t%X2", xops);
9395
9396 #if TARGET_MACHO
9397 /* Output the Mach-O "canonical" pic base label name ("Lxx$pb") here.
9398 This is what will be referenced by the Mach-O PIC subsystem. */
9399 if (machopic_should_output_picbase_label () || !label)
9400 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
9401
9402 /* When we are restoring the pic base at the site of a nonlocal label,
9403 and we decided to emit the pic base above, we will still output a
9404 local label used for calculating the correction offset (even though
9405 the offset will be 0 in that case). */
9406 if (label)
9407 targetm.asm_out.internal_label (asm_out_file, "L",
9408 CODE_LABEL_NUMBER (label));
9409 #endif
9410 }
9411
9412 if (!TARGET_MACHO)
9413 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
9414
9415 return "";
9416 }
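
/* For the usual 32-bit PIC case the sequence emitted above looks like
   (a sketch, AT&T syntax, with %ebx as the PIC register):

       call  __x86.get_pc_thunk.bx
       addl  $_GLOBAL_OFFSET_TABLE_, %ebx
*/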
9417
9418 /* Generate an "push" pattern for input ARG. */
9419
9420 static rtx
9421 gen_push (rtx arg)
9422 {
9423 struct machine_function *m = cfun->machine;
9424
9425 if (m->fs.cfa_reg == stack_pointer_rtx)
9426 m->fs.cfa_offset += UNITS_PER_WORD;
9427 m->fs.sp_offset += UNITS_PER_WORD;
9428
9429 if (REG_P (arg) && GET_MODE (arg) != word_mode)
9430 arg = gen_rtx_REG (word_mode, REGNO (arg));
9431
9432 return gen_rtx_SET (VOIDmode,
9433 gen_rtx_MEM (word_mode,
9434 gen_rtx_PRE_DEC (Pmode,
9435 stack_pointer_rtx)),
9436 arg);
9437 }
9438
9439 /* Generate an "pop" pattern for input ARG. */
9440
9441 static rtx
9442 gen_pop (rtx arg)
9443 {
9444 if (REG_P (arg) && GET_MODE (arg) != word_mode)
9445 arg = gen_rtx_REG (word_mode, REGNO (arg));
9446
9447 return gen_rtx_SET (VOIDmode,
9448 arg,
9449 gen_rtx_MEM (word_mode,
9450 gen_rtx_POST_INC (Pmode,
9451 stack_pointer_rtx)));
9452 }
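
/* The rtl produced by gen_push and gen_pop has the shape (a sketch, 64-bit):

       (set (mem:DI (pre_dec:DI (reg:DI sp))) (reg:DI arg))    -- push
       (set (reg:DI arg) (mem:DI (post_inc:DI (reg:DI sp))))   -- pop
*/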
9453
9454 /* Return >= 0 if there is an unused call-clobbered register available
9455 for the entire function. */
9456
9457 static unsigned int
9458 ix86_select_alt_pic_regnum (void)
9459 {
9460 if (ix86_use_pseudo_pic_reg ())
9461 return INVALID_REGNUM;
9462
9463 if (crtl->is_leaf
9464 && !crtl->profile
9465 && !ix86_current_function_calls_tls_descriptor)
9466 {
9467 int i, drap;
9468 /* Can't use the same register for both PIC and DRAP. */
9469 if (crtl->drap_reg)
9470 drap = REGNO (crtl->drap_reg);
9471 else
9472 drap = -1;
9473 for (i = 2; i >= 0; --i)
9474 if (i != drap && !df_regs_ever_live_p (i))
9475 return i;
9476 }
9477
9478 return INVALID_REGNUM;
9479 }
9480
9481 /* Return TRUE if we need to save REGNO. */
9482
9483 static bool
9484 ix86_save_reg (unsigned int regno, bool maybe_eh_return)
9485 {
9486 if (pic_offset_table_rtx
9487 && !ix86_use_pseudo_pic_reg ()
9488 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
9489 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
9490 || crtl->profile
9491 || crtl->calls_eh_return
9492 || crtl->uses_const_pool
9493 || cfun->has_nonlocal_label))
9494 return ix86_select_alt_pic_regnum () == INVALID_REGNUM;
9495
9496 if (crtl->calls_eh_return && maybe_eh_return)
9497 {
9498 unsigned i;
9499 for (i = 0; ; i++)
9500 {
9501 unsigned test = EH_RETURN_DATA_REGNO (i);
9502 if (test == INVALID_REGNUM)
9503 break;
9504 if (test == regno)
9505 return true;
9506 }
9507 }
9508
9509 if (crtl->drap_reg
9510 && regno == REGNO (crtl->drap_reg)
9511 && !cfun->machine->no_drap_save_restore)
9512 return true;
9513
9514 return (df_regs_ever_live_p (regno)
9515 && !call_used_regs[regno]
9516 && !fixed_regs[regno]
9517 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
9518 }
9519
9520 /* Return the number of saved general purpose registers. */
9521
9522 static int
9523 ix86_nsaved_regs (void)
9524 {
9525 int nregs = 0;
9526 int regno;
9527
9528 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9529 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9530 nregs ++;
9531 return nregs;
9532 }
9533
9534 /* Return the number of saved SSE registers. */
9535
9536 static int
9537 ix86_nsaved_sseregs (void)
9538 {
9539 int nregs = 0;
9540 int regno;
9541
9542 if (!TARGET_64BIT_MS_ABI)
9543 return 0;
9544 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9545 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9546 nregs ++;
9547 return nregs;
9548 }
9549
9550 /* Given FROM and TO register numbers, say whether this elimination is
9551 allowed. If stack alignment is needed, we can only replace argument
9552 pointer with hard frame pointer, or replace frame pointer with stack
9553 pointer. Otherwise, frame pointer elimination is automatically
9554 handled and all other eliminations are valid. */
9555
9556 static bool
9557 ix86_can_eliminate (const int from, const int to)
9558 {
9559 if (stack_realign_fp)
9560 return ((from == ARG_POINTER_REGNUM
9561 && to == HARD_FRAME_POINTER_REGNUM)
9562 || (from == FRAME_POINTER_REGNUM
9563 && to == STACK_POINTER_REGNUM));
9564 else
9565 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
9566 }
9567
9568 /* Return the offset between two registers, one to be eliminated, and the other
9569 its replacement, at the start of a routine. */
9570
9571 HOST_WIDE_INT
9572 ix86_initial_elimination_offset (int from, int to)
9573 {
9574 struct ix86_frame frame;
9575 ix86_compute_frame_layout (&frame);
9576
9577 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
9578 return frame.hard_frame_pointer_offset;
9579 else if (from == FRAME_POINTER_REGNUM
9580 && to == HARD_FRAME_POINTER_REGNUM)
9581 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
9582 else
9583 {
9584 gcc_assert (to == STACK_POINTER_REGNUM);
9585
9586 if (from == ARG_POINTER_REGNUM)
9587 return frame.stack_pointer_offset;
9588
9589 gcc_assert (from == FRAME_POINTER_REGNUM);
9590 return frame.stack_pointer_offset - frame.frame_pointer_offset;
9591 }
9592 }
9593
9594 /* In a dynamically-aligned function, we can't know the offset from
9595 stack pointer to frame pointer, so we must ensure that setjmp
9596 eliminates fp against the hard fp (%ebp) rather than trying to
9597 index from %esp up to the top of the frame across a gap that is
9598 of unknown (at compile-time) size. */
9599 static rtx
9600 ix86_builtin_setjmp_frame_value (void)
9601 {
9602 return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
9603 }
9604
9605 /* When using -fsplit-stack, the allocation routines set a field in
9606 the TCB to the bottom of the stack plus this much space, measured
9607 in bytes. */
9608
9609 #define SPLIT_STACK_AVAILABLE 256
9610
9611 /* Fill the structure ix86_frame describing the frame of the currently compiled function. */
9612
9613 static void
9614 ix86_compute_frame_layout (struct ix86_frame *frame)
9615 {
9616 unsigned HOST_WIDE_INT stack_alignment_needed;
9617 HOST_WIDE_INT offset;
9618 unsigned HOST_WIDE_INT preferred_alignment;
9619 HOST_WIDE_INT size = get_frame_size ();
9620 HOST_WIDE_INT to_allocate;
9621
9622 frame->nregs = ix86_nsaved_regs ();
9623 frame->nsseregs = ix86_nsaved_sseregs ();
9624
9625 /* The 64-bit MS ABI seems to require stack alignment to be always 16,
9626 except for function prologues and leaf functions. */
9627 if ((TARGET_64BIT_MS_ABI && crtl->preferred_stack_boundary < 128)
9628 && (!crtl->is_leaf || cfun->calls_alloca != 0
9629 || ix86_current_function_calls_tls_descriptor))
9630 {
9631 crtl->preferred_stack_boundary = 128;
9632 crtl->stack_alignment_needed = 128;
9633 }
9634 /* preferred_stack_boundary is never updated for calls
9635 expanded from a TLS descriptor, so update it here. We don't update it at
9636 expand time because, according to the comments before
9637 ix86_current_function_calls_tls_descriptor, TLS calls may be optimized
9638 away. */
9639 else if (ix86_current_function_calls_tls_descriptor
9640 && crtl->preferred_stack_boundary < PREFERRED_STACK_BOUNDARY)
9641 {
9642 crtl->preferred_stack_boundary = PREFERRED_STACK_BOUNDARY;
9643 if (crtl->stack_alignment_needed < PREFERRED_STACK_BOUNDARY)
9644 crtl->stack_alignment_needed = PREFERRED_STACK_BOUNDARY;
9645 }
9646
9647 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
9648 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
9649
9650 gcc_assert (!size || stack_alignment_needed);
9651 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
9652 gcc_assert (preferred_alignment <= stack_alignment_needed);
9653
9654 /* For SEH we have to limit the amount of code movement into the prologue.
9655 At present we do this via a BLOCKAGE, at which point there's very little
9656 scheduling that can be done, which means that there's very little point
9657 in doing anything except PUSHs. */
9658 if (TARGET_SEH)
9659 cfun->machine->use_fast_prologue_epilogue = false;
9660
9661 /* During reload iterations the number of registers saved can change.
9662 Recompute the value as needed. Do not recompute when the number of registers
9663 didn't change, as reload makes multiple calls to this function and does not
9664 expect the decision to change within a single iteration. */
9665 else if (!optimize_bb_for_size_p (ENTRY_BLOCK_PTR_FOR_FN (cfun))
9666 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
9667 {
9668 int count = frame->nregs;
9669 struct cgraph_node *node = cgraph_node::get (current_function_decl);
9670
9671 cfun->machine->use_fast_prologue_epilogue_nregs = count;
9672
9673 /* The fast prologue uses move instead of push to save registers. This
9674 is significantly longer, but also executes faster as modern hardware
9675 can execute the moves in parallel, but can't do that for push/pop.
9676
9677 Be careful about choosing which prologue to emit: when the function takes
9678 many instructions to execute we may as well use the slow version, and
9679 likewise when the function is known to be outside a hot spot (this is
9680 known only with profile feedback). Weight the size of the function by the
9681 number of registers to save, as it is cheap to use one or two push
9682 instructions but very slow to use many of them. */
9683 if (count)
9684 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
9685 if (node->frequency < NODE_FREQUENCY_NORMAL
9686 || (flag_branch_probabilities
9687 && node->frequency < NODE_FREQUENCY_HOT))
9688 cfun->machine->use_fast_prologue_epilogue = false;
9689 else
9690 cfun->machine->use_fast_prologue_epilogue
9691 = !expensive_function_p (count);
9692 }
9693
9694 frame->save_regs_using_mov
9695 = (TARGET_PROLOGUE_USING_MOVE && cfun->machine->use_fast_prologue_epilogue
9696 /* If static stack checking is enabled and done with probes,
9697 the registers need to be saved before allocating the frame. */
9698 && flag_stack_check != STATIC_BUILTIN_STACK_CHECK);
9699
9700 /* Skip return address. */
9701 offset = UNITS_PER_WORD;
9702
9703 /* Skip pushed static chain. */
9704 if (ix86_static_chain_on_stack)
9705 offset += UNITS_PER_WORD;
9706
9707 /* Skip saved base pointer. */
9708 if (frame_pointer_needed)
9709 offset += UNITS_PER_WORD;
9710 frame->hfp_save_offset = offset;
9711
9712 /* The traditional frame pointer location is at the top of the frame. */
9713 frame->hard_frame_pointer_offset = offset;
9714
9715 /* Register save area */
9716 offset += frame->nregs * UNITS_PER_WORD;
9717 frame->reg_save_offset = offset;
9718
9719 /* On SEH target, registers are pushed just before the frame pointer
9720 location. */
9721 if (TARGET_SEH)
9722 frame->hard_frame_pointer_offset = offset;
9723
9724 /* Align and set SSE register save area. */
9725 if (frame->nsseregs)
9726 {
9727 /* The only ABI that has saved SSE registers (Win64) also has a
9728 16-byte aligned default stack, and thus we don't need to be
9729 within the re-aligned local stack frame to save them. */
9730 gcc_assert (INCOMING_STACK_BOUNDARY >= 128);
9731 offset = (offset + 16 - 1) & -16;
9732 offset += frame->nsseregs * 16;
9733 }
9734 frame->sse_reg_save_offset = offset;
9735
9736 /* The re-aligned stack starts here. Values before this point are not
9737 directly comparable with values below this point. In order to make
9738 sure that no value happens to be the same before and after, force
9739 the alignment computation below to add a non-zero value. */
9740 if (stack_realign_fp)
9741 offset = (offset + stack_alignment_needed) & -stack_alignment_needed;
9742
9743 /* Va-arg area */
9744 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
9745 offset += frame->va_arg_size;
9746
9747 /* Align start of frame for local function. */
9748 if (stack_realign_fp
9749 || offset != frame->sse_reg_save_offset
9750 || size != 0
9751 || !crtl->is_leaf
9752 || cfun->calls_alloca
9753 || ix86_current_function_calls_tls_descriptor)
9754 offset = (offset + stack_alignment_needed - 1) & -stack_alignment_needed;
9755
9756 /* Frame pointer points here. */
9757 frame->frame_pointer_offset = offset;
9758
9759 offset += size;
9760
9761 /* Add outgoing arguments area. Can be skipped if we eliminated
9762 all the function calls as dead code.
9763 Skipping is, however, impossible when the function calls alloca: the
9764 alloca expander assumes that the last crtl->outgoing_args_size bytes
9765 of the stack frame are unused. */
9766 if (ACCUMULATE_OUTGOING_ARGS
9767 && (!crtl->is_leaf || cfun->calls_alloca
9768 || ix86_current_function_calls_tls_descriptor))
9769 {
9770 offset += crtl->outgoing_args_size;
9771 frame->outgoing_arguments_size = crtl->outgoing_args_size;
9772 }
9773 else
9774 frame->outgoing_arguments_size = 0;
9775
9776 /* Align stack boundary. Only needed if we're calling another function
9777 or using alloca. */
9778 if (!crtl->is_leaf || cfun->calls_alloca
9779 || ix86_current_function_calls_tls_descriptor)
9780 offset = (offset + preferred_alignment - 1) & -preferred_alignment;
9781
9782 /* We've reached end of stack frame. */
9783 frame->stack_pointer_offset = offset;
9784
9785 /* Size prologue needs to allocate. */
9786 to_allocate = offset - frame->sse_reg_save_offset;
9787
9788 if ((!to_allocate && frame->nregs <= 1)
9789 || (TARGET_64BIT && to_allocate >= (HOST_WIDE_INT) 0x80000000))
9790 frame->save_regs_using_mov = false;
9791
9792 if (ix86_using_red_zone ()
9793 && crtl->sp_is_unchanging
9794 && crtl->is_leaf
9795 && !ix86_current_function_calls_tls_descriptor)
9796 {
9797 frame->red_zone_size = to_allocate;
9798 if (frame->save_regs_using_mov)
9799 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
9800 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
9801 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
9802 }
9803 else
9804 frame->red_zone_size = 0;
9805 frame->stack_pointer_offset -= frame->red_zone_size;
9806
9807 /* The SEH frame pointer location is near the bottom of the frame.
9808 This is enforced by the fact that the difference between the
9809 stack pointer and the frame pointer is limited to 240 bytes in
9810 the unwind data structure. */
9811 if (TARGET_SEH)
9812 {
9813 HOST_WIDE_INT diff;
9814
9815 /* If we can leave the frame pointer where it is, do so. Also, returns
9816 the establisher frame for __builtin_frame_address (0). */
9817 diff = frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
9818 if (diff <= SEH_MAX_FRAME_SIZE
9819 && (diff > 240 || (diff & 15) != 0)
9820 && !crtl->accesses_prior_frames)
9821 {
9822 /* Ideally we'd determine what portion of the local stack frame
9823 (within the constraint of the lowest 240) is most heavily used.
9824 But without that complication, simply bias the frame pointer
9825 by 128 bytes so as to maximize the amount of the local stack
9826 frame that is addressable with 8-bit offsets. */
9827 frame->hard_frame_pointer_offset = frame->stack_pointer_offset - 128;
9828 }
9829 }
9830 }
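
/* Rough shape of the frame computed above, from the CFA downwards
   (a sketch; individual areas are absent when empty, and SEH moves the
   hard frame pointer near the bottom instead):

       return address                   <- CFA
       pushed static chain
       saved frame pointer              <- hard_frame_pointer_offset
       GPR save area                    (frame->nregs * UNITS_PER_WORD)
       SSE save area, 16-byte aligned   (frame->nsseregs * 16)
       va_arg register save area
       local variables                  <- frame_pointer_offset
       outgoing argument area
                                        <- stack_pointer_offset
*/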
9831
9832 /* This is semi-inlined memory_address_length, but simplified
9833 since we know that we're always dealing with reg+offset, and
9834 to avoid having to create and discard all that rtl. */
9835
9836 static inline int
9837 choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
9838 {
9839 int len = 4;
9840
9841 if (offset == 0)
9842 {
9843 /* EBP and R13 cannot be encoded without an offset. */
9844 len = (regno == BP_REG || regno == R13_REG);
9845 }
9846 else if (IN_RANGE (offset, -128, 127))
9847 len = 1;
9848
9849 /* ESP and R12 must be encoded with a SIB byte. */
9850 if (regno == SP_REG || regno == R12_REG)
9851 len++;
9852
9853 return len;
9854 }
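
/* Sample encoding lengths returned above (a sketch): (%rax) costs 0 extra
   bytes, 8(%rbp) costs 1 (disp8), 8(%rsp) costs 2 (SIB + disp8) and
   4096(%rsp) costs 5 (SIB + disp32). */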
9855
9856 /* Return an RTX that points to CFA_OFFSET within the stack frame.
9857 The valid base registers are taken from CFUN->MACHINE->FS. */
9858
9859 static rtx
9860 choose_baseaddr (HOST_WIDE_INT cfa_offset)
9861 {
9862 const struct machine_function *m = cfun->machine;
9863 rtx base_reg = NULL;
9864 HOST_WIDE_INT base_offset = 0;
9865
9866 if (m->use_fast_prologue_epilogue)
9867 {
9868 /* Choose the base register most likely to allow the most scheduling
9869 opportunities. Generally FP is valid throughout the function,
9870 while DRAP must be reloaded within the epilogue. But choose either
9871 over the SP due to increased encoding size. */
9872
9873 if (m->fs.fp_valid)
9874 {
9875 base_reg = hard_frame_pointer_rtx;
9876 base_offset = m->fs.fp_offset - cfa_offset;
9877 }
9878 else if (m->fs.drap_valid)
9879 {
9880 base_reg = crtl->drap_reg;
9881 base_offset = 0 - cfa_offset;
9882 }
9883 else if (m->fs.sp_valid)
9884 {
9885 base_reg = stack_pointer_rtx;
9886 base_offset = m->fs.sp_offset - cfa_offset;
9887 }
9888 }
9889 else
9890 {
9891 HOST_WIDE_INT toffset;
9892 int len = 16, tlen;
9893
9894 /* Choose the base register with the smallest address encoding.
9895 With a tie, choose FP > DRAP > SP. */
9896 if (m->fs.sp_valid)
9897 {
9898 base_reg = stack_pointer_rtx;
9899 base_offset = m->fs.sp_offset - cfa_offset;
9900 len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
9901 }
9902 if (m->fs.drap_valid)
9903 {
9904 toffset = 0 - cfa_offset;
9905 tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
9906 if (tlen <= len)
9907 {
9908 base_reg = crtl->drap_reg;
9909 base_offset = toffset;
9910 len = tlen;
9911 }
9912 }
9913 if (m->fs.fp_valid)
9914 {
9915 toffset = m->fs.fp_offset - cfa_offset;
9916 tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
9917 if (tlen <= len)
9918 {
9919 base_reg = hard_frame_pointer_rtx;
9920 base_offset = toffset;
9921 len = tlen;
9922 }
9923 }
9924 }
9925 gcc_assert (base_reg != NULL);
9926
9927 return plus_constant (Pmode, base_reg, base_offset);
9928 }
9929
9930 /* Emit code to save registers in the prologue. */
9931
9932 static void
9933 ix86_emit_save_regs (void)
9934 {
9935 unsigned int regno;
9936 rtx insn;
9937
9938 for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
9939 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9940 {
9941 insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno)));
9942 RTX_FRAME_RELATED_P (insn) = 1;
9943 }
9944 }
9945
9946 /* Emit a single register save at CFA - CFA_OFFSET. */
9947
9948 static void
9949 ix86_emit_save_reg_using_mov (enum machine_mode mode, unsigned int regno,
9950 HOST_WIDE_INT cfa_offset)
9951 {
9952 struct machine_function *m = cfun->machine;
9953 rtx reg = gen_rtx_REG (mode, regno);
9954 rtx mem, addr, base, insn;
9955
9956 addr = choose_baseaddr (cfa_offset);
9957 mem = gen_frame_mem (mode, addr);
9958
9959 /* For SSE saves, we need to indicate the 128-bit alignment. */
9960 set_mem_align (mem, GET_MODE_ALIGNMENT (mode));
9961
9962 insn = emit_move_insn (mem, reg);
9963 RTX_FRAME_RELATED_P (insn) = 1;
9964
9965 base = addr;
9966 if (GET_CODE (base) == PLUS)
9967 base = XEXP (base, 0);
9968 gcc_checking_assert (REG_P (base));
9969
9970 /* When saving registers into a re-aligned local stack frame, avoid
9971 any tricky guessing by dwarf2out. */
9972 if (m->fs.realigned)
9973 {
9974 gcc_checking_assert (stack_realign_drap);
9975
9976 if (regno == REGNO (crtl->drap_reg))
9977 {
9978 /* A bit of a hack. We force the DRAP register to be saved in
9979 the re-aligned stack frame, which provides us with a copy
9980 of the CFA that will last past the prologue. Install it. */
9981 gcc_checking_assert (cfun->machine->fs.fp_valid);
9982 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
9983 cfun->machine->fs.fp_offset - cfa_offset);
9984 mem = gen_rtx_MEM (mode, addr);
9985 add_reg_note (insn, REG_CFA_DEF_CFA, mem);
9986 }
9987 else
9988 {
9989 /* The frame pointer is a stable reference within the
9990 aligned frame. Use it. */
9991 gcc_checking_assert (cfun->machine->fs.fp_valid);
9992 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
9993 cfun->machine->fs.fp_offset - cfa_offset);
9994 mem = gen_rtx_MEM (mode, addr);
9995 add_reg_note (insn, REG_CFA_EXPRESSION,
9996 gen_rtx_SET (VOIDmode, mem, reg));
9997 }
9998 }
9999
10000 /* The memory may not be relative to the current CFA register,
10001 which means that we may need to generate a new pattern for
10002 use by the unwind info. */
10003 else if (base != m->fs.cfa_reg)
10004 {
10005 addr = plus_constant (Pmode, m->fs.cfa_reg,
10006 m->fs.cfa_offset - cfa_offset);
10007 mem = gen_rtx_MEM (mode, addr);
10008 add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (VOIDmode, mem, reg));
10009 }
10010 }
10011
10012 /* Emit code to save registers using MOV insns.
10013 First register is stored at CFA - CFA_OFFSET. */
10014 static void
10015 ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
10016 {
10017 unsigned int regno;
10018
10019 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10020 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
10021 {
10022 ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset);
10023 cfa_offset -= UNITS_PER_WORD;
10024 }
10025 }
10026
10027 /* Emit code to save SSE registers using MOV insns.
10028 First register is stored at CFA - CFA_OFFSET. */
10029 static void
10030 ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
10031 {
10032 unsigned int regno;
10033
10034 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10035 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
10036 {
10037 ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
10038 cfa_offset -= 16;
10039 }
10040 }
10041
10042 static GTY(()) rtx queued_cfa_restores;
10043
10044 /* Add a REG_CFA_RESTORE REG note to INSN or queue them until next stack
10045 manipulation insn. The value is on the stack at CFA - CFA_OFFSET.
10046 Don't add the note if the previously saved value will be left untouched
10047 within the stack red-zone until return, as unwinders can find the same value
10048 in the register and on the stack. */
10049
10050 static void
10051 ix86_add_cfa_restore_note (rtx insn, rtx reg, HOST_WIDE_INT cfa_offset)
10052 {
10053 if (!crtl->shrink_wrapped
10054 && cfa_offset <= cfun->machine->fs.red_zone_offset)
10055 return;
10056
10057 if (insn)
10058 {
10059 add_reg_note (insn, REG_CFA_RESTORE, reg);
10060 RTX_FRAME_RELATED_P (insn) = 1;
10061 }
10062 else
10063 queued_cfa_restores
10064 = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
10065 }
10066
10067 /* Add queued REG_CFA_RESTORE notes if any to INSN. */
10068
10069 static void
10070 ix86_add_queued_cfa_restore_notes (rtx insn)
10071 {
10072 rtx last;
10073 if (!queued_cfa_restores)
10074 return;
10075 for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
10076 ;
10077 XEXP (last, 1) = REG_NOTES (insn);
10078 REG_NOTES (insn) = queued_cfa_restores;
10079 queued_cfa_restores = NULL_RTX;
10080 RTX_FRAME_RELATED_P (insn) = 1;
10081 }
10082
10083 /* Expand prologue or epilogue stack adjustment.
10084 The pattern exists to put a dependency on all ebp-based memory accesses.
10085 STYLE should be negative if instructions should be marked as frame related,
10086 zero if the %r11 register is live and cannot be freely used, and positive
10087 otherwise. */
10088
10089 static void
10090 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
10091 int style, bool set_cfa)
10092 {
10093 struct machine_function *m = cfun->machine;
10094 rtx insn;
10095 bool add_frame_related_expr = false;
10096
10097 if (Pmode == SImode)
10098 insn = gen_pro_epilogue_adjust_stack_si_add (dest, src, offset);
10099 else if (x86_64_immediate_operand (offset, DImode))
10100 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, offset);
10101 else
10102 {
10103 rtx tmp;
10104 /* r11 is used by indirect sibcall return as well, set before the
10105 epilogue and used after the epilogue. */
10106 if (style)
10107 tmp = gen_rtx_REG (DImode, R11_REG);
10108 else
10109 {
10110 gcc_assert (src != hard_frame_pointer_rtx
10111 && dest != hard_frame_pointer_rtx);
10112 tmp = hard_frame_pointer_rtx;
10113 }
10114 insn = emit_insn (gen_rtx_SET (DImode, tmp, offset));
10115 if (style < 0)
10116 add_frame_related_expr = true;
10117
10118 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, tmp);
10119 }
10120
10121 insn = emit_insn (insn);
10122 if (style >= 0)
10123 ix86_add_queued_cfa_restore_notes (insn);
10124
10125 if (set_cfa)
10126 {
10127 rtx r;
10128
10129 gcc_assert (m->fs.cfa_reg == src);
10130 m->fs.cfa_offset += INTVAL (offset);
10131 m->fs.cfa_reg = dest;
10132
10133 r = gen_rtx_PLUS (Pmode, src, offset);
10134 r = gen_rtx_SET (VOIDmode, dest, r);
10135 add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
10136 RTX_FRAME_RELATED_P (insn) = 1;
10137 }
10138 else if (style < 0)
10139 {
10140 RTX_FRAME_RELATED_P (insn) = 1;
10141 if (add_frame_related_expr)
10142 {
10143 rtx r = gen_rtx_PLUS (Pmode, src, offset);
10144 r = gen_rtx_SET (VOIDmode, dest, r);
10145 add_reg_note (insn, REG_FRAME_RELATED_EXPR, r);
10146 }
10147 }
10148
10149 if (dest == stack_pointer_rtx)
10150 {
10151 HOST_WIDE_INT ooffset = m->fs.sp_offset;
10152 bool valid = m->fs.sp_valid;
10153
10154 if (src == hard_frame_pointer_rtx)
10155 {
10156 valid = m->fs.fp_valid;
10157 ooffset = m->fs.fp_offset;
10158 }
10159 else if (src == crtl->drap_reg)
10160 {
10161 valid = m->fs.drap_valid;
10162 ooffset = 0;
10163 }
10164 else
10165 {
10166 /* Else there are two possibilities: SP itself, which we set
10167 up as the default above. Or EH_RETURN_STACKADJ_RTX, which is
10168 handled by hand along the eh_return path. */
10169 gcc_checking_assert (src == stack_pointer_rtx
10170 || offset == const0_rtx);
10171 }
10172
10173 m->fs.sp_offset = ooffset - INTVAL (offset);
10174 m->fs.sp_valid = valid;
10175 }
10176 }
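/* A typical prologue-side use of the helper above, as seen later in
   ix86_expand_prologue (sketch only):

     pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				GEN_INT (-allocate), -1,
				m->fs.cfa_reg == stack_pointer_rtx);

   i.e. SP -= ALLOCATE, marked frame related (STYLE == -1), with the CFA
   tracking updated only while the stack pointer is still the CFA register.  */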
10177
10178 /* Find an available register to be used as dynamic realign argument
10179 pointer register. Such a register will be written in the prologue and
10180 used at the beginning of the body, so it must not be
10181 1. a parameter passing register.
10182 2. the GOT pointer.
10183 We reuse the static-chain register if it is available. Otherwise, we
10184 use DI for i386 and R13 for x86-64. We chose R13 since it has
10185 shorter encoding.
10186
10187 Return: the regno of chosen register. */
10188
10189 static unsigned int
10190 find_drap_reg (void)
10191 {
10192 tree decl = cfun->decl;
10193
10194 if (TARGET_64BIT)
10195 {
10196 /* Use R13 for a nested function or a function that needs a static
10197 chain. Since a function with a tail call may use any caller-saved
10198 register in its epilogue, DRAP must not use a caller-saved
10199 register in that case. */
10200 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
10201 return R13_REG;
10202
10203 return R10_REG;
10204 }
10205 else
10206 {
10207 /* Use DI for a nested function or a function that needs a static
10208 chain. Since a function with a tail call may use any caller-saved
10209 register in its epilogue, DRAP must not use a caller-saved
10210 register in that case. */
10211 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
10212 return DI_REG;
10213
10214 /* Reuse static chain register if it isn't used for parameter
10215 passing. */
10216 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2)
10217 {
10218 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (decl));
10219 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) == 0)
10220 return CX_REG;
10221 }
10222 return DI_REG;
10223 }
10224 }
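/* For example, following the logic above: a 64-bit function with a static
   chain or an emitted tail call gets R13, and any other 64-bit function
   gets R10; a plain 32-bit function with at most two register parameters
   and no fastcall/thiscall attribute gets CX, otherwise DI is used.  */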
10225
10226 /* Return minimum incoming stack alignment. */
10227
10228 static unsigned int
10229 ix86_minimum_incoming_stack_boundary (bool sibcall)
10230 {
10231 unsigned int incoming_stack_boundary;
10232
10233 /* Prefer the one specified at command line. */
10234 if (ix86_user_incoming_stack_boundary)
10235 incoming_stack_boundary = ix86_user_incoming_stack_boundary;
10236 /* In 32-bit mode, use MIN_STACK_BOUNDARY for the incoming stack boundary
10237 if -mstackrealign is used, this isn't a sibcall check, and the
10238 estimated stack alignment is 128 bits. */
10239 else if (!sibcall
10240 && !TARGET_64BIT
10241 && ix86_force_align_arg_pointer
10242 && crtl->stack_alignment_estimated == 128)
10243 incoming_stack_boundary = MIN_STACK_BOUNDARY;
10244 else
10245 incoming_stack_boundary = ix86_default_incoming_stack_boundary;
10246
10247 /* Incoming stack alignment can be changed on individual functions
10248 via force_align_arg_pointer attribute. We use the smallest
10249 incoming stack boundary. */
10250 if (incoming_stack_boundary > MIN_STACK_BOUNDARY
10251 && lookup_attribute (ix86_force_align_arg_pointer_string,
10252 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
10253 incoming_stack_boundary = MIN_STACK_BOUNDARY;
10254
10255 /* The incoming stack frame has to be aligned at least at
10256 parm_stack_boundary. */
10257 if (incoming_stack_boundary < crtl->parm_stack_boundary)
10258 incoming_stack_boundary = crtl->parm_stack_boundary;
10259
10260 /* The stack at the entry of main is aligned by the runtime. We use the
10261 smallest incoming stack boundary. */
10262 if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
10263 && DECL_NAME (current_function_decl)
10264 && MAIN_NAME_P (DECL_NAME (current_function_decl))
10265 && DECL_FILE_SCOPE_P (current_function_decl))
10266 incoming_stack_boundary = MAIN_STACK_BOUNDARY;
10267
10268 return incoming_stack_boundary;
10269 }
10270
10271 /* Update incoming stack boundary and estimated stack alignment. */
10272
10273 static void
10274 ix86_update_stack_boundary (void)
10275 {
10276 ix86_incoming_stack_boundary
10277 = ix86_minimum_incoming_stack_boundary (false);
10278
10279 /* x86_64 varargs need 16-byte stack alignment for the register save
10280 area. */
10281 if (TARGET_64BIT
10282 && cfun->stdarg
10283 && crtl->stack_alignment_estimated < 128)
10284 crtl->stack_alignment_estimated = 128;
10285 }
10286
10287 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
10288 needed or an rtx for DRAP otherwise. */
10289
10290 static rtx
10291 ix86_get_drap_rtx (void)
10292 {
10293 if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
10294 crtl->need_drap = true;
10295
10296 if (stack_realign_drap)
10297 {
10298 /* Assign DRAP to vDRAP and return vDRAP. */
10299 unsigned int regno = find_drap_reg ();
10300 rtx drap_vreg;
10301 rtx arg_ptr;
10302 rtx_insn *seq, *insn;
10303
10304 arg_ptr = gen_rtx_REG (Pmode, regno);
10305 crtl->drap_reg = arg_ptr;
10306
10307 start_sequence ();
10308 drap_vreg = copy_to_reg (arg_ptr);
10309 seq = get_insns ();
10310 end_sequence ();
10311
10312 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
10313 if (!optimize)
10314 {
10315 add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
10316 RTX_FRAME_RELATED_P (insn) = 1;
10317 }
10318 return drap_vreg;
10319 }
10320 else
10321 return NULL;
10322 }
10323
10324 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
10325
10326 static rtx
10327 ix86_internal_arg_pointer (void)
10328 {
10329 return virtual_incoming_args_rtx;
10330 }
10331
10332 struct scratch_reg {
10333 rtx reg;
10334 bool saved;
10335 };
10336
10337 /* Return a short-lived scratch register for use on function entry.
10338 In 32-bit mode, it is valid only after the registers are saved
10339 in the prologue. This register must be released by means of
10340 release_scratch_register_on_entry once it is dead. */
10341
10342 static void
10343 get_scratch_register_on_entry (struct scratch_reg *sr)
10344 {
10345 int regno;
10346
10347 sr->saved = false;
10348
10349 if (TARGET_64BIT)
10350 {
10351 /* We always use R11 in 64-bit mode. */
10352 regno = R11_REG;
10353 }
10354 else
10355 {
10356 tree decl = current_function_decl, fntype = TREE_TYPE (decl);
10357 bool fastcall_p
10358 = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
10359 bool thiscall_p
10360 = lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
10361 bool static_chain_p = DECL_STATIC_CHAIN (decl);
10362 int regparm = ix86_function_regparm (fntype, decl);
10363 int drap_regno
10364 = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;
10365
10366 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
10367 for the static chain register. */
10368 if ((regparm < 1 || (fastcall_p && !static_chain_p))
10369 && drap_regno != AX_REG)
10370 regno = AX_REG;
10371 /* 'thiscall' sets regparm to 1, uses ecx for arguments and edx
10372 for the static chain register. */
10373 else if (thiscall_p && !static_chain_p && drap_regno != AX_REG)
10374 regno = AX_REG;
10375 else if (regparm < 2 && !thiscall_p && drap_regno != DX_REG)
10376 regno = DX_REG;
10377 /* ecx is the static chain register. */
10378 else if (regparm < 3 && !fastcall_p && !thiscall_p
10379 && !static_chain_p
10380 && drap_regno != CX_REG)
10381 regno = CX_REG;
10382 else if (ix86_save_reg (BX_REG, true))
10383 regno = BX_REG;
10384 /* esi is the static chain register. */
10385 else if (!(regparm == 3 && static_chain_p)
10386 && ix86_save_reg (SI_REG, true))
10387 regno = SI_REG;
10388 else if (ix86_save_reg (DI_REG, true))
10389 regno = DI_REG;
10390 else
10391 {
10392 regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
10393 sr->saved = true;
10394 }
10395 }
10396
10397 sr->reg = gen_rtx_REG (Pmode, regno);
10398 if (sr->saved)
10399 {
10400 rtx insn = emit_insn (gen_push (sr->reg));
10401 RTX_FRAME_RELATED_P (insn) = 1;
10402 }
10403 }
10404
10405 /* Release a scratch register obtained from the preceding function. */
10406
10407 static void
10408 release_scratch_register_on_entry (struct scratch_reg *sr)
10409 {
10410 if (sr->saved)
10411 {
10412 struct machine_function *m = cfun->machine;
10413 rtx x, insn = emit_insn (gen_pop (sr->reg));
10414
10415 /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */
10416 RTX_FRAME_RELATED_P (insn) = 1;
10417 x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD));
10418 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
10419 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
10420 m->fs.sp_offset -= UNITS_PER_WORD;
10421 }
10422 }
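/* The two helpers above are used as a get/release pair, e.g. in
   ix86_adjust_stack_and_probe and ix86_emit_probe_stack_range below:

     struct scratch_reg sr;
     get_scratch_register_on_entry (&sr);
     ... emit insns that use sr.reg as a scratch ...
     release_scratch_register_on_entry (&sr);

   When no suitable register is free, the pair transparently pushes and
   pops the chosen register around the scratch use.  */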
10423
10424 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
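/* STACK_CHECK_PROBE_INTERVAL_EXP is commonly 12 (an assumption; the value
   is target and parameter dependent), making PROBE_INTERVAL 4096 bytes,
   i.e. one probe per page for the typical 4 KB page size.  */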
10425
10426 /* Emit code to adjust the stack pointer by SIZE bytes while probing it. */
10427
10428 static void
10429 ix86_adjust_stack_and_probe (const HOST_WIDE_INT size)
10430 {
10431 /* We skip the probe for the first interval + a small dope of 4 words and
10432 probe that many bytes past the specified size to maintain a protection
10433 area at the bottom of the stack. */
10434 const int dope = 4 * UNITS_PER_WORD;
10435 rtx size_rtx = GEN_INT (size), last;
10436
10437 /* See if we have a constant small number of probes to generate. If so,
10438 that's the easy case. The run-time loop is made up of 11 insns in the
10439 generic case while the compile-time loop is made up of 3+2*(n-1) insns
10440 for n # of intervals. */
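   /* A worked example of the unrolled path below, assuming PROBE_INTERVAL
      is 4096, a dope of 32 bytes, and SIZE == 10000 (all illustrative
      numbers): SP is lowered by 8224 (2 * interval + dope) and probed,
      lowered by 4096 and probed, lowered by 1808 (SIZE + interval - 12288)
      and probed, then raised again by 4128 (interval + dope), for a net
      decrement of exactly SIZE bytes.  */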
10441 if (size <= 5 * PROBE_INTERVAL)
10442 {
10443 HOST_WIDE_INT i, adjust;
10444 bool first_probe = true;
10445
10446 /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
10447 values of N from 1 until it exceeds SIZE. If only one probe is
10448 needed, this will not generate any code. Then adjust and probe
10449 to PROBE_INTERVAL + SIZE. */
10450 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
10451 {
10452 if (first_probe)
10453 {
10454 adjust = 2 * PROBE_INTERVAL + dope;
10455 first_probe = false;
10456 }
10457 else
10458 adjust = PROBE_INTERVAL;
10459
10460 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10461 plus_constant (Pmode, stack_pointer_rtx,
10462 -adjust)));
10463 emit_stack_probe (stack_pointer_rtx);
10464 }
10465
10466 if (first_probe)
10467 adjust = size + PROBE_INTERVAL + dope;
10468 else
10469 adjust = size + PROBE_INTERVAL - i;
10470
10471 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10472 plus_constant (Pmode, stack_pointer_rtx,
10473 -adjust)));
10474 emit_stack_probe (stack_pointer_rtx);
10475
10476 /* Adjust back to account for the additional first interval. */
10477 last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10478 plus_constant (Pmode, stack_pointer_rtx,
10479 PROBE_INTERVAL + dope)));
10480 }
10481
10482 /* Otherwise, do the same as above, but in a loop. Note that we must be
10483 extra careful with variables wrapping around because we might be at
10484 the very top (or the very bottom) of the address space and we have
10485 to be able to handle this case properly; in particular, we use an
10486 equality test for the loop condition. */
10487 else
10488 {
10489 HOST_WIDE_INT rounded_size;
10490 struct scratch_reg sr;
10491
10492 get_scratch_register_on_entry (&sr);
10493
10494
10495 /* Step 1: round SIZE to the previous multiple of the interval. */
10496
10497 rounded_size = size & -PROBE_INTERVAL;
10498
10499
10500 /* Step 2: compute initial and final value of the loop counter. */
10501
10502 /* SP = SP_0 + PROBE_INTERVAL. */
10503 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10504 plus_constant (Pmode, stack_pointer_rtx,
10505 - (PROBE_INTERVAL + dope))));
10506
10507 /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */
10508 emit_move_insn (sr.reg, GEN_INT (-rounded_size));
10509 emit_insn (gen_rtx_SET (VOIDmode, sr.reg,
10510 gen_rtx_PLUS (Pmode, sr.reg,
10511 stack_pointer_rtx)));
10512
10513
10514 /* Step 3: the loop
10515
10516 while (SP != LAST_ADDR)
10517 {
10518 SP = SP + PROBE_INTERVAL
10519 probe at SP
10520 }
10521
10522 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
10523 values of N from 1 until it is equal to ROUNDED_SIZE. */
10524
10525 emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, size_rtx));
10526
10527
10528 /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
10529 assert at compile-time that SIZE is equal to ROUNDED_SIZE. */
10530
10531 if (size != rounded_size)
10532 {
10533 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10534 plus_constant (Pmode, stack_pointer_rtx,
10535 rounded_size - size)));
10536 emit_stack_probe (stack_pointer_rtx);
10537 }
10538
10539 /* Adjust back to account for the additional first interval. */
10540 last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10541 plus_constant (Pmode, stack_pointer_rtx,
10542 PROBE_INTERVAL + dope)));
10543
10544 release_scratch_register_on_entry (&sr);
10545 }
10546
10547 gcc_assert (cfun->machine->fs.cfa_reg != stack_pointer_rtx);
10548
10549 /* Even if the stack pointer isn't the CFA register, we need to correctly
10550 describe the adjustments made to it, in particular differentiate the
10551 frame-related ones from the frame-unrelated ones. */
10552 if (size > 0)
10553 {
10554 rtx expr = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (2));
10555 XVECEXP (expr, 0, 0)
10556 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10557 plus_constant (Pmode, stack_pointer_rtx, -size));
10558 XVECEXP (expr, 0, 1)
10559 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10560 plus_constant (Pmode, stack_pointer_rtx,
10561 PROBE_INTERVAL + dope + size));
10562 add_reg_note (last, REG_FRAME_RELATED_EXPR, expr);
10563 RTX_FRAME_RELATED_P (last) = 1;
10564
10565 cfun->machine->fs.sp_offset += size;
10566 }
10567
10568 /* Make sure nothing is scheduled before we are done. */
10569 emit_insn (gen_blockage ());
10570 }
10571
10572 /* Adjust the stack pointer up to REG while probing it. */
10573
10574 const char *
10575 output_adjust_stack_and_probe (rtx reg)
10576 {
10577 static int labelno = 0;
10578 char loop_lab[32], end_lab[32];
10579 rtx xops[2];
10580
10581 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
10582 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
10583
10584 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
10585
10586 /* Jump to END_LAB if SP == LAST_ADDR. */
10587 xops[0] = stack_pointer_rtx;
10588 xops[1] = reg;
10589 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
10590 fputs ("\tje\t", asm_out_file);
10591 assemble_name_raw (asm_out_file, end_lab);
10592 fputc ('\n', asm_out_file);
10593
10594 /* SP = SP + PROBE_INTERVAL. */
10595 xops[1] = GEN_INT (PROBE_INTERVAL);
10596 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
10597
10598 /* Probe at SP. */
10599 xops[1] = const0_rtx;
10600 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);
10601
10602 fprintf (asm_out_file, "\tjmp\t");
10603 assemble_name_raw (asm_out_file, loop_lab);
10604 fputc ('\n', asm_out_file);
10605
10606 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
10607
10608 return "";
10609 }
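/* For illustration, the loop emitted above looks roughly like this in
   AT&T syntax on a 64-bit target (the scratch register and the 4096-byte
   interval are assumptions):

   .LPSRL0:	cmpq	%r11, %rsp
		je	.LPSRE0
		subq	$4096, %rsp
		orq	$0, (%rsp)
		jmp	.LPSRL0
   .LPSRE0:  */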
10610
10611 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
10612 inclusive. These are offsets from the current stack pointer. */
10613
10614 static void
10615 ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
10616 {
10617 /* See if we have a constant small number of probes to generate. If so,
10618 that's the easy case. The run-time loop is made up of 7 insns in the
10619 generic case while the compile-time loop is made up of n insns for n #
10620 of intervals. */
10621 if (size <= 7 * PROBE_INTERVAL)
10622 {
10623 HOST_WIDE_INT i;
10624
10625 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
10626 it exceeds SIZE. If only one probe is needed, this will not
10627 generate any code. Then probe at FIRST + SIZE. */
10628 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
10629 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
10630 -(first + i)));
10631
10632 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
10633 -(first + size)));
10634 }
10635
10636 /* Otherwise, do the same as above, but in a loop. Note that we must be
10637 extra careful with variables wrapping around because we might be at
10638 the very top (or the very bottom) of the address space and we have
10639 to be able to handle this case properly; in particular, we use an
10640 equality test for the loop condition. */
10641 else
10642 {
10643 HOST_WIDE_INT rounded_size, last;
10644 struct scratch_reg sr;
10645
10646 get_scratch_register_on_entry (&sr);
10647
10648
10649 /* Step 1: round SIZE to the previous multiple of the interval. */
10650
10651 rounded_size = size & -PROBE_INTERVAL;
10652
10653
10654 /* Step 2: compute initial and final value of the loop counter. */
10655
10656 /* TEST_OFFSET = FIRST. */
10657 emit_move_insn (sr.reg, GEN_INT (-first));
10658
10659 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
10660 last = first + rounded_size;
10661
10662
10663 /* Step 3: the loop
10664
10665 while (TEST_ADDR != LAST_ADDR)
10666 {
10667 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
10668 probe at TEST_ADDR
10669 }
10670
10671 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
10672 until it is equal to ROUNDED_SIZE. */
10673
10674 emit_insn (ix86_gen_probe_stack_range (sr.reg, sr.reg, GEN_INT (-last)));
10675
10676
10677 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
10678 that SIZE is equal to ROUNDED_SIZE. */
10679
10680 if (size != rounded_size)
10681 emit_stack_probe (plus_constant (Pmode,
10682 gen_rtx_PLUS (Pmode,
10683 stack_pointer_rtx,
10684 sr.reg),
10685 rounded_size - size));
10686
10687 release_scratch_register_on_entry (&sr);
10688 }
10689
10690 /* Make sure nothing is scheduled before we are done. */
10691 emit_insn (gen_blockage ());
10692 }
10693
10694 /* Probe a range of stack addresses from REG to END, inclusive. These are
10695 offsets from the current stack pointer. */
10696
10697 const char *
10698 output_probe_stack_range (rtx reg, rtx end)
10699 {
10700 static int labelno = 0;
10701 char loop_lab[32], end_lab[32];
10702 rtx xops[3];
10703
10704 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
10705 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
10706
10707 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
10708
10709 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
10710 xops[0] = reg;
10711 xops[1] = end;
10712 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
10713 fputs ("\tje\t", asm_out_file);
10714 assemble_name_raw (asm_out_file, end_lab);
10715 fputc ('\n', asm_out_file);
10716
10717 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
10718 xops[1] = GEN_INT (PROBE_INTERVAL);
10719 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
10720
10721 /* Probe at TEST_ADDR. */
10722 xops[0] = stack_pointer_rtx;
10723 xops[1] = reg;
10724 xops[2] = const0_rtx;
10725 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);
10726
10727 fprintf (asm_out_file, "\tjmp\t");
10728 assemble_name_raw (asm_out_file, loop_lab);
10729 fputc ('\n', asm_out_file);
10730
10731 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
10732
10733 return "";
10734 }
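/* Likewise, the range-probing loop above emits roughly the following
   (AT&T syntax, 32-bit example; the scratch register %ecx, the 4096-byte
   interval and the -16384 end offset are all illustrative assumptions):

   .LPSRL1:	cmpl	$-16384, %ecx
		je	.LPSRE1
		subl	$4096, %ecx
		orl	$0, (%esp,%ecx)
		jmp	.LPSRL1
   .LPSRE1:  */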
10735
10736 /* Finalize stack_realign_needed flag, which will guide prologue/epilogue
10737 to be generated in correct form. */
10738 static void
10739 ix86_finalize_stack_realign_flags (void)
10740 {
10741 /* Check if stack realignment is really needed after reload, and
10742 store the result in cfun. */
10743 unsigned int incoming_stack_boundary
10744 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
10745 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
10746 unsigned int stack_realign = (incoming_stack_boundary
10747 < (crtl->is_leaf
10748 ? crtl->max_used_stack_slot_alignment
10749 : crtl->stack_alignment_needed));
10750
10751 if (crtl->stack_realign_finalized)
10752 {
10753 /* After stack_realign_needed is finalized, we can no longer
10754 change it. */
10755 gcc_assert (crtl->stack_realign_needed == stack_realign);
10756 return;
10757 }
10758
10759 /* If the only reason for frame_pointer_needed is that we conservatively
10760 assumed stack realignment might be needed, but in the end nothing that
10761 needed the stack alignment had been spilled, clear frame_pointer_needed
10762 and say we don't need stack realignment. */
10763 if (stack_realign
10764 && frame_pointer_needed
10765 && crtl->is_leaf
10766 && flag_omit_frame_pointer
10767 && crtl->sp_is_unchanging
10768 && !ix86_current_function_calls_tls_descriptor
10769 && !crtl->accesses_prior_frames
10770 && !cfun->calls_alloca
10771 && !crtl->calls_eh_return
10772 && !(flag_stack_check && STACK_CHECK_MOVING_SP)
10773 && !ix86_frame_pointer_required ()
10774 && get_frame_size () == 0
10775 && ix86_nsaved_sseregs () == 0
10776 && ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0)
10777 {
10778 HARD_REG_SET set_up_by_prologue, prologue_used;
10779 basic_block bb;
10780
10781 CLEAR_HARD_REG_SET (prologue_used);
10782 CLEAR_HARD_REG_SET (set_up_by_prologue);
10783 add_to_hard_reg_set (&set_up_by_prologue, Pmode, STACK_POINTER_REGNUM);
10784 add_to_hard_reg_set (&set_up_by_prologue, Pmode, ARG_POINTER_REGNUM);
10785 add_to_hard_reg_set (&set_up_by_prologue, Pmode,
10786 HARD_FRAME_POINTER_REGNUM);
10787 FOR_EACH_BB_FN (bb, cfun)
10788 {
10789 rtx_insn *insn;
10790 FOR_BB_INSNS (bb, insn)
10791 if (NONDEBUG_INSN_P (insn)
10792 && requires_stack_frame_p (insn, prologue_used,
10793 set_up_by_prologue))
10794 {
10795 crtl->stack_realign_needed = stack_realign;
10796 crtl->stack_realign_finalized = true;
10797 return;
10798 }
10799 }
10800
10801 /* If drap has been set, but it actually isn't live at the start
10802 of the function, there is no reason to set it up. */
10803 if (crtl->drap_reg)
10804 {
10805 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
10806 if (! REGNO_REG_SET_P (DF_LR_IN (bb), REGNO (crtl->drap_reg)))
10807 {
10808 crtl->drap_reg = NULL_RTX;
10809 crtl->need_drap = false;
10810 }
10811 }
10812 else
10813 cfun->machine->no_drap_save_restore = true;
10814
10815 frame_pointer_needed = false;
10816 stack_realign = false;
10817 crtl->max_used_stack_slot_alignment = incoming_stack_boundary;
10818 crtl->stack_alignment_needed = incoming_stack_boundary;
10819 crtl->stack_alignment_estimated = incoming_stack_boundary;
10820 if (crtl->preferred_stack_boundary > incoming_stack_boundary)
10821 crtl->preferred_stack_boundary = incoming_stack_boundary;
10822 df_finish_pass (true);
10823 df_scan_alloc (NULL);
10824 df_scan_blocks ();
10825 df_compute_regs_ever_live (true);
10826 df_analyze ();
10827 }
10828
10829 crtl->stack_realign_needed = stack_realign;
10830 crtl->stack_realign_finalized = true;
10831 }
10832
10833 /* Expand the prologue into a bunch of separate insns. */
10834
10835 void
10836 ix86_expand_prologue (void)
10837 {
10838 struct machine_function *m = cfun->machine;
10839 rtx insn, t;
10840 struct ix86_frame frame;
10841 HOST_WIDE_INT allocate;
10842 bool int_registers_saved;
10843 bool sse_registers_saved;
10844
10845 ix86_finalize_stack_realign_flags ();
10846
10847 /* DRAP should not coexist with stack_realign_fp */
10848 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
10849
10850 memset (&m->fs, 0, sizeof (m->fs));
10851
10852 /* Initialize CFA state for before the prologue. */
10853 m->fs.cfa_reg = stack_pointer_rtx;
10854 m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;
10855
10856 /* Track SP offset to the CFA. We continue tracking this after we've
10857 swapped the CFA register away from SP. In the case of re-alignment
10858 this is fudged; we're interested in offsets within the local frame. */
10859 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
10860 m->fs.sp_valid = true;
10861
10862 ix86_compute_frame_layout (&frame);
10863
10864 if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
10865 {
10866 /* We should have already generated an error for any use of
10867 ms_hook on a nested function. */
10868 gcc_checking_assert (!ix86_static_chain_on_stack);
10869
10870 /* Check if profiling is active and we should use the profile-before-prologue
10871 variant. If so, issue a sorry. */
10872 if (crtl->profile && flag_fentry != 0)
10873 sorry ("ms_hook_prologue attribute isn%'t compatible "
10874 "with -mfentry for 32-bit");
10875
10876 /* In ix86_asm_output_function_label we emitted:
10877 8b ff movl.s %edi,%edi
10878 55 push %ebp
10879 8b ec movl.s %esp,%ebp
10880
10881 This matches the hookable function prologue in Win32 API
10882 functions in Microsoft Windows XP Service Pack 2 and newer.
10883 Wine uses this to enable Windows apps to hook the Win32 API
10884 functions provided by Wine.
10885
10886 What that means is that we've already set up the frame pointer. */
10887
10888 if (frame_pointer_needed
10889 && !(crtl->drap_reg && crtl->stack_realign_needed))
10890 {
10891 rtx push, mov;
10892
10893 /* We've decided to use the frame pointer already set up.
10894 Describe this to the unwinder by pretending that both
10895 push and mov insns happen right here.
10896
10897 Putting the unwind info here at the end of the ms_hook
10898 is done so that we can make absolutely certain we get
10899 the required byte sequence at the start of the function,
10900 rather than relying on an assembler that can produce
10901 the exact encoding required.
10902
10903 However it does mean (in the unpatched case) that we have
10904 a 1 insn window where the asynchronous unwind info is
10905 incorrect. However, if we placed the unwind info at
10906 its correct location we would have incorrect unwind info
10907 in the patched case. Which is probably all moot since
10908 I don't expect Wine generates dwarf2 unwind info for the
10909 system libraries that use this feature. */
10910
10911 insn = emit_insn (gen_blockage ());
10912
10913 push = gen_push (hard_frame_pointer_rtx);
10914 mov = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
10915 stack_pointer_rtx);
10916 RTX_FRAME_RELATED_P (push) = 1;
10917 RTX_FRAME_RELATED_P (mov) = 1;
10918
10919 RTX_FRAME_RELATED_P (insn) = 1;
10920 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
10921 gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));
10922
10923 /* Note that gen_push incremented m->fs.cfa_offset, even
10924 though we didn't emit the push insn here. */
10925 m->fs.cfa_reg = hard_frame_pointer_rtx;
10926 m->fs.fp_offset = m->fs.cfa_offset;
10927 m->fs.fp_valid = true;
10928 }
10929 else
10930 {
10931 /* The frame pointer is not needed so pop %ebp again.
10932 This leaves us with a pristine state. */
10933 emit_insn (gen_pop (hard_frame_pointer_rtx));
10934 }
10935 }
10936
10937 /* The first insn of a function that accepts its static chain on the
10938 stack is to push the register that would be filled in by a direct
10939 call. This insn will be skipped by the trampoline. */
10940 else if (ix86_static_chain_on_stack)
10941 {
10942 insn = emit_insn (gen_push (ix86_static_chain (cfun->decl, false)));
10943 emit_insn (gen_blockage ());
10944
10945 /* We don't want to interpret this push insn as a register save,
10946 only as a stack adjustment. The real copy of the register as
10947 a save will be done later, if needed. */
10948 t = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD);
10949 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
10950 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
10951 RTX_FRAME_RELATED_P (insn) = 1;
10952 }
10953
10954 /* Emit prologue code to adjust stack alignment and set up DRAP, in case
10955 DRAP is needed and stack realignment is really needed after reload. */
10956 if (stack_realign_drap)
10957 {
10958 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
10959
10960 /* Only need to push the parameter pointer reg if it is callee saved. */
10961 if (!call_used_regs[REGNO (crtl->drap_reg)])
10962 {
10963 /* Push arg pointer reg */
10964 insn = emit_insn (gen_push (crtl->drap_reg));
10965 RTX_FRAME_RELATED_P (insn) = 1;
10966 }
10967
10968 /* Grab the argument pointer. */
10969 t = plus_constant (Pmode, stack_pointer_rtx, m->fs.sp_offset);
10970 insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
10971 RTX_FRAME_RELATED_P (insn) = 1;
10972 m->fs.cfa_reg = crtl->drap_reg;
10973 m->fs.cfa_offset = 0;
10974
10975 /* Align the stack. */
10976 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
10977 stack_pointer_rtx,
10978 GEN_INT (-align_bytes)));
10979 RTX_FRAME_RELATED_P (insn) = 1;
10980
10981 /* Replicate the return address on the stack so that the return
10982 address can be reached via the (argp - 1) slot. This is needed
10983 to implement the macro RETURN_ADDR_RTX and the intrinsic function
10984 expand_builtin_return_addr etc. */
10985 t = plus_constant (Pmode, crtl->drap_reg, -UNITS_PER_WORD);
10986 t = gen_frame_mem (word_mode, t);
10987 insn = emit_insn (gen_push (t));
10988 RTX_FRAME_RELATED_P (insn) = 1;
10989
10990 /* For the purposes of frame and register save area addressing,
10991 we've started over with a new frame. */
10992 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
10993 m->fs.realigned = true;
10994 }
10995
10996 int_registers_saved = (frame.nregs == 0);
10997 sse_registers_saved = (frame.nsseregs == 0);
10998
10999 if (frame_pointer_needed && !m->fs.fp_valid)
11000 {
11001 /* Note: AT&T enter does NOT have reversed args. Enter is probably
11002 slower on all targets. Also sdb doesn't like it. */
11003 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
11004 RTX_FRAME_RELATED_P (insn) = 1;
11005
11006 /* Push registers now, before setting the frame pointer
11007 on SEH target. */
11008 if (!int_registers_saved
11009 && TARGET_SEH
11010 && !frame.save_regs_using_mov)
11011 {
11012 ix86_emit_save_regs ();
11013 int_registers_saved = true;
11014 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
11015 }
11016
11017 if (m->fs.sp_offset == frame.hard_frame_pointer_offset)
11018 {
11019 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
11020 RTX_FRAME_RELATED_P (insn) = 1;
11021
11022 if (m->fs.cfa_reg == stack_pointer_rtx)
11023 m->fs.cfa_reg = hard_frame_pointer_rtx;
11024 m->fs.fp_offset = m->fs.sp_offset;
11025 m->fs.fp_valid = true;
11026 }
11027 }
11028
11029 if (!int_registers_saved)
11030 {
11031 /* If saving registers via PUSH, do so now. */
11032 if (!frame.save_regs_using_mov)
11033 {
11034 ix86_emit_save_regs ();
11035 int_registers_saved = true;
11036 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
11037 }
11038
11039 /* When using the red zone, we may start register saving before allocating
11040 the stack frame, saving one cycle of the prologue. However, avoid
11041 doing this if we have to probe the stack; at least on x86_64 the
11042 stack probe can turn into a call that clobbers a red zone location. */
11043 else if (ix86_using_red_zone ()
11044 && (! TARGET_STACK_PROBE
11045 || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
11046 {
11047 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
11048 int_registers_saved = true;
11049 }
11050 }
11051
11052 if (stack_realign_fp)
11053 {
11054 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
11055 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
11056
11057 /* The computation of the size of the re-aligned stack frame means
11058 that we must allocate the size of the register save area before
11059 performing the actual alignment. Otherwise we cannot guarantee
11060 that there's enough storage above the realignment point. */
11061 if (m->fs.sp_offset != frame.sse_reg_save_offset)
11062 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11063 GEN_INT (m->fs.sp_offset
11064 - frame.sse_reg_save_offset),
11065 -1, false);
11066
11067 /* Align the stack. */
11068 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
11069 stack_pointer_rtx,
11070 GEN_INT (-align_bytes)));
11071
11072 /* For the purposes of register save area addressing, the stack
11073 pointer is no longer valid. As for the value of sp_offset,
11074 see ix86_compute_frame_layout, which we need to match in order
11075 to pass verification of stack_pointer_offset at the end. */
11076 m->fs.sp_offset = (m->fs.sp_offset + align_bytes) & -align_bytes;
11077 m->fs.sp_valid = false;
11078 }
11079
11080 allocate = frame.stack_pointer_offset - m->fs.sp_offset;
11081
11082 if (flag_stack_usage_info)
11083 {
11084 /* We start to count from ARG_POINTER. */
11085 HOST_WIDE_INT stack_size = frame.stack_pointer_offset;
11086
11087 /* If it was realigned, take into account the fake frame. */
11088 if (stack_realign_drap)
11089 {
11090 if (ix86_static_chain_on_stack)
11091 stack_size += UNITS_PER_WORD;
11092
11093 if (!call_used_regs[REGNO (crtl->drap_reg)])
11094 stack_size += UNITS_PER_WORD;
11095
11096 /* This over-estimates by 1 minimal-stack-alignment-unit but
11097 mitigates that by counting in the new return address slot. */
11098 current_function_dynamic_stack_size
11099 += crtl->stack_alignment_needed / BITS_PER_UNIT;
11100 }
11101
11102 current_function_static_stack_size = stack_size;
11103 }
11104
11105 /* On SEH targets with a very large frame size, allocate an area to save
11106 SSE registers (as the very large allocation won't be described). */
11107 if (TARGET_SEH
11108 && frame.stack_pointer_offset > SEH_MAX_FRAME_SIZE
11109 && !sse_registers_saved)
11110 {
11111 HOST_WIDE_INT sse_size =
11112 frame.sse_reg_save_offset - frame.reg_save_offset;
11113
11114 gcc_assert (int_registers_saved);
11115
11116 /* No need to do stack checking as the area will be immediately
11117 written. */
11118 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11119 GEN_INT (-sse_size), -1,
11120 m->fs.cfa_reg == stack_pointer_rtx);
11121 allocate -= sse_size;
11122 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
11123 sse_registers_saved = true;
11124 }
11125
11126 /* The stack has already been decremented by the instruction calling us,
11127 so probe if the size is non-negative to preserve the protection area. */
11128 if (allocate >= 0 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
11129 {
11130 /* We expect the registers to be saved when probes are used. */
11131 gcc_assert (int_registers_saved);
11132
11133 if (STACK_CHECK_MOVING_SP)
11134 {
11135 if (!(crtl->is_leaf && !cfun->calls_alloca
11136 && allocate <= PROBE_INTERVAL))
11137 {
11138 ix86_adjust_stack_and_probe (allocate);
11139 allocate = 0;
11140 }
11141 }
11142 else
11143 {
11144 HOST_WIDE_INT size = allocate;
11145
11146 if (TARGET_64BIT && size >= (HOST_WIDE_INT) 0x80000000)
11147 size = 0x80000000 - STACK_CHECK_PROTECT - 1;
11148
11149 if (TARGET_STACK_PROBE)
11150 {
11151 if (crtl->is_leaf && !cfun->calls_alloca)
11152 {
11153 if (size > PROBE_INTERVAL)
11154 ix86_emit_probe_stack_range (0, size);
11155 }
11156 else
11157 ix86_emit_probe_stack_range (0, size + STACK_CHECK_PROTECT);
11158 }
11159 else
11160 {
11161 if (crtl->is_leaf && !cfun->calls_alloca)
11162 {
11163 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
11164 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT,
11165 size - STACK_CHECK_PROTECT);
11166 }
11167 else
11168 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
11169 }
11170 }
11171 }
11172
11173 if (allocate == 0)
11174 ;
11175 else if (!ix86_target_stack_probe ()
11176 || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
11177 {
11178 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11179 GEN_INT (-allocate), -1,
11180 m->fs.cfa_reg == stack_pointer_rtx);
11181 }
11182 else
11183 {
11184 rtx eax = gen_rtx_REG (Pmode, AX_REG);
11185 rtx r10 = NULL;
11186 rtx (*adjust_stack_insn)(rtx, rtx, rtx);
11187 const bool sp_is_cfa_reg = (m->fs.cfa_reg == stack_pointer_rtx);
11188 bool eax_live = ix86_eax_live_at_start_p ();
11189 bool r10_live = false;
11190
11191 if (TARGET_64BIT)
11192 r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);
11193
11194 if (eax_live)
11195 {
11196 insn = emit_insn (gen_push (eax));
11197 allocate -= UNITS_PER_WORD;
11198 /* Note that SEH directives need to continue tracking the stack
11199 pointer even after the frame pointer has been set up. */
11200 if (sp_is_cfa_reg || TARGET_SEH)
11201 {
11202 if (sp_is_cfa_reg)
11203 m->fs.cfa_offset += UNITS_PER_WORD;
11204 RTX_FRAME_RELATED_P (insn) = 1;
11205 }
11206 }
11207
11208 if (r10_live)
11209 {
11210 r10 = gen_rtx_REG (Pmode, R10_REG);
11211 insn = emit_insn (gen_push (r10));
11212 allocate -= UNITS_PER_WORD;
11213 if (sp_is_cfa_reg || TARGET_SEH)
11214 {
11215 if (sp_is_cfa_reg)
11216 m->fs.cfa_offset += UNITS_PER_WORD;
11217 RTX_FRAME_RELATED_P (insn) = 1;
11218 }
11219 }
11220
11221 emit_move_insn (eax, GEN_INT (allocate));
11222 emit_insn (ix86_gen_allocate_stack_worker (eax, eax));
11223
11224 /* Use the fact that AX still contains ALLOCATE. */
11225 adjust_stack_insn = (Pmode == DImode
11226 ? gen_pro_epilogue_adjust_stack_di_sub
11227 : gen_pro_epilogue_adjust_stack_si_sub);
11228
11229 insn = emit_insn (adjust_stack_insn (stack_pointer_rtx,
11230 stack_pointer_rtx, eax));
11231
11232 if (sp_is_cfa_reg || TARGET_SEH)
11233 {
11234 if (sp_is_cfa_reg)
11235 m->fs.cfa_offset += allocate;
11236 RTX_FRAME_RELATED_P (insn) = 1;
11237 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11238 gen_rtx_SET (VOIDmode, stack_pointer_rtx,
11239 plus_constant (Pmode, stack_pointer_rtx,
11240 -allocate)));
11241 }
11242 m->fs.sp_offset += allocate;
11243
11244 /* Use stack_pointer_rtx for relative addressing so that code
11245 works for realigned stack, too. */
11246 if (r10_live && eax_live)
11247 {
11248 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
11249 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
11250 gen_frame_mem (word_mode, t));
11251 t = plus_constant (Pmode, t, UNITS_PER_WORD);
11252 emit_move_insn (gen_rtx_REG (word_mode, AX_REG),
11253 gen_frame_mem (word_mode, t));
11254 }
11255 else if (eax_live || r10_live)
11256 {
11257 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
11258 emit_move_insn (gen_rtx_REG (word_mode,
11259 (eax_live ? AX_REG : R10_REG)),
11260 gen_frame_mem (word_mode, t));
11261 }
11262 }
11263 gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);
11264
11265 /* If we haven't already set up the frame pointer, do so now. */
11266 if (frame_pointer_needed && !m->fs.fp_valid)
11267 {
11268 insn = ix86_gen_add3 (hard_frame_pointer_rtx, stack_pointer_rtx,
11269 GEN_INT (frame.stack_pointer_offset
11270 - frame.hard_frame_pointer_offset));
11271 insn = emit_insn (insn);
11272 RTX_FRAME_RELATED_P (insn) = 1;
11273 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
11274
11275 if (m->fs.cfa_reg == stack_pointer_rtx)
11276 m->fs.cfa_reg = hard_frame_pointer_rtx;
11277 m->fs.fp_offset = frame.hard_frame_pointer_offset;
11278 m->fs.fp_valid = true;
11279 }
11280
11281 if (!int_registers_saved)
11282 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
11283 if (!sse_registers_saved)
11284 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
11285
11286 if (crtl->drap_reg && !crtl->stack_realign_needed)
11287 {
11288 /* vDRAP is set up, but after reload it turns out stack realignment
11289 isn't necessary; here we emit prologue code to set up DRAP
11290 without the stack realignment adjustment. */
11291 t = choose_baseaddr (0);
11292 emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
11293 }
11294
11295 /* Prevent instructions from being scheduled into register save push
11296 sequence when access to the redzone area is done through frame pointer.
11297 The offset between the frame pointer and the stack pointer is calculated
11298 relative to the value of the stack pointer at the end of the function
11299 prologue, and moving instructions that access redzone area via frame
11300 pointer inside push sequence violates this assumption. */
11301 if (frame_pointer_needed && frame.red_zone_size)
11302 emit_insn (gen_memory_blockage ());
11303
11304 /* Emit cld instruction if stringops are used in the function. */
11305 if (TARGET_CLD && ix86_current_function_needs_cld)
11306 emit_insn (gen_cld ());
11307
11308 /* SEH requires that the prologue end within 256 bytes of the start of
11309 the function. Prevent instruction schedules that would extend that.
11310 Further, prevent alloca modifications to the stack pointer from being
11311 combined with prologue modifications. */
11312 if (TARGET_SEH)
11313 emit_insn (gen_prologue_use (stack_pointer_rtx));
11314 }
11315
11316 /* Emit code to restore REG using a POP insn. */
11317
11318 static void
11319 ix86_emit_restore_reg_using_pop (rtx reg)
11320 {
11321 struct machine_function *m = cfun->machine;
11322 rtx insn = emit_insn (gen_pop (reg));
11323
11324 ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
11325 m->fs.sp_offset -= UNITS_PER_WORD;
11326
11327 if (m->fs.cfa_reg == crtl->drap_reg
11328 && REGNO (reg) == REGNO (crtl->drap_reg))
11329 {
11330 /* Previously we'd represented the CFA as an expression
11331 like *(%ebp - 8). We've just popped that value from
11332 the stack, which means we need to reset the CFA to
11333 the drap register. This will remain until we restore
11334 the stack pointer. */
11335 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
11336 RTX_FRAME_RELATED_P (insn) = 1;
11337
11338 /* This means that the DRAP register is valid for addressing too. */
11339 m->fs.drap_valid = true;
11340 return;
11341 }
11342
11343 if (m->fs.cfa_reg == stack_pointer_rtx)
11344 {
11345 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
11346 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
11347 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
11348 RTX_FRAME_RELATED_P (insn) = 1;
11349
11350 m->fs.cfa_offset -= UNITS_PER_WORD;
11351 }
11352
11353 /* When the frame pointer is the CFA, and we pop it, we are
11354 swapping back to the stack pointer as the CFA. This happens
11355 for stack frames that don't allocate other data, so we assume
11356 the stack pointer is now pointing at the return address, i.e.
11357 the function entry state, which makes the offset be 1 word. */
11358 if (reg == hard_frame_pointer_rtx)
11359 {
11360 m->fs.fp_valid = false;
11361 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
11362 {
11363 m->fs.cfa_reg = stack_pointer_rtx;
11364 m->fs.cfa_offset -= UNITS_PER_WORD;
11365
11366 add_reg_note (insn, REG_CFA_DEF_CFA,
11367 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11368 GEN_INT (m->fs.cfa_offset)));
11369 RTX_FRAME_RELATED_P (insn) = 1;
11370 }
11371 }
11372 }
11373
11374 /* Emit code to restore saved registers using POP insns. */
11375
11376 static void
11377 ix86_emit_restore_regs_using_pop (void)
11378 {
11379 unsigned int regno;
11380
11381 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11382 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
11383 ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno));
11384 }
11385
11386 /* Emit code and notes for the LEAVE instruction. */
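/* leave is equivalent to "mov %ebp, %esp" followed by "pop %ebp", which is
   why the bookkeeping below revalidates SP from FP and invalidates FP.  */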
11387
11388 static void
11389 ix86_emit_leave (void)
11390 {
11391 struct machine_function *m = cfun->machine;
11392 rtx insn = emit_insn (ix86_gen_leave ());
11393
11394 ix86_add_queued_cfa_restore_notes (insn);
11395
11396 gcc_assert (m->fs.fp_valid);
11397 m->fs.sp_valid = true;
11398 m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
11399 m->fs.fp_valid = false;
11400
11401 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
11402 {
11403 m->fs.cfa_reg = stack_pointer_rtx;
11404 m->fs.cfa_offset = m->fs.sp_offset;
11405
11406 add_reg_note (insn, REG_CFA_DEF_CFA,
11407 plus_constant (Pmode, stack_pointer_rtx,
11408 m->fs.sp_offset));
11409 RTX_FRAME_RELATED_P (insn) = 1;
11410 }
11411 ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
11412 m->fs.fp_offset);
11413 }
11414
11415 /* Emit code to restore saved registers using MOV insns.
11416 First register is restored from CFA - CFA_OFFSET. */
11417 static void
11418 ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
11419 bool maybe_eh_return)
11420 {
11421 struct machine_function *m = cfun->machine;
11422 unsigned int regno;
11423
11424 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11425 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
11426 {
11427 rtx reg = gen_rtx_REG (word_mode, regno);
11428 rtx insn, mem;
11429
11430 mem = choose_baseaddr (cfa_offset);
11431 mem = gen_frame_mem (word_mode, mem);
11432 insn = emit_move_insn (reg, mem);
11433
11434 if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
11435 {
11436 /* Previously we'd represented the CFA as an expression
11437 like *(%ebp - 8). We've just popped that value from
11438 the stack, which means we need to reset the CFA to
11439 the drap register. This will remain until we restore
11440 the stack pointer. */
11441 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
11442 RTX_FRAME_RELATED_P (insn) = 1;
11443
11444 /* This means that the DRAP register is valid for addressing. */
11445 m->fs.drap_valid = true;
11446 }
11447 else
11448 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
11449
11450 cfa_offset -= UNITS_PER_WORD;
11451 }
11452 }
11453
11454 /* Emit code to restore saved SSE registers using MOV insns.
11455 The first register is restored from CFA - CFA_OFFSET. */
11456 static void
11457 ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
11458 bool maybe_eh_return)
11459 {
11460 unsigned int regno;
11461
11462 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11463 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
11464 {
11465 rtx reg = gen_rtx_REG (V4SFmode, regno);
11466 rtx mem;
11467
11468 mem = choose_baseaddr (cfa_offset);
11469 mem = gen_rtx_MEM (V4SFmode, mem);
11470 set_mem_align (mem, 128);
11471 emit_move_insn (reg, mem);
11472
11473 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
11474
11475 cfa_offset -= 16;
11476 }
11477 }
11478
11479 /* Restore function stack, frame, and registers. */
11480
11481 void
11482 ix86_expand_epilogue (int style)
11483 {
11484 struct machine_function *m = cfun->machine;
11485 struct machine_frame_state frame_state_save = m->fs;
11486 struct ix86_frame frame;
11487 bool restore_regs_via_mov;
11488 bool using_drap;
11489
11490 ix86_finalize_stack_realign_flags ();
11491 ix86_compute_frame_layout (&frame);
11492
11493 m->fs.sp_valid = (!frame_pointer_needed
11494 || (crtl->sp_is_unchanging
11495 && !stack_realign_fp));
11496 gcc_assert (!m->fs.sp_valid
11497 || m->fs.sp_offset == frame.stack_pointer_offset);
11498
11499 /* The FP must be valid if the frame pointer is present. */
11500 gcc_assert (frame_pointer_needed == m->fs.fp_valid);
11501 gcc_assert (!m->fs.fp_valid
11502 || m->fs.fp_offset == frame.hard_frame_pointer_offset);
11503
11504 /* We must have *some* valid pointer to the stack frame. */
11505 gcc_assert (m->fs.sp_valid || m->fs.fp_valid);
11506
11507 /* The DRAP is never valid at this point. */
11508 gcc_assert (!m->fs.drap_valid);
11509
11510 /* See the comment about red zone and frame
11511 pointer usage in ix86_expand_prologue. */
11512 if (frame_pointer_needed && frame.red_zone_size)
11513 emit_insn (gen_memory_blockage ());
11514
11515 using_drap = crtl->drap_reg && crtl->stack_realign_needed;
11516 gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);
11517
11518 /* Determine the CFA offset of the end of the red-zone. */
11519 m->fs.red_zone_offset = 0;
11520 if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
11521 {
11522 /* The red-zone begins below the return address. */
11523 m->fs.red_zone_offset = RED_ZONE_SIZE + UNITS_PER_WORD;
11524
11525 /* When the register save area is in the aligned portion of
11526 the stack, determine the maximum runtime displacement that
11527 matches up with the aligned frame. */
11528 if (stack_realign_drap)
11529 m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
11530 + UNITS_PER_WORD);
11531 }
11532
11533 /* Special care must be taken for the normal return case of a function
11534 using eh_return: the eax and edx registers are marked as saved, but
11535 not restored along this path. Adjust the save location to match. */
11536 if (crtl->calls_eh_return && style != 2)
11537 frame.reg_save_offset -= 2 * UNITS_PER_WORD;
11538
11539 /* EH_RETURN requires the use of moves to function properly. */
11540 if (crtl->calls_eh_return)
11541 restore_regs_via_mov = true;
11542 /* SEH requires the use of pops to identify the epilogue. */
11543 else if (TARGET_SEH)
11544 restore_regs_via_mov = false;
11545 /* If we're only restoring one register and sp is not valid, then
11546 use a move instruction to restore the register, since it's
11547 less work than reloading sp and popping the register. */
11548 else if (!m->fs.sp_valid && frame.nregs <= 1)
11549 restore_regs_via_mov = true;
11550 else if (TARGET_EPILOGUE_USING_MOVE
11551 && cfun->machine->use_fast_prologue_epilogue
11552 && (frame.nregs > 1
11553 || m->fs.sp_offset != frame.reg_save_offset))
11554 restore_regs_via_mov = true;
11555 else if (frame_pointer_needed
11556 && !frame.nregs
11557 && m->fs.sp_offset != frame.reg_save_offset)
11558 restore_regs_via_mov = true;
11559 else if (frame_pointer_needed
11560 && TARGET_USE_LEAVE
11561 && cfun->machine->use_fast_prologue_epilogue
11562 && frame.nregs == 1)
11563 restore_regs_via_mov = true;
11564 else
11565 restore_regs_via_mov = false;
11566
11567 if (restore_regs_via_mov || frame.nsseregs)
11568 {
11569 /* Ensure that the entire register save area is addressable via
11570 the stack pointer, if we will restore via sp. */
11571 if (TARGET_64BIT
11572 && m->fs.sp_offset > 0x7fffffff
11573 && !(m->fs.fp_valid || m->fs.drap_valid)
11574 && (frame.nsseregs + frame.nregs) != 0)
11575 {
11576 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11577 GEN_INT (m->fs.sp_offset
11578 - frame.sse_reg_save_offset),
11579 style,
11580 m->fs.cfa_reg == stack_pointer_rtx);
11581 }
11582 }
11583
11584 /* If there are any SSE registers to restore, then we have to do it
11585 via moves, since there's obviously no pop for SSE regs. */
11586 if (frame.nsseregs)
11587 ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
11588 style == 2);
11589
11590 if (restore_regs_via_mov)
11591 {
11592 rtx t;
11593
11594 if (frame.nregs)
11595 ix86_emit_restore_regs_using_mov (frame.reg_save_offset, style == 2);
11596
11597 /* eh_return epilogues need %ecx added to the stack pointer. */
11598 if (style == 2)
11599 {
11600 rtx insn, sa = EH_RETURN_STACKADJ_RTX;
11601
11602 /* Stack align doesn't work with eh_return. */
11603 gcc_assert (!stack_realign_drap);
11604 /* Neither do regparm nested functions. */
11605 gcc_assert (!ix86_static_chain_on_stack);
11606
11607 if (frame_pointer_needed)
11608 {
11609 t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
11610 t = plus_constant (Pmode, t, m->fs.fp_offset - UNITS_PER_WORD);
11611 emit_insn (gen_rtx_SET (VOIDmode, sa, t));
11612
11613 t = gen_frame_mem (Pmode, hard_frame_pointer_rtx);
11614 insn = emit_move_insn (hard_frame_pointer_rtx, t);
11615
11616 /* Note that we use SA as a temporary CFA, as the return
11617 address is at the proper place relative to it. We
11618 pretend this happens at the FP restore insn because
11619 prior to this insn the FP would be stored at the wrong
11620 offset relative to SA, and after this insn we have no
11621 other reasonable register to use for the CFA. We don't
11622 bother resetting the CFA to the SP for the duration of
11623 the return insn. */
11624 add_reg_note (insn, REG_CFA_DEF_CFA,
11625 plus_constant (Pmode, sa, UNITS_PER_WORD));
11626 ix86_add_queued_cfa_restore_notes (insn);
11627 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
11628 RTX_FRAME_RELATED_P (insn) = 1;
11629
11630 m->fs.cfa_reg = sa;
11631 m->fs.cfa_offset = UNITS_PER_WORD;
11632 m->fs.fp_valid = false;
11633
11634 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
11635 const0_rtx, style, false);
11636 }
11637 else
11638 {
11639 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
11640 t = plus_constant (Pmode, t, m->fs.sp_offset - UNITS_PER_WORD);
11641 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, t));
11642 ix86_add_queued_cfa_restore_notes (insn);
11643
11644 gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
11645 if (m->fs.cfa_offset != UNITS_PER_WORD)
11646 {
11647 m->fs.cfa_offset = UNITS_PER_WORD;
11648 add_reg_note (insn, REG_CFA_DEF_CFA,
11649 plus_constant (Pmode, stack_pointer_rtx,
11650 UNITS_PER_WORD));
11651 RTX_FRAME_RELATED_P (insn) = 1;
11652 }
11653 }
11654 m->fs.sp_offset = UNITS_PER_WORD;
11655 m->fs.sp_valid = true;
11656 }
11657 }
11658 else
11659 {
11660 /* SEH requires that the function end with (1) a stack adjustment
11661 if necessary, (2) a sequence of pops, and (3) a return or
11662 jump instruction. Prevent insns from the function body from
11663 being scheduled into this sequence. */
11664 if (TARGET_SEH)
11665 {
11666 /* Prevent a catch region from being adjacent to the standard
11667 epilogue sequence. Unfortunately, neither crtl->uses_eh_lsda nor
11668 several other flags that would be interesting to test are
11669 set up yet. */
11670 if (flag_non_call_exceptions)
11671 emit_insn (gen_nops (const1_rtx));
11672 else
11673 emit_insn (gen_blockage ());
11674 }
11675
11676 /* The first step is to deallocate the stack frame so that we can
11677 pop the registers. Also do it on SEH targets for very large
11678 frames, as the emitted instructions aren't allowed by the ABI in
11679 epilogues. */
11680 if (!m->fs.sp_valid
11681 || (TARGET_SEH
11682 && (m->fs.sp_offset - frame.reg_save_offset
11683 >= SEH_MAX_FRAME_SIZE)))
11684 {
11685 pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
11686 GEN_INT (m->fs.fp_offset
11687 - frame.reg_save_offset),
11688 style, false);
11689 }
11690 else if (m->fs.sp_offset != frame.reg_save_offset)
11691 {
11692 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11693 GEN_INT (m->fs.sp_offset
11694 - frame.reg_save_offset),
11695 style,
11696 m->fs.cfa_reg == stack_pointer_rtx);
11697 }
11698
11699 ix86_emit_restore_regs_using_pop ();
11700 }
11701
11702 /* If we used a frame pointer and haven't already got rid of it,
11703 then do so now. */
11704 if (m->fs.fp_valid)
11705 {
11706 /* If the stack pointer is valid and pointing at the frame
11707 pointer store address, then we only need a pop. */
11708 if (m->fs.sp_valid && m->fs.sp_offset == frame.hfp_save_offset)
11709 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
11710 /* The leave insn results in shorter dependency chains on CPUs that
11711 are able to grok it fast. */
11712 else if (TARGET_USE_LEAVE
11713 || optimize_bb_for_size_p (EXIT_BLOCK_PTR_FOR_FN (cfun))
11714 || !cfun->machine->use_fast_prologue_epilogue)
11715 ix86_emit_leave ();
11716 else
11717 {
11718 pro_epilogue_adjust_stack (stack_pointer_rtx,
11719 hard_frame_pointer_rtx,
11720 const0_rtx, style, !using_drap);
11721 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
11722 }
11723 }
11724
11725 if (using_drap)
11726 {
11727 int param_ptr_offset = UNITS_PER_WORD;
11728 rtx insn;
11729
11730 gcc_assert (stack_realign_drap);
11731
11732 if (ix86_static_chain_on_stack)
11733 param_ptr_offset += UNITS_PER_WORD;
11734 if (!call_used_regs[REGNO (crtl->drap_reg)])
11735 param_ptr_offset += UNITS_PER_WORD;
11736
11737 insn = emit_insn (gen_rtx_SET
11738 (VOIDmode, stack_pointer_rtx,
11739 gen_rtx_PLUS (Pmode,
11740 crtl->drap_reg,
11741 GEN_INT (-param_ptr_offset))));
11742 m->fs.cfa_reg = stack_pointer_rtx;
11743 m->fs.cfa_offset = param_ptr_offset;
11744 m->fs.sp_offset = param_ptr_offset;
11745 m->fs.realigned = false;
11746
11747 add_reg_note (insn, REG_CFA_DEF_CFA,
11748 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11749 GEN_INT (param_ptr_offset)));
11750 RTX_FRAME_RELATED_P (insn) = 1;
11751
11752 if (!call_used_regs[REGNO (crtl->drap_reg)])
11753 ix86_emit_restore_reg_using_pop (crtl->drap_reg);
11754 }
11755
11756 /* At this point the stack pointer must be valid, and we must have
11757 restored all of the registers. We may not have deallocated the
11758 entire stack frame. We've delayed this until now because it may
11759 be possible to merge the local stack deallocation with the
11760 deallocation forced by ix86_static_chain_on_stack. */
11761 gcc_assert (m->fs.sp_valid);
11762 gcc_assert (!m->fs.fp_valid);
11763 gcc_assert (!m->fs.realigned);
11764 if (m->fs.sp_offset != UNITS_PER_WORD)
11765 {
11766 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11767 GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
11768 style, true);
11769 }
11770 else
11771 ix86_add_queued_cfa_restore_notes (get_last_insn ());
11772
11773 /* Sibcall epilogues don't want a return instruction. */
11774 if (style == 0)
11775 {
11776 m->fs = frame_state_save;
11777 return;
11778 }
11779
11780 if (crtl->args.pops_args && crtl->args.size)
11781 {
11782 rtx popc = GEN_INT (crtl->args.pops_args);
11783
11784 /* i386 can only pop 64K bytes. If asked to pop more, pop the return
11785 address, do an explicit add, and jump indirectly to the caller. */
11786
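      /* Roughly (illustrative only, not the exact output), the large-pop
	 path below ends up emitting a sequence along the lines of:
	     pop %ecx       ; return address into %ecx
	     add $N, %esp   ; explicit adjustment, N = pops_args
	     jmp *%ecx      ; return indirectly to the caller
	 while the common case further down is a plain "ret $N".  */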
11787 if (crtl->args.pops_args >= 65536)
11788 {
11789 rtx ecx = gen_rtx_REG (SImode, CX_REG);
11790 rtx insn;
11791
11792 /* There is no "pascal" calling convention in any 64bit ABI. */
11793 gcc_assert (!TARGET_64BIT);
11794
11795 insn = emit_insn (gen_pop (ecx));
11796 m->fs.cfa_offset -= UNITS_PER_WORD;
11797 m->fs.sp_offset -= UNITS_PER_WORD;
11798
11799 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
11800 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
11801 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
11802 add_reg_note (insn, REG_CFA_REGISTER,
11803 gen_rtx_SET (VOIDmode, ecx, pc_rtx));
11804 RTX_FRAME_RELATED_P (insn) = 1;
11805
11806 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11807 popc, -1, true);
11808 emit_jump_insn (gen_simple_return_indirect_internal (ecx));
11809 }
11810 else
11811 emit_jump_insn (gen_simple_return_pop_internal (popc));
11812 }
11813 else
11814 emit_jump_insn (gen_simple_return_internal ());
11815
11816 /* Restore the state back to the state from the prologue,
11817 so that it's correct for the next epilogue. */
11818 m->fs = frame_state_save;
11819 }
11820
11821 /* Reset from the function's potential modifications. */
11822
11823 static void
11824 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED, HOST_WIDE_INT)
11825 {
11826 if (pic_offset_table_rtx
11827 && !ix86_use_pseudo_pic_reg ())
11828 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
11829 #if TARGET_MACHO
11830 /* Mach-O doesn't support labels at the end of objects, so if
11831 it looks like we might want one, insert a NOP. */
11832 {
11833 rtx_insn *insn = get_last_insn ();
11834 rtx_insn *deleted_debug_label = NULL;
11835 while (insn
11836 && NOTE_P (insn)
11837 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
11838 {
11839 /* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL
11840 notes only; instead set their CODE_LABEL_NUMBER to -1;
11841 otherwise there would be code generation differences
11842 between -g and -g0. */
11843 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
11844 deleted_debug_label = insn;
11845 insn = PREV_INSN (insn);
11846 }
11847 if (insn
11848 && (LABEL_P (insn)
11849 || (NOTE_P (insn)
11850 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
11851 fputs ("\tnop\n", file);
11852 else if (deleted_debug_label)
11853 for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
11854 if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
11855 CODE_LABEL_NUMBER (insn) = -1;
11856 }
11857 #endif
11858
11859 }
11860
11861 /* Return a scratch register to use in the split stack prologue. The
11862 split stack prologue is used for -fsplit-stack. It consists of the first
11863 instructions in the function, even before the regular prologue.
11864 The scratch register can be any caller-saved register which is not
11865 used for parameters or for the static chain. */
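/* Summarizing the selection below: 64-bit targets use %r11; for 32-bit,
   fastcall uses %eax, thiscall uses %edx (or %eax when a static chain is
   present), and the default case uses %ecx (or %edx when a static chain is
   present), giving up with a sorry() when too many registers are already
   taken by parameters.  */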
11866
11867 static unsigned int
11868 split_stack_prologue_scratch_regno (void)
11869 {
11870 if (TARGET_64BIT)
11871 return R11_REG;
11872 else
11873 {
11874 bool is_fastcall, is_thiscall;
11875 int regparm;
11876
11877 is_fastcall = (lookup_attribute ("fastcall",
11878 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
11879 != NULL);
11880 is_thiscall = (lookup_attribute ("thiscall",
11881 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
11882 != NULL);
11883 regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);
11884
11885 if (is_fastcall)
11886 {
11887 if (DECL_STATIC_CHAIN (cfun->decl))
11888 {
11889 sorry ("-fsplit-stack does not support fastcall with "
11890 "nested function");
11891 return INVALID_REGNUM;
11892 }
11893 return AX_REG;
11894 }
11895 else if (is_thiscall)
11896 {
11897 if (!DECL_STATIC_CHAIN (cfun->decl))
11898 return DX_REG;
11899 return AX_REG;
11900 }
11901 else if (regparm < 3)
11902 {
11903 if (!DECL_STATIC_CHAIN (cfun->decl))
11904 return CX_REG;
11905 else
11906 {
11907 if (regparm >= 2)
11908 {
11909 sorry ("-fsplit-stack does not support 2 register "
11910 "parameters for a nested function");
11911 return INVALID_REGNUM;
11912 }
11913 return DX_REG;
11914 }
11915 }
11916 else
11917 {
11918 /* FIXME: We could make this work by pushing a register
11919 around the addition and comparison. */
11920 sorry ("-fsplit-stack does not support 3 register parameters");
11921 return INVALID_REGNUM;
11922 }
11923 }
11924 }
11925
11926 /* A SYMBOL_REF for the function which allocates new stack space for
11927 -fsplit-stack. */
11928
11929 static GTY(()) rtx split_stack_fn;
11930
11931 /* A SYMBOL_REF for the more stack function when using the large
11932 model. */
11933
11934 static GTY(()) rtx split_stack_fn_large;
11935
11936 /* Handle -fsplit-stack. These are the first instructions in the
11937 function, even before the regular prologue. */
11938
11939 void
11940 ix86_expand_split_stack_prologue (void)
11941 {
11942 struct ix86_frame frame;
11943 HOST_WIDE_INT allocate;
11944 unsigned HOST_WIDE_INT args_size;
11945 rtx_code_label *label;
11946 rtx limit, current, jump_insn, allocate_rtx, call_insn, call_fusage;
11947 rtx scratch_reg = NULL_RTX;
11948 rtx_code_label *varargs_label = NULL;
11949 rtx fn;
11950
11951 gcc_assert (flag_split_stack && reload_completed);
11952
11953 ix86_finalize_stack_realign_flags ();
11954 ix86_compute_frame_layout (&frame);
11955 allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;
11956
11957 /* This is the label we will branch to if we have enough stack
11958 space. We expect the basic block reordering pass to reverse this
11959 branch if optimizing, so that we branch in the unlikely case. */
11960 label = gen_label_rtx ();
11961
11962 /* We need to compare the stack pointer minus the frame size with
11963 the stack boundary in the TCB. The stack boundary always gives
11964 us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
11965 can compare directly. Otherwise we need to do an addition. */
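  /* The limit below is a memory reference that ends up addressed through
     the thread-pointer segment register (typically %fs or %gs, depending
     on the target), i.e. the stack-boundary slot in the TCB; this note is
     only a rough summary, not a precise description of the final assembly.  */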
11966
11967 limit = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
11968 UNSPEC_STACK_CHECK);
11969 limit = gen_rtx_CONST (Pmode, limit);
11970 limit = gen_rtx_MEM (Pmode, limit);
11971 if (allocate < SPLIT_STACK_AVAILABLE)
11972 current = stack_pointer_rtx;
11973 else
11974 {
11975 unsigned int scratch_regno;
11976 rtx offset;
11977
11978 /* We need a scratch register to hold the stack pointer minus
11979 the required frame size. Since this is the very start of the
11980 function, the scratch register can be any caller-saved
11981 register which is not used for parameters. */
11982 offset = GEN_INT (- allocate);
11983 scratch_regno = split_stack_prologue_scratch_regno ();
11984 if (scratch_regno == INVALID_REGNUM)
11985 return;
11986 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
11987 if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
11988 {
11989 /* We don't use ix86_gen_add3 in this case because it will
11990 want to split to lea, but when not optimizing the insn
11991 will not be split after this point. */
11992 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
11993 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11994 offset)));
11995 }
11996 else
11997 {
11998 emit_move_insn (scratch_reg, offset);
11999 emit_insn (ix86_gen_add3 (scratch_reg, scratch_reg,
12000 stack_pointer_rtx));
12001 }
12002 current = scratch_reg;
12003 }
12004
12005 ix86_expand_branch (GEU, current, limit, label);
12006 jump_insn = get_last_insn ();
12007 JUMP_LABEL (jump_insn) = label;
12008
12009 /* Mark the jump as very likely to be taken. */
12010 add_int_reg_note (jump_insn, REG_BR_PROB,
12011 REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100);
12012
12013 if (split_stack_fn == NULL_RTX)
12014 {
12015 split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
12016 SYMBOL_REF_FLAGS (split_stack_fn) |= SYMBOL_FLAG_LOCAL;
12017 }
12018 fn = split_stack_fn;
12019
12020 /* Get more stack space. We pass in the desired stack space and the
12021 size of the arguments to copy to the new stack. In 32-bit mode
12022 we push the parameters; __morestack will return on a new stack
12023 anyhow. In 64-bit mode we pass the parameters in r10 and
12024 r11. */
12025 allocate_rtx = GEN_INT (allocate);
12026 args_size = crtl->args.size >= 0 ? crtl->args.size : 0;
12027 call_fusage = NULL_RTX;
12028 if (TARGET_64BIT)
12029 {
12030 rtx reg10, reg11;
12031
12032 reg10 = gen_rtx_REG (Pmode, R10_REG);
12033 reg11 = gen_rtx_REG (Pmode, R11_REG);
12034
12035 /* If this function uses a static chain, it will be in %r10.
12036 Preserve it across the call to __morestack. */
12037 if (DECL_STATIC_CHAIN (cfun->decl))
12038 {
12039 rtx rax;
12040
12041 rax = gen_rtx_REG (word_mode, AX_REG);
12042 emit_move_insn (rax, gen_rtx_REG (word_mode, R10_REG));
12043 use_reg (&call_fusage, rax);
12044 }
12045
12046 if ((ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
12047 && !TARGET_PECOFF)
12048 {
12049 HOST_WIDE_INT argval;
12050
12051 gcc_assert (Pmode == DImode);
12052 /* When using the large model we need to load the address
12053 into a register, and we've run out of registers. So we
12054 switch to a different calling convention, and we call a
12055 different function: __morestack_large. We pass the
12056 argument size in the upper 32 bits of r10 and pass the
12057 frame size in the lower 32 bits. */
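	  /* Purely as an illustration: with args_size == 0x20 and
	     allocate == 0x1000, the packed value computed below would be
	     (0x20 << 32) + 0x1000 == 0x0000002000001000.  */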
12058 gcc_assert ((allocate & (HOST_WIDE_INT) 0xffffffff) == allocate);
12059 gcc_assert ((args_size & 0xffffffff) == args_size);
12060
12061 if (split_stack_fn_large == NULL_RTX)
12062 {
12063 split_stack_fn_large =
12064 gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model");
12065 SYMBOL_REF_FLAGS (split_stack_fn_large) |= SYMBOL_FLAG_LOCAL;
12066 }
12067 if (ix86_cmodel == CM_LARGE_PIC)
12068 {
12069 rtx_code_label *label;
12070 rtx x;
12071
12072 label = gen_label_rtx ();
12073 emit_label (label);
12074 LABEL_PRESERVE_P (label) = 1;
12075 emit_insn (gen_set_rip_rex64 (reg10, label));
12076 emit_insn (gen_set_got_offset_rex64 (reg11, label));
12077 emit_insn (ix86_gen_add3 (reg10, reg10, reg11));
12078 x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn_large),
12079 UNSPEC_GOT);
12080 x = gen_rtx_CONST (Pmode, x);
12081 emit_move_insn (reg11, x);
12082 x = gen_rtx_PLUS (Pmode, reg10, reg11);
12083 x = gen_const_mem (Pmode, x);
12084 emit_move_insn (reg11, x);
12085 }
12086 else
12087 emit_move_insn (reg11, split_stack_fn_large);
12088
12089 fn = reg11;
12090
12091 argval = ((args_size << 16) << 16) + allocate;
12092 emit_move_insn (reg10, GEN_INT (argval));
12093 }
12094 else
12095 {
12096 emit_move_insn (reg10, allocate_rtx);
12097 emit_move_insn (reg11, GEN_INT (args_size));
12098 use_reg (&call_fusage, reg11);
12099 }
12100
12101 use_reg (&call_fusage, reg10);
12102 }
12103 else
12104 {
12105 emit_insn (gen_push (GEN_INT (args_size)));
12106 emit_insn (gen_push (allocate_rtx));
12107 }
12108 call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn),
12109 GEN_INT (UNITS_PER_WORD), constm1_rtx,
12110 NULL_RTX, false);
12111 add_function_usage_to (call_insn, call_fusage);
12112
12113 /* In order to make call/return prediction work right, we now need
12114 to execute a return instruction. See
12115 libgcc/config/i386/morestack.S for the details on how this works.
12116
12117 For flow purposes gcc must not see this as a return
12118 instruction--we need control flow to continue at the subsequent
12119 label. Therefore, we use an unspec. */
12120 gcc_assert (crtl->args.pops_args < 65536);
12121 emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));
12122
12123 /* If we are in 64-bit mode and this function uses a static chain,
12124 we saved %r10 in %rax before calling __morestack. */
12125 if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
12126 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
12127 gen_rtx_REG (word_mode, AX_REG));
12128
12129 /* If this function calls va_start, we need to store a pointer to
12130 the arguments on the old stack, because they may not have been
12131 all copied to the new stack. At this point the old stack can be
12132 found at the frame pointer value used by __morestack, because
12133 __morestack has set that up before calling back to us. Here we
12134 store that pointer in a scratch register, and in
12135 ix86_expand_prologue we store the scratch register in a stack
12136 slot. */
12137 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12138 {
12139 unsigned int scratch_regno;
12140 rtx frame_reg;
12141 int words;
12142
12143 scratch_regno = split_stack_prologue_scratch_regno ();
12144 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
12145 frame_reg = gen_rtx_REG (Pmode, BP_REG);
12146
12147 /* 64-bit:
12148 fp -> old fp value
12149 return address within this function
12150 return address of caller of this function
12151 stack arguments
12152 So we add three words to get to the stack arguments.
12153
12154 32-bit:
12155 fp -> old fp value
12156 return address within this function
12157 first argument to __morestack
12158 second argument to __morestack
12159 return address of caller of this function
12160 stack arguments
12161 So we add five words to get to the stack arguments.
12162 */
12163 words = TARGET_64BIT ? 3 : 5;
12164 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
12165 gen_rtx_PLUS (Pmode, frame_reg,
12166 GEN_INT (words * UNITS_PER_WORD))));
12167
12168 varargs_label = gen_label_rtx ();
12169 emit_jump_insn (gen_jump (varargs_label));
12170 JUMP_LABEL (get_last_insn ()) = varargs_label;
12171
12172 emit_barrier ();
12173 }
12174
12175 emit_label (label);
12176 LABEL_NUSES (label) = 1;
12177
12178 /* If this function calls va_start, we now have to set the scratch
12179 register for the case where we do not call __morestack. In this
12180 case we need to set it based on the stack pointer. */
12181 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12182 {
12183 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
12184 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
12185 GEN_INT (UNITS_PER_WORD))));
12186
12187 emit_label (varargs_label);
12188 LABEL_NUSES (varargs_label) = 1;
12189 }
12190 }
12191
12192 /* We may have to tell the dataflow pass that the split stack prologue
12193 is initializing a scratch register. */
12194
12195 static void
12196 ix86_live_on_entry (bitmap regs)
12197 {
12198 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12199 {
12200 gcc_assert (flag_split_stack);
12201 bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
12202 }
12203 }
12204 \f
12205 /* Extract the parts of an RTL expression that is a valid memory address
12206 for an instruction. Return 0 if the structure of the address is
12207 grossly off. Return -1 if the address contains ASHIFT, so it is not
12208 strictly valid, but is still used for computing the length of the lea instruction. */
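/* For example (illustrative only), an address such as

     (plus:SI (plus:SI (mult:SI (reg:SI index) (const_int 4))
		       (reg:SI base))
	      (const_int 16))

   decomposes into base = (reg base), index = (reg index), scale = 4 and
   disp = (const_int 16), with the default segment.  */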
12209
12210 int
12211 ix86_decompose_address (rtx addr, struct ix86_address *out)
12212 {
12213 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
12214 rtx base_reg, index_reg;
12215 HOST_WIDE_INT scale = 1;
12216 rtx scale_rtx = NULL_RTX;
12217 rtx tmp;
12218 int retval = 1;
12219 enum ix86_address_seg seg = SEG_DEFAULT;
12220
12221 /* Allow zero-extended SImode addresses,
12222 they will be emitted with addr32 prefix. */
12223 if (TARGET_64BIT && GET_MODE (addr) == DImode)
12224 {
12225 if (GET_CODE (addr) == ZERO_EXTEND
12226 && GET_MODE (XEXP (addr, 0)) == SImode)
12227 {
12228 addr = XEXP (addr, 0);
12229 if (CONST_INT_P (addr))
12230 return 0;
12231 }
12232 else if (GET_CODE (addr) == AND
12233 && const_32bit_mask (XEXP (addr, 1), DImode))
12234 {
12235 addr = simplify_gen_subreg (SImode, XEXP (addr, 0), DImode, 0);
12236 if (addr == NULL_RTX)
12237 return 0;
12238
12239 if (CONST_INT_P (addr))
12240 return 0;
12241 }
12242 }
12243
12244 /* Allow SImode subregs of DImode addresses,
12245 they will be emitted with addr32 prefix. */
12246 if (TARGET_64BIT && GET_MODE (addr) == SImode)
12247 {
12248 if (GET_CODE (addr) == SUBREG
12249 && GET_MODE (SUBREG_REG (addr)) == DImode)
12250 {
12251 addr = SUBREG_REG (addr);
12252 if (CONST_INT_P (addr))
12253 return 0;
12254 }
12255 }
12256
12257 if (REG_P (addr))
12258 base = addr;
12259 else if (GET_CODE (addr) == SUBREG)
12260 {
12261 if (REG_P (SUBREG_REG (addr)))
12262 base = addr;
12263 else
12264 return 0;
12265 }
12266 else if (GET_CODE (addr) == PLUS)
12267 {
12268 rtx addends[4], op;
12269 int n = 0, i;
12270
12271 op = addr;
12272 do
12273 {
12274 if (n >= 4)
12275 return 0;
12276 addends[n++] = XEXP (op, 1);
12277 op = XEXP (op, 0);
12278 }
12279 while (GET_CODE (op) == PLUS);
12280 if (n >= 4)
12281 return 0;
12282 addends[n] = op;
12283
12284 for (i = n; i >= 0; --i)
12285 {
12286 op = addends[i];
12287 switch (GET_CODE (op))
12288 {
12289 case MULT:
12290 if (index)
12291 return 0;
12292 index = XEXP (op, 0);
12293 scale_rtx = XEXP (op, 1);
12294 break;
12295
12296 case ASHIFT:
12297 if (index)
12298 return 0;
12299 index = XEXP (op, 0);
12300 tmp = XEXP (op, 1);
12301 if (!CONST_INT_P (tmp))
12302 return 0;
12303 scale = INTVAL (tmp);
12304 if ((unsigned HOST_WIDE_INT) scale > 3)
12305 return 0;
12306 scale = 1 << scale;
12307 break;
12308
12309 case ZERO_EXTEND:
12310 op = XEXP (op, 0);
12311 if (GET_CODE (op) != UNSPEC)
12312 return 0;
12313 /* FALLTHRU */
12314
12315 case UNSPEC:
12316 if (XINT (op, 1) == UNSPEC_TP
12317 && TARGET_TLS_DIRECT_SEG_REFS
12318 && seg == SEG_DEFAULT)
12319 seg = DEFAULT_TLS_SEG_REG;
12320 else
12321 return 0;
12322 break;
12323
12324 case SUBREG:
12325 if (!REG_P (SUBREG_REG (op)))
12326 return 0;
12327 /* FALLTHRU */
12328
12329 case REG:
12330 if (!base)
12331 base = op;
12332 else if (!index)
12333 index = op;
12334 else
12335 return 0;
12336 break;
12337
12338 case CONST:
12339 case CONST_INT:
12340 case SYMBOL_REF:
12341 case LABEL_REF:
12342 if (disp)
12343 return 0;
12344 disp = op;
12345 break;
12346
12347 default:
12348 return 0;
12349 }
12350 }
12351 }
12352 else if (GET_CODE (addr) == MULT)
12353 {
12354 index = XEXP (addr, 0); /* index*scale */
12355 scale_rtx = XEXP (addr, 1);
12356 }
12357 else if (GET_CODE (addr) == ASHIFT)
12358 {
12359 /* We're called for lea too, which implements ashift on occasion. */
12360 index = XEXP (addr, 0);
12361 tmp = XEXP (addr, 1);
12362 if (!CONST_INT_P (tmp))
12363 return 0;
12364 scale = INTVAL (tmp);
12365 if ((unsigned HOST_WIDE_INT) scale > 3)
12366 return 0;
12367 scale = 1 << scale;
12368 retval = -1;
12369 }
12370 else
12371 disp = addr; /* displacement */
12372
12373 if (index)
12374 {
12375 if (REG_P (index))
12376 ;
12377 else if (GET_CODE (index) == SUBREG
12378 && REG_P (SUBREG_REG (index)))
12379 ;
12380 else
12381 return 0;
12382 }
12383
12384 /* Extract the integral value of scale. */
12385 if (scale_rtx)
12386 {
12387 if (!CONST_INT_P (scale_rtx))
12388 return 0;
12389 scale = INTVAL (scale_rtx);
12390 }
12391
12392 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
12393 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
12394
12395 /* Avoid useless 0 displacement. */
12396 if (disp == const0_rtx && (base || index))
12397 disp = NULL_RTX;
12398
12399 /* Allow arg pointer and stack pointer as index if there is no scaling. */
12400 if (base_reg && index_reg && scale == 1
12401 && (index_reg == arg_pointer_rtx
12402 || index_reg == frame_pointer_rtx
12403 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
12404 {
12405 rtx tmp;
12406 tmp = base, base = index, index = tmp;
12407 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
12408 }
12409
12410 /* Special case: %ebp cannot be encoded as a base without a displacement.
12411 Similarly %r13. */
12412 if (!disp
12413 && base_reg
12414 && (base_reg == hard_frame_pointer_rtx
12415 || base_reg == frame_pointer_rtx
12416 || base_reg == arg_pointer_rtx
12417 || (REG_P (base_reg)
12418 && (REGNO (base_reg) == HARD_FRAME_POINTER_REGNUM
12419 || REGNO (base_reg) == R13_REG))))
12420 disp = const0_rtx;
12421
12422 /* Special case: on K6, [%esi] causes the instruction to be vector decoded.
12423 Avoid this by transforming to [%esi+0].
12424 Reload calls address legitimization without cfun defined, so we need
12425 to test cfun for being non-NULL. */
12426 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
12427 && base_reg && !index_reg && !disp
12428 && REG_P (base_reg) && REGNO (base_reg) == SI_REG)
12429 disp = const0_rtx;
12430
12431 /* Special case: encode reg+reg instead of reg*2. */
12432 if (!base && index && scale == 2)
12433 base = index, base_reg = index_reg, scale = 1;
12434
12435 /* Special case: scaling cannot be encoded without base or displacement. */
12436 if (!base && !disp && index && scale != 1)
12437 disp = const0_rtx;
12438
12439 out->base = base;
12440 out->index = index;
12441 out->disp = disp;
12442 out->scale = scale;
12443 out->seg = seg;
12444
12445 return retval;
12446 }
12447 \f
12448 /* Return cost of the memory address x.
12449 For i386, it is better to use a complex address than let gcc copy
12450 the address into a reg and make a new pseudo. But not if the address
12451 requires two regs - that would mean more pseudos with longer
12452 lifetimes. */
12453 static int
12454 ix86_address_cost (rtx x, enum machine_mode, addr_space_t, bool)
12455 {
12456 struct ix86_address parts;
12457 int cost = 1;
12458 int ok = ix86_decompose_address (x, &parts);
12459
12460 gcc_assert (ok);
12461
12462 if (parts.base && GET_CODE (parts.base) == SUBREG)
12463 parts.base = SUBREG_REG (parts.base);
12464 if (parts.index && GET_CODE (parts.index) == SUBREG)
12465 parts.index = SUBREG_REG (parts.index);
12466
12467 /* Attempt to minimize number of registers in the address. */
12468 if ((parts.base
12469 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
12470 || (parts.index
12471 && (!REG_P (parts.index)
12472 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
12473 cost++;
12474
12475 /* When address base or index is "pic_offset_table_rtx" we don't increase
12476 address cost. When a memop with "pic_offset_table_rtx" is not invariant
12477 itself, it most likely means that the base or index is not invariant.
12478 Therefore only "pic_offset_table_rtx" could be hoisted out, which is not
12479 profitable for x86. */
12480 if (parts.base
12481 && (!pic_offset_table_rtx
12482 || REGNO (pic_offset_table_rtx) != REGNO(parts.base))
12483 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
12484 && parts.index
12485 && (!pic_offset_table_rtx
12486 || REGNO (pic_offset_table_rtx) != REGNO(parts.index))
12487 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
12488 && parts.base != parts.index)
12489 cost++;
12490
12491 /* AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
12492 since its predecode logic can't detect the length of instructions
12493 and it degenerates to vector decoding. Increase the cost of such
12494 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
12495 to split such addresses or even refuse such addresses at all.
12496
12497 Following addressing modes are affected:
12498 [base+scale*index]
12499 [scale*index+disp]
12500 [base+index]
12501
12502 The first and last case may be avoidable by explicitly coding the zero in
12503 the memory address, but I don't have an AMD-K6 machine handy to check this
12504 theory. */
12505
12506 if (TARGET_K6
12507 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
12508 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
12509 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
12510 cost += 10;
12511
12512 return cost;
12513 }
12514 \f
12515 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
12516 this is used to form addresses to local data when -fPIC is in
12517 use. */
12518
12519 static bool
12520 darwin_local_data_pic (rtx disp)
12521 {
12522 return (GET_CODE (disp) == UNSPEC
12523 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
12524 }
12525
12526 /* Determine if a given RTX is a valid constant. We already know this
12527 satisfies CONSTANT_P. */
12528
12529 static bool
12530 ix86_legitimate_constant_p (enum machine_mode, rtx x)
12531 {
12532 switch (GET_CODE (x))
12533 {
12534 case CONST:
12535 x = XEXP (x, 0);
12536
12537 if (GET_CODE (x) == PLUS)
12538 {
12539 if (!CONST_INT_P (XEXP (x, 1)))
12540 return false;
12541 x = XEXP (x, 0);
12542 }
12543
12544 if (TARGET_MACHO && darwin_local_data_pic (x))
12545 return true;
12546
12547 /* Only some unspecs are valid as "constants". */
12548 if (GET_CODE (x) == UNSPEC)
12549 switch (XINT (x, 1))
12550 {
12551 case UNSPEC_GOT:
12552 case UNSPEC_GOTOFF:
12553 case UNSPEC_PLTOFF:
12554 return TARGET_64BIT;
12555 case UNSPEC_TPOFF:
12556 case UNSPEC_NTPOFF:
12557 x = XVECEXP (x, 0, 0);
12558 return (GET_CODE (x) == SYMBOL_REF
12559 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
12560 case UNSPEC_DTPOFF:
12561 x = XVECEXP (x, 0, 0);
12562 return (GET_CODE (x) == SYMBOL_REF
12563 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
12564 default:
12565 return false;
12566 }
12567
12568 /* We must have drilled down to a symbol. */
12569 if (GET_CODE (x) == LABEL_REF)
12570 return true;
12571 if (GET_CODE (x) != SYMBOL_REF)
12572 return false;
12573 /* FALLTHRU */
12574
12575 case SYMBOL_REF:
12576 /* TLS symbols are never valid. */
12577 if (SYMBOL_REF_TLS_MODEL (x))
12578 return false;
12579
12580 /* DLLIMPORT symbols are never valid. */
12581 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
12582 && SYMBOL_REF_DLLIMPORT_P (x))
12583 return false;
12584
12585 #if TARGET_MACHO
12586 /* mdynamic-no-pic */
12587 if (MACHO_DYNAMIC_NO_PIC_P)
12588 return machopic_symbol_defined_p (x);
12589 #endif
12590 break;
12591
12592 case CONST_DOUBLE:
12593 if (GET_MODE (x) == TImode
12594 && x != CONST0_RTX (TImode)
12595 && !TARGET_64BIT)
12596 return false;
12597 break;
12598
12599 case CONST_VECTOR:
12600 if (!standard_sse_constant_p (x))
12601 return false;
12602
12603 default:
12604 break;
12605 }
12606
12607 /* Otherwise we handle everything else in the move patterns. */
12608 return true;
12609 }
12610
12611 /* Determine if it's legal to put X into the constant pool. This
12612 is not possible for the address of thread-local symbols, which
12613 is checked above. */
12614
12615 static bool
12616 ix86_cannot_force_const_mem (enum machine_mode mode, rtx x)
12617 {
12618 /* We can always put integral constants and vectors in memory. */
12619 switch (GET_CODE (x))
12620 {
12621 case CONST_INT:
12622 case CONST_DOUBLE:
12623 case CONST_VECTOR:
12624 return false;
12625
12626 default:
12627 break;
12628 }
12629 return !ix86_legitimate_constant_p (mode, x);
12630 }
12631
12632 /* Nonzero if the symbol is marked as dllimport or as a stub-variable;
12633 otherwise zero. */
12634
12635 static bool
12636 is_imported_p (rtx x)
12637 {
12638 if (!TARGET_DLLIMPORT_DECL_ATTRIBUTES
12639 || GET_CODE (x) != SYMBOL_REF)
12640 return false;
12641
12642 return SYMBOL_REF_DLLIMPORT_P (x) || SYMBOL_REF_STUBVAR_P (x);
12643 }
12644
12645
12646 /* Nonzero if the constant value X is a legitimate general operand
12647 when generating PIC code. It is given that flag_pic is on and
12648 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
12649
12650 bool
12651 legitimate_pic_operand_p (rtx x)
12652 {
12653 rtx inner;
12654
12655 switch (GET_CODE (x))
12656 {
12657 case CONST:
12658 inner = XEXP (x, 0);
12659 if (GET_CODE (inner) == PLUS
12660 && CONST_INT_P (XEXP (inner, 1)))
12661 inner = XEXP (inner, 0);
12662
12663 /* Only some unspecs are valid as "constants". */
12664 if (GET_CODE (inner) == UNSPEC)
12665 switch (XINT (inner, 1))
12666 {
12667 case UNSPEC_GOT:
12668 case UNSPEC_GOTOFF:
12669 case UNSPEC_PLTOFF:
12670 return TARGET_64BIT;
12671 case UNSPEC_TPOFF:
12672 x = XVECEXP (inner, 0, 0);
12673 return (GET_CODE (x) == SYMBOL_REF
12674 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
12675 case UNSPEC_MACHOPIC_OFFSET:
12676 return legitimate_pic_address_disp_p (x);
12677 default:
12678 return false;
12679 }
12680 /* FALLTHRU */
12681
12682 case SYMBOL_REF:
12683 case LABEL_REF:
12684 return legitimate_pic_address_disp_p (x);
12685
12686 default:
12687 return true;
12688 }
12689 }
12690
12691 /* Determine if a given CONST RTX is a valid memory displacement
12692 in PIC mode. */
12693
12694 bool
12695 legitimate_pic_address_disp_p (rtx disp)
12696 {
12697 bool saw_plus;
12698
12699 /* In 64bit mode we can allow direct addresses of symbols and labels
12700 when they are not dynamic symbols. */
12701 if (TARGET_64BIT)
12702 {
12703 rtx op0 = disp, op1;
12704
12705 switch (GET_CODE (disp))
12706 {
12707 case LABEL_REF:
12708 return true;
12709
12710 case CONST:
12711 if (GET_CODE (XEXP (disp, 0)) != PLUS)
12712 break;
12713 op0 = XEXP (XEXP (disp, 0), 0);
12714 op1 = XEXP (XEXP (disp, 0), 1);
12715 if (!CONST_INT_P (op1)
12716 || INTVAL (op1) >= 16*1024*1024
12717 || INTVAL (op1) < -16*1024*1024)
12718 break;
12719 if (GET_CODE (op0) == LABEL_REF)
12720 return true;
12721 if (GET_CODE (op0) == CONST
12722 && GET_CODE (XEXP (op0, 0)) == UNSPEC
12723 && XINT (XEXP (op0, 0), 1) == UNSPEC_PCREL)
12724 return true;
12725 if (GET_CODE (op0) == UNSPEC
12726 && XINT (op0, 1) == UNSPEC_PCREL)
12727 return true;
12728 if (GET_CODE (op0) != SYMBOL_REF)
12729 break;
12730 /* FALLTHRU */
12731
12732 case SYMBOL_REF:
12733 /* TLS references should always be enclosed in UNSPEC.
12734 A dllimported symbol always needs to be resolved. */
12735 if (SYMBOL_REF_TLS_MODEL (op0)
12736 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && SYMBOL_REF_DLLIMPORT_P (op0)))
12737 return false;
12738
12739 if (TARGET_PECOFF)
12740 {
12741 if (is_imported_p (op0))
12742 return true;
12743
12744 if (SYMBOL_REF_FAR_ADDR_P (op0)
12745 || !SYMBOL_REF_LOCAL_P (op0))
12746 break;
12747
12748 /* Function symbols need to be resolved only for
12749 the large model.
12750 For the small model we don't need to resolve anything
12751 here. */
12752 if ((ix86_cmodel != CM_LARGE_PIC
12753 && SYMBOL_REF_FUNCTION_P (op0))
12754 || ix86_cmodel == CM_SMALL_PIC)
12755 return true;
12756 /* Non-external symbols don't need to be resolved for
12757 the large and medium models. */
12758 if ((ix86_cmodel == CM_LARGE_PIC
12759 || ix86_cmodel == CM_MEDIUM_PIC)
12760 && !SYMBOL_REF_EXTERNAL_P (op0))
12761 return true;
12762 }
12763 else if (!SYMBOL_REF_FAR_ADDR_P (op0)
12764 && SYMBOL_REF_LOCAL_P (op0)
12765 && ix86_cmodel != CM_LARGE_PIC)
12766 return true;
12767 break;
12768
12769 default:
12770 break;
12771 }
12772 }
12773 if (GET_CODE (disp) != CONST)
12774 return false;
12775 disp = XEXP (disp, 0);
12776
12777 if (TARGET_64BIT)
12778 {
12779 /* It is unsafe to allow PLUS expressions. This limits the allowed
12780 distance of GOT tables. We should not need these anyway. */
12781 if (GET_CODE (disp) != UNSPEC
12782 || (XINT (disp, 1) != UNSPEC_GOTPCREL
12783 && XINT (disp, 1) != UNSPEC_GOTOFF
12784 && XINT (disp, 1) != UNSPEC_PCREL
12785 && XINT (disp, 1) != UNSPEC_PLTOFF))
12786 return false;
12787
12788 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
12789 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
12790 return false;
12791 return true;
12792 }
12793
12794 saw_plus = false;
12795 if (GET_CODE (disp) == PLUS)
12796 {
12797 if (!CONST_INT_P (XEXP (disp, 1)))
12798 return false;
12799 disp = XEXP (disp, 0);
12800 saw_plus = true;
12801 }
12802
12803 if (TARGET_MACHO && darwin_local_data_pic (disp))
12804 return true;
12805
12806 if (GET_CODE (disp) != UNSPEC)
12807 return false;
12808
12809 switch (XINT (disp, 1))
12810 {
12811 case UNSPEC_GOT:
12812 if (saw_plus)
12813 return false;
12814 /* We need to check for both symbols and labels because VxWorks loads
12815 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
12816 details. */
12817 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
12818 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
12819 case UNSPEC_GOTOFF:
12820 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
12821 While the ABI also specifies a 32bit relocation, we don't produce it in
12822 the small PIC model at all. */
12823 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
12824 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
12825 && !TARGET_64BIT)
12826 return !TARGET_PECOFF && gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
12827 return false;
12828 case UNSPEC_GOTTPOFF:
12829 case UNSPEC_GOTNTPOFF:
12830 case UNSPEC_INDNTPOFF:
12831 if (saw_plus)
12832 return false;
12833 disp = XVECEXP (disp, 0, 0);
12834 return (GET_CODE (disp) == SYMBOL_REF
12835 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
12836 case UNSPEC_NTPOFF:
12837 disp = XVECEXP (disp, 0, 0);
12838 return (GET_CODE (disp) == SYMBOL_REF
12839 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
12840 case UNSPEC_DTPOFF:
12841 disp = XVECEXP (disp, 0, 0);
12842 return (GET_CODE (disp) == SYMBOL_REF
12843 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
12844 }
12845
12846 return false;
12847 }
12848
12849 /* Our implementation of LEGITIMIZE_RELOAD_ADDRESS. Returns a value to
12850 replace the input X, or the original X if no replacement is called for.
12851 The output parameter *WIN is 1 if the calling macro should goto WIN,
12852 0 if it should not. */
12853
12854 bool
12855 ix86_legitimize_reload_address (rtx x, enum machine_mode, int opnum, int type,
12856 int)
12857 {
12858 /* Reload can generate:
12859
12860 (plus:DI (plus:DI (unspec:DI [(const_int 0 [0])] UNSPEC_TP)
12861 (reg:DI 97))
12862 (reg:DI 2 cx))
12863
12864 This RTX is rejected from ix86_legitimate_address_p due to
12865 non-strictness of base register 97. Following this rejection,
12866 reload pushes all three components into separate registers,
12867 creating invalid memory address RTX.
12868
12869 Following code reloads only the invalid part of the
12870 memory address RTX. */
12871
12872 if (GET_CODE (x) == PLUS
12873 && REG_P (XEXP (x, 1))
12874 && GET_CODE (XEXP (x, 0)) == PLUS
12875 && REG_P (XEXP (XEXP (x, 0), 1)))
12876 {
12877 rtx base, index;
12878 bool something_reloaded = false;
12879
12880 base = XEXP (XEXP (x, 0), 1);
12881 if (!REG_OK_FOR_BASE_STRICT_P (base))
12882 {
12883 push_reload (base, NULL_RTX, &XEXP (XEXP (x, 0), 1), NULL,
12884 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
12885 opnum, (enum reload_type) type);
12886 something_reloaded = true;
12887 }
12888
12889 index = XEXP (x, 1);
12890 if (!REG_OK_FOR_INDEX_STRICT_P (index))
12891 {
12892 push_reload (index, NULL_RTX, &XEXP (x, 1), NULL,
12893 INDEX_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
12894 opnum, (enum reload_type) type);
12895 something_reloaded = true;
12896 }
12897
12898 gcc_assert (something_reloaded);
12899 return true;
12900 }
12901
12902 return false;
12903 }
12904
12905 /* Determine if op is a suitable RTX for an address register.
12906 Return the naked register if a register or a register subreg is
12907 found, otherwise return NULL_RTX. */
12908
12909 static rtx
12910 ix86_validate_address_register (rtx op)
12911 {
12912 enum machine_mode mode = GET_MODE (op);
12913
12914 /* Only SImode or DImode registers can form the address. */
12915 if (mode != SImode && mode != DImode)
12916 return NULL_RTX;
12917
12918 if (REG_P (op))
12919 return op;
12920 else if (GET_CODE (op) == SUBREG)
12921 {
12922 rtx reg = SUBREG_REG (op);
12923
12924 if (!REG_P (reg))
12925 return NULL_RTX;
12926
12927 mode = GET_MODE (reg);
12928
12929 /* Don't allow SUBREGs that span more than a word. It can
12930 lead to spill failures when the register is one word out
12931 of a two word structure. */
12932 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
12933 return NULL_RTX;
12934
12935 /* Allow only SUBREGs of non-eliminable hard registers. */
12936 if (register_no_elim_operand (reg, mode))
12937 return reg;
12938 }
12939
12940 /* Op is not a register. */
12941 return NULL_RTX;
12942 }
12943
12944 /* Recognizes RTL expressions that are valid memory addresses for an
12945 instruction. The MODE argument is the machine mode for the MEM
12946 expression that wants to use this address.
12947
12948 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
12949 convert common non-canonical forms to canonical form so that they will
12950 be recognized. */
12951
12952 static bool
12953 ix86_legitimate_address_p (enum machine_mode, rtx addr, bool strict)
12954 {
12955 struct ix86_address parts;
12956 rtx base, index, disp;
12957 HOST_WIDE_INT scale;
12958 enum ix86_address_seg seg;
12959
12960 if (ix86_decompose_address (addr, &parts) <= 0)
12961 /* Decomposition failed. */
12962 return false;
12963
12964 base = parts.base;
12965 index = parts.index;
12966 disp = parts.disp;
12967 scale = parts.scale;
12968 seg = parts.seg;
12969
12970 /* Validate base register. */
12971 if (base)
12972 {
12973 rtx reg = ix86_validate_address_register (base);
12974
12975 if (reg == NULL_RTX)
12976 return false;
12977
12978 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
12979 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
12980 /* Base is not valid. */
12981 return false;
12982 }
12983
12984 /* Validate index register. */
12985 if (index)
12986 {
12987 rtx reg = ix86_validate_address_register (index);
12988
12989 if (reg == NULL_RTX)
12990 return false;
12991
12992 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
12993 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
12994 /* Index is not valid. */
12995 return false;
12996 }
12997
12998 /* Index and base should have the same mode. */
12999 if (base && index
13000 && GET_MODE (base) != GET_MODE (index))
13001 return false;
13002
13003 /* Address override works only on the (%reg) part of %fs:(%reg). */
13004 if (seg != SEG_DEFAULT
13005 && ((base && GET_MODE (base) != word_mode)
13006 || (index && GET_MODE (index) != word_mode)))
13007 return false;
13008
13009 /* Validate scale factor. */
13010 if (scale != 1)
13011 {
13012 if (!index)
13013 /* Scale without index. */
13014 return false;
13015
13016 if (scale != 2 && scale != 4 && scale != 8)
13017 /* Scale is not a valid multiplier. */
13018 return false;
13019 }
13020
13021 /* Validate displacement. */
13022 if (disp)
13023 {
13024 if (GET_CODE (disp) == CONST
13025 && GET_CODE (XEXP (disp, 0)) == UNSPEC
13026 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
13027 switch (XINT (XEXP (disp, 0), 1))
13028 {
13029 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
13030 used. While the ABI also specifies 32bit relocations, we don't produce
13031 them at all and use IP-relative addressing instead. */
13032 case UNSPEC_GOT:
13033 case UNSPEC_GOTOFF:
13034 gcc_assert (flag_pic);
13035 if (!TARGET_64BIT)
13036 goto is_legitimate_pic;
13037
13038 /* 64bit address unspec. */
13039 return false;
13040
13041 case UNSPEC_GOTPCREL:
13042 case UNSPEC_PCREL:
13043 gcc_assert (flag_pic);
13044 goto is_legitimate_pic;
13045
13046 case UNSPEC_GOTTPOFF:
13047 case UNSPEC_GOTNTPOFF:
13048 case UNSPEC_INDNTPOFF:
13049 case UNSPEC_NTPOFF:
13050 case UNSPEC_DTPOFF:
13051 break;
13052
13053 case UNSPEC_STACK_CHECK:
13054 gcc_assert (flag_split_stack);
13055 break;
13056
13057 default:
13058 /* Invalid address unspec. */
13059 return false;
13060 }
13061
13062 else if (SYMBOLIC_CONST (disp)
13063 && (flag_pic
13064 || (TARGET_MACHO
13065 #if TARGET_MACHO
13066 && MACHOPIC_INDIRECT
13067 && !machopic_operand_p (disp)
13068 #endif
13069 )))
13070 {
13071
13072 is_legitimate_pic:
13073 if (TARGET_64BIT && (index || base))
13074 {
13075 /* foo@dtpoff(%rX) is ok. */
13076 if (GET_CODE (disp) != CONST
13077 || GET_CODE (XEXP (disp, 0)) != PLUS
13078 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
13079 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
13080 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
13081 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
13082 /* Non-constant pic memory reference. */
13083 return false;
13084 }
13085 else if ((!TARGET_MACHO || flag_pic)
13086 && ! legitimate_pic_address_disp_p (disp))
13087 /* Displacement is an invalid pic construct. */
13088 return false;
13089 #if TARGET_MACHO
13090 else if (MACHO_DYNAMIC_NO_PIC_P
13091 && !ix86_legitimate_constant_p (Pmode, disp))
13092 /* displacement must be referenced via non_lazy_pointer */
13093 return false;
13094 #endif
13095
13096 /* This code used to verify that a symbolic pic displacement
13097 includes the pic_offset_table_rtx register.
13098
13099 While this is a good idea, unfortunately these constructs may
13100 be created by the "adds using lea" optimization for incorrect
13101 code like:
13102
13103 int a;
13104 int foo(int i)
13105 {
13106 return *(&a+i);
13107 }
13108
13109 This code is nonsensical, but results in addressing the
13110 GOT table with the pic_offset_table_rtx base. We can't
13111 just refuse it easily, since it gets matched by the
13112 "addsi3" pattern, which later gets split to lea in the
13113 case the output register differs from the input. While this
13114 could be handled by a separate addsi pattern for this case
13115 that never results in lea, disabling this test seems to be
13116 the easier and correct fix for the crash. */
13117 }
13118 else if (GET_CODE (disp) != LABEL_REF
13119 && !CONST_INT_P (disp)
13120 && (GET_CODE (disp) != CONST
13121 || !ix86_legitimate_constant_p (Pmode, disp))
13122 && (GET_CODE (disp) != SYMBOL_REF
13123 || !ix86_legitimate_constant_p (Pmode, disp)))
13124 /* Displacement is not constant. */
13125 return false;
13126 else if (TARGET_64BIT
13127 && !x86_64_immediate_operand (disp, VOIDmode))
13128 /* Displacement is out of range. */
13129 return false;
13130 /* In x32 mode, constant addresses are sign extended to 64bit, so
13131 we have to prevent addresses from 0x80000000 to 0xffffffff. */
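	 /* E.g. (illustrative), a bare constant address of 0x80000000 would be
	    sign-extended to 0xffffffff80000000, and so is rejected here.  */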
13132 else if (TARGET_X32 && !(index || base)
13133 && CONST_INT_P (disp)
13134 && val_signbit_known_set_p (SImode, INTVAL (disp)))
13135 return false;
13136 }
13137
13138 /* Everything looks valid. */
13139 return true;
13140 }
13141
13142 /* Determine if a given RTX is a valid constant address. */
13143
13144 bool
13145 constant_address_p (rtx x)
13146 {
13147 return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
13148 }
13149 \f
13150 /* Return a unique alias set for the GOT. */
13151
13152 static alias_set_type
13153 ix86_GOT_alias_set (void)
13154 {
13155 static alias_set_type set = -1;
13156 if (set == -1)
13157 set = new_alias_set ();
13158 return set;
13159 }
13160
13161 /* Set regs_ever_live for PIC base address register
13162 to true if required. */
13163 static void
13164 set_pic_reg_ever_live ()
13165 {
13166 if (reload_in_progress)
13167 df_set_regs_ever_live (REGNO (pic_offset_table_rtx), true);
13168 }
13169
13170 /* Return a legitimate reference for ORIG (an address) using the
13171 register REG. If REG is 0, a new pseudo is generated.
13172
13173 There are two types of references that must be handled:
13174
13175 1. Global data references must load the address from the GOT, via
13176 the PIC reg. An insn is emitted to do this load, and the reg is
13177 returned.
13178
13179 2. Static data references, constant pool addresses, and code labels
13180 compute the address as an offset from the GOT, whose base is in
13181 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
13182 differentiate them from global data objects. The returned
13183 address is the PIC reg + an unspec constant.
13184
13185 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
13186 reg also appears in the address. */
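/* As a rough illustration of the two cases handled below, on 32-bit
   targets a global reference becomes something like
     (mem (plus pic_offset_table_rtx (const (unspec [symbol] UNSPEC_GOT))))
   while a local reference becomes
     (plus pic_offset_table_rtx (const (unspec [symbol] UNSPEC_GOTOFF))).  */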
13187
13188 static rtx
13189 legitimize_pic_address (rtx orig, rtx reg)
13190 {
13191 rtx addr = orig;
13192 rtx new_rtx = orig;
13193
13194 #if TARGET_MACHO
13195 if (TARGET_MACHO && !TARGET_64BIT)
13196 {
13197 if (reg == 0)
13198 reg = gen_reg_rtx (Pmode);
13199 /* Use the generic Mach-O PIC machinery. */
13200 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
13201 }
13202 #endif
13203
13204 if (TARGET_64BIT && TARGET_DLLIMPORT_DECL_ATTRIBUTES)
13205 {
13206 rtx tmp = legitimize_pe_coff_symbol (addr, true);
13207 if (tmp)
13208 return tmp;
13209 }
13210
13211 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
13212 new_rtx = addr;
13213 else if (TARGET_64BIT && !TARGET_PECOFF
13214 && ix86_cmodel != CM_SMALL_PIC && gotoff_operand (addr, Pmode))
13215 {
13216 rtx tmpreg;
13217 /* This symbol may be referenced via a displacement from the PIC
13218 base address (@GOTOFF). */
13219
13220 set_pic_reg_ever_live ();
13221 if (GET_CODE (addr) == CONST)
13222 addr = XEXP (addr, 0);
13223 if (GET_CODE (addr) == PLUS)
13224 {
13225 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
13226 UNSPEC_GOTOFF);
13227 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
13228 }
13229 else
13230 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
13231 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13232 if (!reg)
13233 tmpreg = gen_reg_rtx (Pmode);
13234 else
13235 tmpreg = reg;
13236 emit_move_insn (tmpreg, new_rtx);
13237
13238 if (reg != 0)
13239 {
13240 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
13241 tmpreg, 1, OPTAB_DIRECT);
13242 new_rtx = reg;
13243 }
13244 else
13245 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
13246 }
13247 else if (!TARGET_64BIT && !TARGET_PECOFF && gotoff_operand (addr, Pmode))
13248 {
13249 /* This symbol may be referenced via a displacement from the PIC
13250 base address (@GOTOFF). */
13251
13252 set_pic_reg_ever_live ();
13253 if (GET_CODE (addr) == CONST)
13254 addr = XEXP (addr, 0);
13255 if (GET_CODE (addr) == PLUS)
13256 {
13257 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
13258 UNSPEC_GOTOFF);
13259 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
13260 }
13261 else
13262 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
13263 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13264 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13265
13266 if (reg != 0)
13267 {
13268 emit_move_insn (reg, new_rtx);
13269 new_rtx = reg;
13270 }
13271 }
13272 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
13273 /* We can't use @GOTOFF for text labels on VxWorks;
13274 see gotoff_operand. */
13275 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
13276 {
13277 rtx tmp = legitimize_pe_coff_symbol (addr, true);
13278 if (tmp)
13279 return tmp;
13280
13281 /* For x64 PE-COFF there is no GOT table,
13282 so we use the address directly. */
13283 if (TARGET_64BIT && TARGET_PECOFF)
13284 {
13285 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL);
13286 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13287
13288 if (reg == 0)
13289 reg = gen_reg_rtx (Pmode);
13290 emit_move_insn (reg, new_rtx);
13291 new_rtx = reg;
13292 }
13293 else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
13294 {
13295 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
13296 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13297 new_rtx = gen_const_mem (Pmode, new_rtx);
13298 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
13299
13300 if (reg == 0)
13301 reg = gen_reg_rtx (Pmode);
13302 /* Use gen_movsi directly, otherwise the address is loaded
13303 into a register for CSE. We don't want to CSE these addresses;
13304 instead we CSE addresses from the GOT table, so skip this. */
13305 emit_insn (gen_movsi (reg, new_rtx));
13306 new_rtx = reg;
13307 }
13308 else
13309 {
13310 /* This symbol must be referenced via a load from the
13311 Global Offset Table (@GOT). */
13312
13313 set_pic_reg_ever_live ();
13314 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
13315 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13316 if (TARGET_64BIT)
13317 new_rtx = force_reg (Pmode, new_rtx);
13318 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13319 new_rtx = gen_const_mem (Pmode, new_rtx);
13320 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
13321
13322 if (reg == 0)
13323 reg = gen_reg_rtx (Pmode);
13324 emit_move_insn (reg, new_rtx);
13325 new_rtx = reg;
13326 }
13327 }
13328 else
13329 {
13330 if (CONST_INT_P (addr)
13331 && !x86_64_immediate_operand (addr, VOIDmode))
13332 {
13333 if (reg)
13334 {
13335 emit_move_insn (reg, addr);
13336 new_rtx = reg;
13337 }
13338 else
13339 new_rtx = force_reg (Pmode, addr);
13340 }
13341 else if (GET_CODE (addr) == CONST)
13342 {
13343 addr = XEXP (addr, 0);
13344
13345 /* We must match stuff we generate before. Assume the only
13346 unspecs that can get here are ours. Not that we could do
13347 anything with them anyway.... */
13348 if (GET_CODE (addr) == UNSPEC
13349 || (GET_CODE (addr) == PLUS
13350 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
13351 return orig;
13352 gcc_assert (GET_CODE (addr) == PLUS);
13353 }
13354 if (GET_CODE (addr) == PLUS)
13355 {
13356 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
13357
13358 /* Check first to see if this is a constant offset from a @GOTOFF
13359 symbol reference. */
13360 if (!TARGET_PECOFF && gotoff_operand (op0, Pmode)
13361 && CONST_INT_P (op1))
13362 {
13363 if (!TARGET_64BIT)
13364 {
13365 set_pic_reg_ever_live ();
13366 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
13367 UNSPEC_GOTOFF);
13368 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
13369 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13370 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13371
13372 if (reg != 0)
13373 {
13374 emit_move_insn (reg, new_rtx);
13375 new_rtx = reg;
13376 }
13377 }
13378 else
13379 {
13380 if (INTVAL (op1) < -16*1024*1024
13381 || INTVAL (op1) >= 16*1024*1024)
13382 {
13383 if (!x86_64_immediate_operand (op1, Pmode))
13384 op1 = force_reg (Pmode, op1);
13385 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
13386 }
13387 }
13388 }
13389 else
13390 {
13391 rtx base = legitimize_pic_address (op0, reg);
13392 enum machine_mode mode = GET_MODE (base);
13393 new_rtx
13394 = legitimize_pic_address (op1, base == reg ? NULL_RTX : reg);
13395
13396 if (CONST_INT_P (new_rtx))
13397 {
13398 if (INTVAL (new_rtx) < -16*1024*1024
13399 || INTVAL (new_rtx) >= 16*1024*1024)
13400 {
13401 if (!x86_64_immediate_operand (new_rtx, mode))
13402 new_rtx = force_reg (mode, new_rtx);
13403 new_rtx
13404 = gen_rtx_PLUS (mode, force_reg (mode, base), new_rtx);
13405 }
13406 else
13407 new_rtx = plus_constant (mode, base, INTVAL (new_rtx));
13408 }
13409 else
13410 {
13411 if (GET_CODE (new_rtx) == PLUS
13412 && CONSTANT_P (XEXP (new_rtx, 1)))
13413 {
13414 base = gen_rtx_PLUS (mode, base, XEXP (new_rtx, 0));
13415 new_rtx = XEXP (new_rtx, 1);
13416 }
13417 new_rtx = gen_rtx_PLUS (mode, base, new_rtx);
13418 }
13419 }
13420 }
13421 }
13422 return new_rtx;
13423 }
13424 \f
13425 /* Load the thread pointer. If TO_REG is true, force it into a register. */
13426
13427 static rtx
13428 get_thread_pointer (enum machine_mode tp_mode, bool to_reg)
13429 {
13430 rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
13431
13432 if (GET_MODE (tp) != tp_mode)
13433 {
13434 gcc_assert (GET_MODE (tp) == SImode);
13435 gcc_assert (tp_mode == DImode);
13436
13437 tp = gen_rtx_ZERO_EXTEND (tp_mode, tp);
13438 }
13439
13440 if (to_reg)
13441 tp = copy_to_mode_reg (tp_mode, tp);
13442
13443 return tp;
13444 }
13445
13446 /* Construct the SYMBOL_REF for the tls_get_addr function. */
13447
13448 static GTY(()) rtx ix86_tls_symbol;
13449
13450 static rtx
13451 ix86_tls_get_addr (void)
13452 {
13453 if (!ix86_tls_symbol)
13454 {
13455 const char *sym
13456 = ((TARGET_ANY_GNU_TLS && !TARGET_64BIT)
13457 ? "___tls_get_addr" : "__tls_get_addr");
13458
13459 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, sym);
13460 }
13461
13462 if (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF)
13463 {
13464 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, ix86_tls_symbol),
13465 UNSPEC_PLTOFF);
13466 return gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
13467 gen_rtx_CONST (Pmode, unspec));
13468 }
13469
13470 return ix86_tls_symbol;
13471 }
13472
13473 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
13474
13475 static GTY(()) rtx ix86_tls_module_base_symbol;
13476
13477 rtx
13478 ix86_tls_module_base (void)
13479 {
13480 if (!ix86_tls_module_base_symbol)
13481 {
13482 ix86_tls_module_base_symbol
13483 = gen_rtx_SYMBOL_REF (Pmode, "_TLS_MODULE_BASE_");
13484
13485 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
13486 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
13487 }
13488
13489 return ix86_tls_module_base_symbol;
13490 }
13491
13492 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
13493 false if we expect this to be used for a memory address and true if
13494 we expect to load the address into a register. */
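/* As a rough sketch (not the exact emitted assembly), the 64-bit
   global-dynamic case below is equivalent to
     lea  sym@tlsgd(%rip), %rdi
     call __tls_get_addr
   with the resulting address in %rax, which is then tied to DEST
   via emit_libcall_block.  */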
13495
13496 static rtx
13497 legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
13498 {
13499 rtx dest, base, off;
13500 rtx pic = NULL_RTX, tp = NULL_RTX;
13501 enum machine_mode tp_mode = Pmode;
13502 int type;
13503
13504 /* Fall back to the global dynamic model if the toolchain cannot support
13505 local dynamic. */
13506 if (TARGET_SUN_TLS && !TARGET_64BIT
13507 && !HAVE_AS_IX86_TLSLDMPLT && !HAVE_AS_IX86_TLSLDM
13508 && model == TLS_MODEL_LOCAL_DYNAMIC)
13509 model = TLS_MODEL_GLOBAL_DYNAMIC;
13510
13511 switch (model)
13512 {
13513 case TLS_MODEL_GLOBAL_DYNAMIC:
13514 dest = gen_reg_rtx (Pmode);
13515
13516 if (!TARGET_64BIT)
13517 {
13518 if (flag_pic && !TARGET_PECOFF)
13519 pic = pic_offset_table_rtx;
13520 else
13521 {
13522 pic = gen_reg_rtx (Pmode);
13523 emit_insn (gen_set_got (pic));
13524 }
13525 }
13526
13527 if (TARGET_GNU2_TLS)
13528 {
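/* GNU2 TLS descriptor sequence: the descriptor call leaves the offset of X
   from the thread pointer in DEST, so the final address is computed below
   as TP + DEST.  */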
13529 if (TARGET_64BIT)
13530 emit_insn (gen_tls_dynamic_gnu2_64 (dest, x));
13531 else
13532 emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic));
13533
13534 tp = get_thread_pointer (Pmode, true);
13535 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
13536
13537 if (GET_MODE (x) != Pmode)
13538 x = gen_rtx_ZERO_EXTEND (Pmode, x);
13539
13540 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
13541 }
13542 else
13543 {
13544 rtx caddr = ix86_tls_get_addr ();
13545
13546 if (TARGET_64BIT)
13547 {
13548 rtx rax = gen_rtx_REG (Pmode, AX_REG);
13549 rtx_insn *insns;
13550
13551 start_sequence ();
13552 emit_call_insn
13553 (ix86_gen_tls_global_dynamic_64 (rax, x, caddr));
13554 insns = get_insns ();
13555 end_sequence ();
13556
13557 if (GET_MODE (x) != Pmode)
13558 x = gen_rtx_ZERO_EXTEND (Pmode, x);
13559
13560 RTL_CONST_CALL_P (insns) = 1;
13561 emit_libcall_block (insns, dest, rax, x);
13562 }
13563 else
13564 emit_insn (gen_tls_global_dynamic_32 (dest, x, pic, caddr));
13565 }
13566 break;
13567
13568 case TLS_MODEL_LOCAL_DYNAMIC:
13569 base = gen_reg_rtx (Pmode);
13570
13571 if (!TARGET_64BIT)
13572 {
13573 if (flag_pic)
13574 pic = pic_offset_table_rtx;
13575 else
13576 {
13577 pic = gen_reg_rtx (Pmode);
13578 emit_insn (gen_set_got (pic));
13579 }
13580 }
13581
13582 if (TARGET_GNU2_TLS)
13583 {
13584 rtx tmp = ix86_tls_module_base ();
13585
13586 if (TARGET_64BIT)
13587 emit_insn (gen_tls_dynamic_gnu2_64 (base, tmp));
13588 else
13589 emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic));
13590
13591 tp = get_thread_pointer (Pmode, true);
13592 set_unique_reg_note (get_last_insn (), REG_EQUAL,
13593 gen_rtx_MINUS (Pmode, tmp, tp));
13594 }
13595 else
13596 {
13597 rtx caddr = ix86_tls_get_addr ();
13598
13599 if (TARGET_64BIT)
13600 {
13601 rtx rax = gen_rtx_REG (Pmode, AX_REG);
13602 rtx_insn *insns;
13603 rtx eqv;
13604
13605 start_sequence ();
13606 emit_call_insn
13607 (ix86_gen_tls_local_dynamic_base_64 (rax, caddr));
13608 insns = get_insns ();
13609 end_sequence ();
13610
13611 /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
13612 share the LD_BASE result with other LD model accesses. */
13613 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
13614 UNSPEC_TLS_LD_BASE);
13615
13616 RTL_CONST_CALL_P (insns) = 1;
13617 emit_libcall_block (insns, base, rax, eqv);
13618 }
13619 else
13620 emit_insn (gen_tls_local_dynamic_base_32 (base, pic, caddr));
13621 }
13622
13623 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
13624 off = gen_rtx_CONST (Pmode, off);
13625
13626 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
13627
13628 if (TARGET_GNU2_TLS)
13629 {
13630 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
13631
13632 if (GET_MODE (x) != Pmode)
13633 x = gen_rtx_ZERO_EXTEND (Pmode, x);
13634
13635 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
13636 }
13637 break;
13638
13639 case TLS_MODEL_INITIAL_EXEC:
13640 if (TARGET_64BIT)
13641 {
13642 if (TARGET_SUN_TLS && !TARGET_X32)
13643 {
13644 /* The Sun linker took the AMD64 TLS spec literally
13645 and can only handle %rax as the destination of the
13646 initial-exec code sequence. */
13647
13648 dest = gen_reg_rtx (DImode);
13649 emit_insn (gen_tls_initial_exec_64_sun (dest, x));
13650 return dest;
13651 }
13652
13653 /* Generate DImode references to avoid %fs:(%reg32)
13654 problems and the linker IE->LE relaxation bug. */
13655 tp_mode = DImode;
13656 pic = NULL;
13657 type = UNSPEC_GOTNTPOFF;
13658 }
13659 else if (flag_pic)
13660 {
13661 set_pic_reg_ever_live ();
13662 pic = pic_offset_table_rtx;
13663 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
13664 }
13665 else if (!TARGET_ANY_GNU_TLS)
13666 {
13667 pic = gen_reg_rtx (Pmode);
13668 emit_insn (gen_set_got (pic));
13669 type = UNSPEC_GOTTPOFF;
13670 }
13671 else
13672 {
13673 pic = NULL;
13674 type = UNSPEC_INDNTPOFF;
13675 }
13676
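/* Build a load of the thread-pointer offset from the GOT, using the
   relocation chosen above; the memory reference gets the GOT alias set
   so it is kept separate from ordinary user memory.  */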
13677 off = gen_rtx_UNSPEC (tp_mode, gen_rtvec (1, x), type);
13678 off = gen_rtx_CONST (tp_mode, off);
13679 if (pic)
13680 off = gen_rtx_PLUS (tp_mode, pic, off);
13681 off = gen_const_mem (tp_mode, off);
13682 set_mem_alias_set (off, ix86_GOT_alias_set ());
13683
13684 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
13685 {
13686 base = get_thread_pointer (tp_mode,
13687 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
13688 off = force_reg (tp_mode, off);
13689 return gen_rtx_PLUS (tp_mode, base, off);
13690 }
13691 else
13692 {
13693 base = get_thread_pointer (Pmode, true);
13694 dest = gen_reg_rtx (Pmode);
13695 emit_insn (ix86_gen_sub3 (dest, base, off));
13696 }
13697 break;
13698
13699 case TLS_MODEL_LOCAL_EXEC:
13700 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
13701 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
13702 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
13703 off = gen_rtx_CONST (Pmode, off);
13704
13705 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
13706 {
13707 base = get_thread_pointer (Pmode,
13708 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
13709 return gen_rtx_PLUS (Pmode, base, off);
13710 }
13711 else
13712 {
13713 base = get_thread_pointer (Pmode, true);
13714 dest = gen_reg_rtx (Pmode);
13715 emit_insn (ix86_gen_sub3 (dest, base, off));
13716 }
13717 break;
13718
13719 default:
13720 gcc_unreachable ();
13721 }
13722
13723 return dest;
13724 }
13725
13726 /* Create or return the unique __imp_DECL dllimport symbol corresponding
13727 to symbol DECL if BEIMPORT is true. Otherwise create or return the
13728 unique refptr-DECL symbol corresponding to symbol DECL. */
13729
13730 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
13731 htab_t dllimport_map;
13732
13733 static tree
13734 get_dllimport_decl (tree decl, bool beimport)
13735 {
13736 struct tree_map *h, in;
13737 void **loc;
13738 const char *name;
13739 const char *prefix;
13740 size_t namelen, prefixlen;
13741 char *imp_name;
13742 tree to;
13743 rtx rtl;
13744
13745 if (!dllimport_map)
13746 dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
13747
13748 in.hash = htab_hash_pointer (decl);
13749 in.base.from = decl;
13750 loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
13751 h = (struct tree_map *) *loc;
13752 if (h)
13753 return h->to;
13754
13755 *loc = h = ggc_alloc<tree_map> ();
13756 h->hash = in.hash;
13757 h->base.from = decl;
13758 h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
13759 VAR_DECL, NULL, ptr_type_node);
13760 DECL_ARTIFICIAL (to) = 1;
13761 DECL_IGNORED_P (to) = 1;
13762 DECL_EXTERNAL (to) = 1;
13763 TREE_READONLY (to) = 1;
13764
13765 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
13766 name = targetm.strip_name_encoding (name);
13767 if (beimport)
13768 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
13769 ? "*__imp_" : "*__imp__";
13770 else
13771 prefix = user_label_prefix[0] == 0 ? "*.refptr." : "*refptr.";
13772 namelen = strlen (name);
13773 prefixlen = strlen (prefix);
13774 imp_name = (char *) alloca (namelen + prefixlen + 1);
13775 memcpy (imp_name, prefix, prefixlen);
13776 memcpy (imp_name + prefixlen, name, namelen + 1);
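/* E.g. with an empty user_label_prefix the stub for `foo' is named
   "*__imp_foo" (dllimport) or "*.refptr.foo" (refptr); the leading '*'
   keeps the assembler-name machinery from prepending another prefix.  */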
13777
13778 name = ggc_alloc_string (imp_name, namelen + prefixlen);
13779 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
13780 SET_SYMBOL_REF_DECL (rtl, to);
13781 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL | SYMBOL_FLAG_STUBVAR;
13782 if (!beimport)
13783 {
13784 SYMBOL_REF_FLAGS (rtl) |= SYMBOL_FLAG_EXTERNAL;
13785 #ifdef SUB_TARGET_RECORD_STUB
13786 SUB_TARGET_RECORD_STUB (name);
13787 #endif
13788 }
13789
13790 rtl = gen_const_mem (Pmode, rtl);
13791 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
13792
13793 SET_DECL_RTL (to, rtl);
13794 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
13795
13796 return to;
13797 }
13798
13799 /* Expand SYMBOL into its corresponding far-address symbol.
13800 WANT_REG is true if we require the result be a register. */
13801
13802 static rtx
13803 legitimize_pe_coff_extern_decl (rtx symbol, bool want_reg)
13804 {
13805 tree imp_decl;
13806 rtx x;
13807
13808 gcc_assert (SYMBOL_REF_DECL (symbol));
13809 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), false);
13810
13811 x = DECL_RTL (imp_decl);
13812 if (want_reg)
13813 x = force_reg (Pmode, x);
13814 return x;
13815 }
13816
13817 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
13818 true if we require the result be a register. */
13819
13820 static rtx
13821 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
13822 {
13823 tree imp_decl;
13824 rtx x;
13825
13826 gcc_assert (SYMBOL_REF_DECL (symbol));
13827 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), true);
13828
13829 x = DECL_RTL (imp_decl);
13830 if (want_reg)
13831 x = force_reg (Pmode, x);
13832 return x;
13833 }
13834
13835 /* Expand ADDR into its corresponding dllimport or refptr symbol. INREG
13836 is true if we require the result to be a register. */
13837
13838 static rtx
13839 legitimize_pe_coff_symbol (rtx addr, bool inreg)
13840 {
13841 if (!TARGET_PECOFF)
13842 return NULL_RTX;
13843
13844 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
13845 {
13846 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
13847 return legitimize_dllimport_symbol (addr, inreg);
13848 if (GET_CODE (addr) == CONST
13849 && GET_CODE (XEXP (addr, 0)) == PLUS
13850 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
13851 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
13852 {
13853 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), inreg);
13854 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
13855 }
13856 }
13857
13858 if (ix86_cmodel != CM_LARGE_PIC && ix86_cmodel != CM_MEDIUM_PIC)
13859 return NULL_RTX;
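/* For the medium and large PIC code models, external symbols that are not
   dllimported are reached through a refptr stub rather than directly.  */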
13860 if (GET_CODE (addr) == SYMBOL_REF
13861 && !is_imported_p (addr)
13862 && SYMBOL_REF_EXTERNAL_P (addr)
13863 && SYMBOL_REF_DECL (addr))
13864 return legitimize_pe_coff_extern_decl (addr, inreg);
13865
13866 if (GET_CODE (addr) == CONST
13867 && GET_CODE (XEXP (addr, 0)) == PLUS
13868 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
13869 && !is_imported_p (XEXP (XEXP (addr, 0), 0))
13870 && SYMBOL_REF_EXTERNAL_P (XEXP (XEXP (addr, 0), 0))
13871 && SYMBOL_REF_DECL (XEXP (XEXP (addr, 0), 0)))
13872 {
13873 rtx t = legitimize_pe_coff_extern_decl (XEXP (XEXP (addr, 0), 0), inreg);
13874 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
13875 }
13876 return NULL_RTX;
13877 }
13878
13879 /* Try machine-dependent ways of modifying an illegitimate address
13880 to be legitimate. If we find one, return the new, valid address.
13881 This function is used in only one place: `memory_address' in explow.c.
13882
13883 OLDX is the address as it was before break_out_memory_refs was called.
13884 In some cases it is useful to look at this to decide what needs to be done.
13885
13886 It is always safe for this function to do nothing. It exists to recognize
13887 opportunities to optimize the output.
13888
13889 For the 80386, we handle X+REG by loading X into a register R and
13890 using R+REG. R will go in a general reg and indexing will be used.
13891 However, if REG is a broken-out memory address or multiplication,
13892 nothing needs to be done because REG can certainly go in a general reg.
13893
13894 When -fpic is used, special handling is needed for symbolic references.
13895 See comments by legitimize_pic_address in i386.c for details. */
13896
13897 static rtx
13898 ix86_legitimize_address (rtx x, rtx, enum machine_mode mode)
13899 {
13900 int changed = 0;
13901 unsigned log;
13902
13903 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
13904 if (log)
13905 return legitimize_tls_address (x, (enum tls_model) log, false);
13906 if (GET_CODE (x) == CONST
13907 && GET_CODE (XEXP (x, 0)) == PLUS
13908 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
13909 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
13910 {
13911 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
13912 (enum tls_model) log, false);
13913 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
13914 }
13915
13916 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
13917 {
13918 rtx tmp = legitimize_pe_coff_symbol (x, true);
13919 if (tmp)
13920 return tmp;
13921 }
13922
13923 if (flag_pic && SYMBOLIC_CONST (x))
13924 return legitimize_pic_address (x, 0);
13925
13926 #if TARGET_MACHO
13927 if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x))
13928 return machopic_indirect_data_reference (x, 0);
13929 #endif
13930
13931 /* Canonicalize shifts by 0, 1, 2, 3 into a multiply. */
13932 if (GET_CODE (x) == ASHIFT
13933 && CONST_INT_P (XEXP (x, 1))
13934 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
13935 {
13936 changed = 1;
13937 log = INTVAL (XEXP (x, 1));
13938 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
13939 GEN_INT (1 << log));
13940 }
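/* E.g. (ashift reg 2) becomes (mult reg 4), matching the index*scale
   form of an address.  */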
13941
13942 if (GET_CODE (x) == PLUS)
13943 {
13944 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
13945
13946 if (GET_CODE (XEXP (x, 0)) == ASHIFT
13947 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
13948 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
13949 {
13950 changed = 1;
13951 log = INTVAL (XEXP (XEXP (x, 0), 1));
13952 XEXP (x, 0) = gen_rtx_MULT (Pmode,
13953 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
13954 GEN_INT (1 << log));
13955 }
13956
13957 if (GET_CODE (XEXP (x, 1)) == ASHIFT
13958 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
13959 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
13960 {
13961 changed = 1;
13962 log = INTVAL (XEXP (XEXP (x, 1), 1));
13963 XEXP (x, 1) = gen_rtx_MULT (Pmode,
13964 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
13965 GEN_INT (1 << log));
13966 }
13967
13968 /* Put multiply first if it isn't already. */
13969 if (GET_CODE (XEXP (x, 1)) == MULT)
13970 {
13971 rtx tmp = XEXP (x, 0);
13972 XEXP (x, 0) = XEXP (x, 1);
13973 XEXP (x, 1) = tmp;
13974 changed = 1;
13975 }
13976
13977 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
13978 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
13979 created by virtual register instantiation, register elimination, and
13980 similar optimizations. */
13981 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
13982 {
13983 changed = 1;
13984 x = gen_rtx_PLUS (Pmode,
13985 gen_rtx_PLUS (Pmode, XEXP (x, 0),
13986 XEXP (XEXP (x, 1), 0)),
13987 XEXP (XEXP (x, 1), 1));
13988 }
13989
13990 /* Canonicalize
13991 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
13992 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
13993 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
13994 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
13995 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
13996 && CONSTANT_P (XEXP (x, 1)))
13997 {
13998 rtx constant;
13999 rtx other = NULL_RTX;
14000
14001 if (CONST_INT_P (XEXP (x, 1)))
14002 {
14003 constant = XEXP (x, 1);
14004 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
14005 }
14006 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
14007 {
14008 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
14009 other = XEXP (x, 1);
14010 }
14011 else
14012 constant = 0;
14013
14014 if (constant)
14015 {
14016 changed = 1;
14017 x = gen_rtx_PLUS (Pmode,
14018 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
14019 XEXP (XEXP (XEXP (x, 0), 1), 0)),
14020 plus_constant (Pmode, other,
14021 INTVAL (constant)));
14022 }
14023 }
14024
14025 if (changed && ix86_legitimate_address_p (mode, x, false))
14026 return x;
14027
14028 if (GET_CODE (XEXP (x, 0)) == MULT)
14029 {
14030 changed = 1;
14031 XEXP (x, 0) = copy_addr_to_reg (XEXP (x, 0));
14032 }
14033
14034 if (GET_CODE (XEXP (x, 1)) == MULT)
14035 {
14036 changed = 1;
14037 XEXP (x, 1) = copy_addr_to_reg (XEXP (x, 1));
14038 }
14039
14040 if (changed
14041 && REG_P (XEXP (x, 1))
14042 && REG_P (XEXP (x, 0)))
14043 return x;
14044
14045 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
14046 {
14047 changed = 1;
14048 x = legitimize_pic_address (x, 0);
14049 }
14050
14051 if (changed && ix86_legitimate_address_p (mode, x, false))
14052 return x;
14053
14054 if (REG_P (XEXP (x, 0)))
14055 {
14056 rtx temp = gen_reg_rtx (Pmode);
14057 rtx val = force_operand (XEXP (x, 1), temp);
14058 if (val != temp)
14059 {
14060 val = convert_to_mode (Pmode, val, 1);
14061 emit_move_insn (temp, val);
14062 }
14063
14064 XEXP (x, 1) = temp;
14065 return x;
14066 }
14067
14068 else if (REG_P (XEXP (x, 1)))
14069 {
14070 rtx temp = gen_reg_rtx (Pmode);
14071 rtx val = force_operand (XEXP (x, 0), temp);
14072 if (val != temp)
14073 {
14074 val = convert_to_mode (Pmode, val, 1);
14075 emit_move_insn (temp, val);
14076 }
14077
14078 XEXP (x, 0) = temp;
14079 return x;
14080 }
14081 }
14082
14083 return x;
14084 }
14085 \f
14086 /* Print an integer constant expression in assembler syntax. Addition
14087 and subtraction are the only arithmetic that may appear in these
14088 expressions. FILE is the stdio stream to write to, X is the rtx, and
14089 CODE is the operand print code from the output string. */
14090
14091 static void
14092 output_pic_addr_const (FILE *file, rtx x, int code)
14093 {
14094 char buf[256];
14095
14096 switch (GET_CODE (x))
14097 {
14098 case PC:
14099 gcc_assert (flag_pic);
14100 putc ('.', file);
14101 break;
14102
14103 case SYMBOL_REF:
14104 if (TARGET_64BIT || ! TARGET_MACHO_BRANCH_ISLANDS)
14105 output_addr_const (file, x);
14106 else
14107 {
14108 const char *name = XSTR (x, 0);
14109
14110 /* Mark the decl as referenced so that cgraph will
14111 output the function. */
14112 if (SYMBOL_REF_DECL (x))
14113 mark_decl_referenced (SYMBOL_REF_DECL (x));
14114
14115 #if TARGET_MACHO
14116 if (MACHOPIC_INDIRECT
14117 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
14118 name = machopic_indirection_name (x, /*stub_p=*/true);
14119 #endif
14120 assemble_name (file, name);
14121 }
14122 if (!TARGET_MACHO && !(TARGET_64BIT && TARGET_PECOFF)
14123 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
14124 fputs ("@PLT", file);
14125 break;
14126
14127 case LABEL_REF:
14128 x = XEXP (x, 0);
14129 /* FALLTHRU */
14130 case CODE_LABEL:
14131 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
14132 assemble_name (asm_out_file, buf);
14133 break;
14134
14135 case CONST_INT:
14136 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
14137 break;
14138
14139 case CONST:
14140 /* This used to output parentheses around the expression,
14141 but that does not work on the 386 (either ATT or BSD assembler). */
14142 output_pic_addr_const (file, XEXP (x, 0), code);
14143 break;
14144
14145 case CONST_DOUBLE:
14146 if (GET_MODE (x) == VOIDmode)
14147 {
14148 /* We can use %d if the number is <32 bits and positive. */
14149 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
14150 fprintf (file, "0x%lx%08lx",
14151 (unsigned long) CONST_DOUBLE_HIGH (x),
14152 (unsigned long) CONST_DOUBLE_LOW (x));
14153 else
14154 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
14155 }
14156 else
14157 /* We can't handle floating point constants;
14158 TARGET_PRINT_OPERAND must handle them. */
14159 output_operand_lossage ("floating constant misused");
14160 break;
14161
14162 case PLUS:
14163 /* Some assemblers need integer constants to appear first. */
14164 if (CONST_INT_P (XEXP (x, 0)))
14165 {
14166 output_pic_addr_const (file, XEXP (x, 0), code);
14167 putc ('+', file);
14168 output_pic_addr_const (file, XEXP (x, 1), code);
14169 }
14170 else
14171 {
14172 gcc_assert (CONST_INT_P (XEXP (x, 1)));
14173 output_pic_addr_const (file, XEXP (x, 1), code);
14174 putc ('+', file);
14175 output_pic_addr_const (file, XEXP (x, 0), code);
14176 }
14177 break;
14178
14179 case MINUS:
14180 if (!TARGET_MACHO)
14181 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
14182 output_pic_addr_const (file, XEXP (x, 0), code);
14183 putc ('-', file);
14184 output_pic_addr_const (file, XEXP (x, 1), code);
14185 if (!TARGET_MACHO)
14186 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
14187 break;
14188
14189 case UNSPEC:
14190 if (XINT (x, 1) == UNSPEC_STACK_CHECK)
14191 {
14192 bool f = i386_asm_output_addr_const_extra (file, x);
14193 gcc_assert (f);
14194 break;
14195 }
14196
14197 gcc_assert (XVECLEN (x, 0) == 1);
14198 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
14199 switch (XINT (x, 1))
14200 {
14201 case UNSPEC_GOT:
14202 fputs ("@GOT", file);
14203 break;
14204 case UNSPEC_GOTOFF:
14205 fputs ("@GOTOFF", file);
14206 break;
14207 case UNSPEC_PLTOFF:
14208 fputs ("@PLTOFF", file);
14209 break;
14210 case UNSPEC_PCREL:
14211 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14212 "(%rip)" : "[rip]", file);
14213 break;
14214 case UNSPEC_GOTPCREL:
14215 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14216 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
14217 break;
14218 case UNSPEC_GOTTPOFF:
14219 /* FIXME: This might be @TPOFF in Sun ld too. */
14220 fputs ("@gottpoff", file);
14221 break;
14222 case UNSPEC_TPOFF:
14223 fputs ("@tpoff", file);
14224 break;
14225 case UNSPEC_NTPOFF:
14226 if (TARGET_64BIT)
14227 fputs ("@tpoff", file);
14228 else
14229 fputs ("@ntpoff", file);
14230 break;
14231 case UNSPEC_DTPOFF:
14232 fputs ("@dtpoff", file);
14233 break;
14234 case UNSPEC_GOTNTPOFF:
14235 if (TARGET_64BIT)
14236 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14237 "@gottpoff(%rip)": "@gottpoff[rip]", file);
14238 else
14239 fputs ("@gotntpoff", file);
14240 break;
14241 case UNSPEC_INDNTPOFF:
14242 fputs ("@indntpoff", file);
14243 break;
14244 #if TARGET_MACHO
14245 case UNSPEC_MACHOPIC_OFFSET:
14246 putc ('-', file);
14247 machopic_output_function_base_name (file);
14248 break;
14249 #endif
14250 default:
14251 output_operand_lossage ("invalid UNSPEC as operand");
14252 break;
14253 }
14254 break;
14255
14256 default:
14257 output_operand_lossage ("invalid expression as operand");
14258 }
14259 }
14260
14261 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
14262 We need to emit DTP-relative relocations. */
14263
14264 static void ATTRIBUTE_UNUSED
14265 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
14266 {
14267 fputs (ASM_LONG, file);
14268 output_addr_const (file, x);
14269 fputs ("@dtpoff", file);
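/* A size of 8 is emitted as the 32-bit @dtpoff word followed by a zero
   upper word (the ", 0" below); size 4 needs nothing extra.  */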
14270 switch (size)
14271 {
14272 case 4:
14273 break;
14274 case 8:
14275 fputs (", 0", file);
14276 break;
14277 default:
14278 gcc_unreachable ();
14279 }
14280 }
14281
14282 /* Return true if X is a representation of the PIC register. This copes
14283 with calls from ix86_find_base_term, where the register might have
14284 been replaced by a cselib value. */
14285
14286 static bool
14287 ix86_pic_register_p (rtx x)
14288 {
14289 if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
14290 return (pic_offset_table_rtx
14291 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
14292 else if (!REG_P (x))
14293 return false;
14294 else if (pic_offset_table_rtx)
14295 {
14296 if (REGNO (x) == REGNO (pic_offset_table_rtx))
14297 return true;
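/* X may be a hard register standing in for the PIC pseudo; in that case
   its ORIGINAL_REGNO still names that pseudo.  */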
14298 if (HARD_REGISTER_P (x)
14299 && !HARD_REGISTER_P (pic_offset_table_rtx)
14300 && ORIGINAL_REGNO (x) == REGNO (pic_offset_table_rtx))
14301 return true;
14302 return false;
14303 }
14304 else
14305 return REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
14306 }
14307
14308 /* Helper function for ix86_delegitimize_address.
14309 Attempt to delegitimize TLS local-exec accesses. */
14310
14311 static rtx
14312 ix86_delegitimize_tls_address (rtx orig_x)
14313 {
14314 rtx x = orig_x, unspec;
14315 struct ix86_address addr;
14316
14317 if (!TARGET_TLS_DIRECT_SEG_REFS)
14318 return orig_x;
14319 if (MEM_P (x))
14320 x = XEXP (x, 0);
14321 if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
14322 return orig_x;
14323 if (ix86_decompose_address (x, &addr) == 0
14324 || addr.seg != DEFAULT_TLS_SEG_REG
14325 || addr.disp == NULL_RTX
14326 || GET_CODE (addr.disp) != CONST)
14327 return orig_x;
14328 unspec = XEXP (addr.disp, 0);
14329 if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
14330 unspec = XEXP (unspec, 0);
14331 if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
14332 return orig_x;
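/* The UNSPEC_NTPOFF operand is the TLS SYMBOL_REF; rebuild a plain
   base + index*scale + symbol (+ offset) expression around it.  */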
14333 x = XVECEXP (unspec, 0, 0);
14334 gcc_assert (GET_CODE (x) == SYMBOL_REF);
14335 if (unspec != XEXP (addr.disp, 0))
14336 x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
14337 if (addr.index)
14338 {
14339 rtx idx = addr.index;
14340 if (addr.scale != 1)
14341 idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
14342 x = gen_rtx_PLUS (Pmode, idx, x);
14343 }
14344 if (addr.base)
14345 x = gen_rtx_PLUS (Pmode, addr.base, x);
14346 if (MEM_P (orig_x))
14347 x = replace_equiv_address_nv (orig_x, x);
14348 return x;
14349 }
14350
14351 /* In the name of slightly smaller debug output, and to cater to
14352 general assembler lossage, recognize PIC+GOTOFF and turn it back
14353 into a direct symbol reference.
14354
14355 On Darwin, this is necessary to avoid a crash, because Darwin
14356 has a different PIC label for each routine but the DWARF debugging
14357 information is not associated with any particular routine, so it's
14358 necessary to remove references to the PIC label from RTL stored by
14359 the DWARF output code. */
14360
14361 static rtx
14362 ix86_delegitimize_address (rtx x)
14363 {
14364 rtx orig_x = delegitimize_mem_from_attrs (x);
14365 /* addend is NULL or some rtx if x is something+GOTOFF where
14366 something doesn't include the PIC register. */
14367 rtx addend = NULL_RTX;
14368 /* reg_addend is NULL or a multiple of some register. */
14369 rtx reg_addend = NULL_RTX;
14370 /* const_addend is NULL or a const_int. */
14371 rtx const_addend = NULL_RTX;
14372 /* This is the result, or NULL. */
14373 rtx result = NULL_RTX;
14374
14375 x = orig_x;
14376
14377 if (MEM_P (x))
14378 x = XEXP (x, 0);
14379
14380 if (TARGET_64BIT)
14381 {
14382 if (GET_CODE (x) == CONST
14383 && GET_CODE (XEXP (x, 0)) == PLUS
14384 && GET_MODE (XEXP (x, 0)) == Pmode
14385 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
14386 && GET_CODE (XEXP (XEXP (x, 0), 0)) == UNSPEC
14387 && XINT (XEXP (XEXP (x, 0), 0), 1) == UNSPEC_PCREL)
14388 {
14389 rtx x2 = XVECEXP (XEXP (XEXP (x, 0), 0), 0, 0);
14390 x = gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 1), x2);
14391 if (MEM_P (orig_x))
14392 x = replace_equiv_address_nv (orig_x, x);
14393 return x;
14394 }
14395
14396 if (GET_CODE (x) == CONST
14397 && GET_CODE (XEXP (x, 0)) == UNSPEC
14398 && (XINT (XEXP (x, 0), 1) == UNSPEC_GOTPCREL
14399 || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL)
14400 && (MEM_P (orig_x) || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL))
14401 {
14402 x = XVECEXP (XEXP (x, 0), 0, 0);
14403 if (GET_MODE (orig_x) != GET_MODE (x) && MEM_P (orig_x))
14404 {
14405 x = simplify_gen_subreg (GET_MODE (orig_x), x,
14406 GET_MODE (x), 0);
14407 if (x == NULL_RTX)
14408 return orig_x;
14409 }
14410 return x;
14411 }
14412
14413 if (ix86_cmodel != CM_MEDIUM_PIC && ix86_cmodel != CM_LARGE_PIC)
14414 return ix86_delegitimize_tls_address (orig_x);
14415
14416 /* Fall thru into the code shared with -m32 for -mcmodel=large -fpic
14417 and -mcmodel=medium -fpic. */
14418 }
14419
14420 if (GET_CODE (x) != PLUS
14421 || GET_CODE (XEXP (x, 1)) != CONST)
14422 return ix86_delegitimize_tls_address (orig_x);
14423
14424 if (ix86_pic_register_p (XEXP (x, 0)))
14425 /* %ebx + GOT/GOTOFF */
14426 ;
14427 else if (GET_CODE (XEXP (x, 0)) == PLUS)
14428 {
14429 /* %ebx + %reg * scale + GOT/GOTOFF */
14430 reg_addend = XEXP (x, 0);
14431 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
14432 reg_addend = XEXP (reg_addend, 1);
14433 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
14434 reg_addend = XEXP (reg_addend, 0);
14435 else
14436 {
14437 reg_addend = NULL_RTX;
14438 addend = XEXP (x, 0);
14439 }
14440 }
14441 else
14442 addend = XEXP (x, 0);
14443
14444 x = XEXP (XEXP (x, 1), 0);
14445 if (GET_CODE (x) == PLUS
14446 && CONST_INT_P (XEXP (x, 1)))
14447 {
14448 const_addend = XEXP (x, 1);
14449 x = XEXP (x, 0);
14450 }
14451
14452 if (GET_CODE (x) == UNSPEC
14453 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
14454 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))
14455 || (XINT (x, 1) == UNSPEC_PLTOFF && ix86_cmodel == CM_LARGE_PIC
14456 && !MEM_P (orig_x) && !addend)))
14457 result = XVECEXP (x, 0, 0);
14458
14459 if (!TARGET_64BIT && TARGET_MACHO && darwin_local_data_pic (x)
14460 && !MEM_P (orig_x))
14461 result = XVECEXP (x, 0, 0);
14462
14463 if (! result)
14464 return ix86_delegitimize_tls_address (orig_x);
14465
14466 if (const_addend)
14467 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
14468 if (reg_addend)
14469 result = gen_rtx_PLUS (Pmode, reg_addend, result);
14470 if (addend)
14471 {
14472 /* If the rest of original X doesn't involve the PIC register, add
14473 addend and subtract pic_offset_table_rtx. This can happen e.g.
14474 for code like:
14475 leal (%ebx, %ecx, 4), %ecx
14476 ...
14477 movl foo@GOTOFF(%ecx), %edx
14478 in which case we return (%ecx - %ebx) + foo
14479 or (%ecx - _GLOBAL_OFFSET_TABLE_) + foo if a pseudo PIC register
14480 is used and reload has completed. */
14481 if (pic_offset_table_rtx
14482 && (!reload_completed || !ix86_use_pseudo_pic_reg ()))
14483 result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
14484 pic_offset_table_rtx),
14485 result);
14486 else if (pic_offset_table_rtx && !TARGET_MACHO && !TARGET_VXWORKS_RTP)
14487 {
14488 rtx tmp = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
14489 tmp = gen_rtx_MINUS (Pmode, copy_rtx (addend), tmp);
14490 result = gen_rtx_PLUS (Pmode, tmp, result);
14491 }
14492 else
14493 return orig_x;
14494 }
14495 if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
14496 {
14497 result = simplify_gen_subreg (GET_MODE (orig_x), result, Pmode, 0);
14498 if (result == NULL_RTX)
14499 return orig_x;
14500 }
14501 return result;
14502 }
14503
14504 /* If X is a machine specific address (i.e. a symbol or label being
14505 referenced as a displacement from the GOT implemented using an
14506 UNSPEC), then return the base term. Otherwise return X. */
14507
14508 rtx
14509 ix86_find_base_term (rtx x)
14510 {
14511 rtx term;
14512
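/* In 64-bit mode a GOT- or PC-relative reference has the form
   (const (unspec [symbol] UNSPEC_GOTPCREL/UNSPEC_PCREL)), possibly inside
   a (plus ... (const_int ...)); the base term is the symbol inside the
   unspec.  */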
14513 if (TARGET_64BIT)
14514 {
14515 if (GET_CODE (x) != CONST)
14516 return x;
14517 term = XEXP (x, 0);
14518 if (GET_CODE (term) == PLUS
14519 && (CONST_INT_P (XEXP (term, 1))
14520 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
14521 term = XEXP (term, 0);
14522 if (GET_CODE (term) != UNSPEC
14523 || (XINT (term, 1) != UNSPEC_GOTPCREL
14524 && XINT (term, 1) != UNSPEC_PCREL))
14525 return x;
14526
14527 return XVECEXP (term, 0, 0);
14528 }
14529
14530 return ix86_delegitimize_address (x);
14531 }
14532 \f
14533 static void
14534 put_condition_code (enum rtx_code code, enum machine_mode mode, bool reverse,
14535 bool fp, FILE *file)
14536 {
14537 const char *suffix;
14538
14539 if (mode == CCFPmode || mode == CCFPUmode)
14540 {
14541 code = ix86_fp_compare_code_to_integer (code);
14542 mode = CCmode;
14543 }
14544 if (reverse)
14545 code = reverse_condition (code);
14546
14547 switch (code)
14548 {
14549 case EQ:
14550 switch (mode)
14551 {
14552 case CCAmode:
14553 suffix = "a";
14554 break;
14555
14556 case CCCmode:
14557 suffix = "c";
14558 break;
14559
14560 case CCOmode:
14561 suffix = "o";
14562 break;
14563
14564 case CCSmode:
14565 suffix = "s";
14566 break;
14567
14568 default:
14569 suffix = "e";
14570 }
14571 break;
14572 case NE:
14573 switch (mode)
14574 {
14575 case CCAmode:
14576 suffix = "na";
14577 break;
14578
14579 case CCCmode:
14580 suffix = "nc";
14581 break;
14582
14583 case CCOmode:
14584 suffix = "no";
14585 break;
14586
14587 case CCSmode:
14588 suffix = "ns";
14589 break;
14590
14591 default:
14592 suffix = "ne";
14593 }
14594 break;
14595 case GT:
14596 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
14597 suffix = "g";
14598 break;
14599 case GTU:
14600 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
14601 Those same assemblers have the same but opposite lossage on cmov. */
14602 if (mode == CCmode)
14603 suffix = fp ? "nbe" : "a";
14604 else
14605 gcc_unreachable ();
14606 break;
14607 case LT:
14608 switch (mode)
14609 {
14610 case CCNOmode:
14611 case CCGOCmode:
14612 suffix = "s";
14613 break;
14614
14615 case CCmode:
14616 case CCGCmode:
14617 suffix = "l";
14618 break;
14619
14620 default:
14621 gcc_unreachable ();
14622 }
14623 break;
14624 case LTU:
14625 if (mode == CCmode)
14626 suffix = "b";
14627 else if (mode == CCCmode)
14628 suffix = "c";
14629 else
14630 gcc_unreachable ();
14631 break;
14632 case GE:
14633 switch (mode)
14634 {
14635 case CCNOmode:
14636 case CCGOCmode:
14637 suffix = "ns";
14638 break;
14639
14640 case CCmode:
14641 case CCGCmode:
14642 suffix = "ge";
14643 break;
14644
14645 default:
14646 gcc_unreachable ();
14647 }
14648 break;
14649 case GEU:
14650 if (mode == CCmode)
14651 suffix = fp ? "nb" : "ae";
14652 else if (mode == CCCmode)
14653 suffix = "nc";
14654 else
14655 gcc_unreachable ();
14656 break;
14657 case LE:
14658 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
14659 suffix = "le";
14660 break;
14661 case LEU:
14662 if (mode == CCmode)
14663 suffix = "be";
14664 else
14665 gcc_unreachable ();
14666 break;
14667 case UNORDERED:
14668 suffix = fp ? "u" : "p";
14669 break;
14670 case ORDERED:
14671 suffix = fp ? "nu" : "np";
14672 break;
14673 default:
14674 gcc_unreachable ();
14675 }
14676 fputs (suffix, file);
14677 }
14678
14679 /* Print the name of register X to FILE based on its machine mode and number.
14680 If CODE is 'w', pretend the mode is HImode.
14681 If CODE is 'b', pretend the mode is QImode.
14682 If CODE is 'k', pretend the mode is SImode.
14683 If CODE is 'q', pretend the mode is DImode.
14684 If CODE is 'x', pretend the mode is V4SFmode.
14685 If CODE is 't', pretend the mode is V8SFmode.
14686 If CODE is 'g', pretend the mode is V16SFmode.
14687 If CODE is 'h', pretend the reg is the 'high' byte register.
14688 If CODE is 'y', print "st(0)" instead of "st" if the reg is a stack op.
14689 If CODE is 'd', duplicate the operand for an AVX instruction.
14690 */
14691
14692 void
14693 print_reg (rtx x, int code, FILE *file)
14694 {
14695 const char *reg;
14696 unsigned int regno;
14697 bool duplicated = code == 'd' && TARGET_AVX;
14698
14699 if (ASSEMBLER_DIALECT == ASM_ATT)
14700 putc ('%', file);
14701
14702 if (x == pc_rtx)
14703 {
14704 gcc_assert (TARGET_64BIT);
14705 fputs ("rip", file);
14706 return;
14707 }
14708
14709 regno = true_regnum (x);
14710 gcc_assert (regno != ARG_POINTER_REGNUM
14711 && regno != FRAME_POINTER_REGNUM
14712 && regno != FLAGS_REG
14713 && regno != FPSR_REG
14714 && regno != FPCR_REG);
14715
14716 if (code == 'w' || MMX_REG_P (x))
14717 code = 2;
14718 else if (code == 'b')
14719 code = 1;
14720 else if (code == 'k')
14721 code = 4;
14722 else if (code == 'q')
14723 code = 8;
14724 else if (code == 'y')
14725 code = 3;
14726 else if (code == 'h')
14727 code = 0;
14728 else if (code == 'x')
14729 code = 16;
14730 else if (code == 't')
14731 code = 32;
14732 else if (code == 'g')
14733 code = 64;
14734 else
14735 code = GET_MODE_SIZE (GET_MODE (x));
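/* From here on CODE holds the operand width in bytes, except that 0
   requests the high QImode register and 3 requests the "st(0)" spelling
   for the x87 stack top.  */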
14736
14737 /* Irritatingly, AMD extended registers use a different naming convention
14738 from the normal registers: "r%d[bwd]" */
14739 if (REX_INT_REGNO_P (regno))
14740 {
14741 gcc_assert (TARGET_64BIT);
14742 putc ('r', file);
14743 fprint_ul (file, regno - FIRST_REX_INT_REG + 8);
14744 switch (code)
14745 {
14746 case 0:
14747 error ("extended registers have no high halves");
14748 break;
14749 case 1:
14750 putc ('b', file);
14751 break;
14752 case 2:
14753 putc ('w', file);
14754 break;
14755 case 4:
14756 putc ('d', file);
14757 break;
14758 case 8:
14759 /* no suffix */
14760 break;
14761 default:
14762 error ("unsupported operand size for extended register");
14763 break;
14764 }
14765 return;
14766 }
14767
14768 reg = NULL;
14769 switch (code)
14770 {
14771 case 3:
14772 if (STACK_TOP_P (x))
14773 {
14774 reg = "st(0)";
14775 break;
14776 }
14777 /* FALLTHRU */
14778 case 8:
14779 case 4:
14780 case 12:
14781 if (! ANY_FP_REG_P (x) && ! ANY_MASK_REG_P (x))
14782 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
14783 /* FALLTHRU */
14784 case 16:
14785 case 2:
14786 normal:
14787 reg = hi_reg_name[regno];
14788 break;
14789 case 1:
14790 if (regno >= ARRAY_SIZE (qi_reg_name))
14791 goto normal;
14792 reg = qi_reg_name[regno];
14793 break;
14794 case 0:
14795 if (regno >= ARRAY_SIZE (qi_high_reg_name))
14796 goto normal;
14797 reg = qi_high_reg_name[regno];
14798 break;
14799 case 32:
14800 if (SSE_REG_P (x))
14801 {
14802 gcc_assert (!duplicated);
14803 putc ('y', file);
14804 fputs (hi_reg_name[regno] + 1, file);
14805 return;
14806 }
14807 case 64:
14808 if (SSE_REG_P (x))
14809 {
14810 gcc_assert (!duplicated);
14811 putc ('z', file);
14812 fputs (hi_reg_name[REGNO (x)] + 1, file);
14813 return;
14814 }
14815 break;
14816 default:
14817 gcc_unreachable ();
14818 }
14819
14820 fputs (reg, file);
14821 if (duplicated)
14822 {
14823 if (ASSEMBLER_DIALECT == ASM_ATT)
14824 fprintf (file, ", %%%s", reg);
14825 else
14826 fprintf (file, ", %s", reg);
14827 }
14828 }
14829
14830 /* Meaning of CODE:
14831 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
14832 C -- print opcode suffix for set/cmov insn.
14833 c -- like C, but print reversed condition
14834 F,f -- likewise, but for floating-point.
14835 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
14836 otherwise nothing
14837 R -- print embedded rounding and sae.
14838 r -- print only sae.
14839 z -- print the opcode suffix for the size of the current operand.
14840 Z -- likewise, with special suffixes for x87 instructions.
14841 * -- print a star (in certain assembler syntax)
14842 A -- print an absolute memory reference.
14843 E -- print address with DImode register names if TARGET_64BIT.
14844 w -- print the operand as if it's a "word" (HImode) even if it isn't.
14845 s -- print a shift double count, followed by the assembler's argument
14846 delimiter.
14847 b -- print the QImode name of the register for the indicated operand.
14848 %b0 would print %al if operands[0] is reg 0.
14849 w -- likewise, print the HImode name of the register.
14850 k -- likewise, print the SImode name of the register.
14851 q -- likewise, print the DImode name of the register.
14852 x -- likewise, print the V4SFmode name of the register.
14853 t -- likewise, print the V8SFmode name of the register.
14854 g -- likewise, print the V16SFmode name of the register.
14855 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
14856 y -- print "st(0)" instead of "st" as a register.
14857 d -- print duplicated register operand for an AVX instruction.
14858 D -- print condition for SSE cmp instruction.
14859 P -- if PIC, print an @PLT suffix.
14860 p -- print raw symbol name.
14861 X -- don't print any sort of PIC '@' suffix for a symbol.
14862 & -- print some in-use local-dynamic symbol name.
14863 H -- print a memory address offset by 8; used for sse high-parts
14864 Y -- print condition for XOP pcom* instruction.
14865 + -- print a branch hint as 'cs' or 'ds' prefix
14866 ; -- print a semicolon (after prefixes, due to a bug in older gas).
14867 ~ -- print "i" if TARGET_AVX2, "f" otherwise.
14868 @ -- print the segment register of a thread base pointer load
14869 ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode
14870 */
14871
14872 void
14873 ix86_print_operand (FILE *file, rtx x, int code)
14874 {
14875 if (code)
14876 {
14877 switch (code)
14878 {
14879 case 'A':
14880 switch (ASSEMBLER_DIALECT)
14881 {
14882 case ASM_ATT:
14883 putc ('*', file);
14884 break;
14885
14886 case ASM_INTEL:
14887 /* Intel syntax. For absolute addresses, registers should not
14888 be surrounded by brackets. */
14889 if (!REG_P (x))
14890 {
14891 putc ('[', file);
14892 ix86_print_operand (file, x, 0);
14893 putc (']', file);
14894 return;
14895 }
14896 break;
14897
14898 default:
14899 gcc_unreachable ();
14900 }
14901
14902 ix86_print_operand (file, x, 0);
14903 return;
14904
14905 case 'E':
14906 /* Wrap address in an UNSPEC to declare special handling. */
14907 if (TARGET_64BIT)
14908 x = gen_rtx_UNSPEC (DImode, gen_rtvec (1, x), UNSPEC_LEA_ADDR);
14909
14910 output_address (x);
14911 return;
14912
14913 case 'L':
14914 if (ASSEMBLER_DIALECT == ASM_ATT)
14915 putc ('l', file);
14916 return;
14917
14918 case 'W':
14919 if (ASSEMBLER_DIALECT == ASM_ATT)
14920 putc ('w', file);
14921 return;
14922
14923 case 'B':
14924 if (ASSEMBLER_DIALECT == ASM_ATT)
14925 putc ('b', file);
14926 return;
14927
14928 case 'Q':
14929 if (ASSEMBLER_DIALECT == ASM_ATT)
14930 putc ('l', file);
14931 return;
14932
14933 case 'S':
14934 if (ASSEMBLER_DIALECT == ASM_ATT)
14935 putc ('s', file);
14936 return;
14937
14938 case 'T':
14939 if (ASSEMBLER_DIALECT == ASM_ATT)
14940 putc ('t', file);
14941 return;
14942
14943 case 'O':
14944 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
14945 if (ASSEMBLER_DIALECT != ASM_ATT)
14946 return;
14947
14948 switch (GET_MODE_SIZE (GET_MODE (x)))
14949 {
14950 case 2:
14951 putc ('w', file);
14952 break;
14953
14954 case 4:
14955 putc ('l', file);
14956 break;
14957
14958 case 8:
14959 putc ('q', file);
14960 break;
14961
14962 default:
14963 output_operand_lossage
14964 ("invalid operand size for operand code 'O'");
14965 return;
14966 }
14967
14968 putc ('.', file);
14969 #endif
14970 return;
14971
14972 case 'z':
14973 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
14974 {
14975 /* Opcodes don't get size suffixes when using Intel syntax. */
14976 if (ASSEMBLER_DIALECT == ASM_INTEL)
14977 return;
14978
14979 switch (GET_MODE_SIZE (GET_MODE (x)))
14980 {
14981 case 1:
14982 putc ('b', file);
14983 return;
14984
14985 case 2:
14986 putc ('w', file);
14987 return;
14988
14989 case 4:
14990 putc ('l', file);
14991 return;
14992
14993 case 8:
14994 putc ('q', file);
14995 return;
14996
14997 default:
14998 output_operand_lossage
14999 ("invalid operand size for operand code 'z'");
15000 return;
15001 }
15002 }
15003
15004 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
15005 warning
15006 (0, "non-integer operand used with operand code 'z'");
15007 /* FALLTHRU */
15008
15009 case 'Z':
15010 /* 387 opcodes don't get size suffixes when using Intel syntax. */
15011 if (ASSEMBLER_DIALECT == ASM_INTEL)
15012 return;
15013
15014 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
15015 {
15016 switch (GET_MODE_SIZE (GET_MODE (x)))
15017 {
15018 case 2:
15019 #ifdef HAVE_AS_IX86_FILDS
15020 putc ('s', file);
15021 #endif
15022 return;
15023
15024 case 4:
15025 putc ('l', file);
15026 return;
15027
15028 case 8:
15029 #ifdef HAVE_AS_IX86_FILDQ
15030 putc ('q', file);
15031 #else
15032 fputs ("ll", file);
15033 #endif
15034 return;
15035
15036 default:
15037 break;
15038 }
15039 }
15040 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
15041 {
15042 /* 387 opcodes don't get size suffixes
15043 if the operands are registers. */
15044 if (STACK_REG_P (x))
15045 return;
15046
15047 switch (GET_MODE_SIZE (GET_MODE (x)))
15048 {
15049 case 4:
15050 putc ('s', file);
15051 return;
15052
15053 case 8:
15054 putc ('l', file);
15055 return;
15056
15057 case 12:
15058 case 16:
15059 putc ('t', file);
15060 return;
15061
15062 default:
15063 break;
15064 }
15065 }
15066 else
15067 {
15068 output_operand_lossage
15069 ("invalid operand type used with operand code 'Z'");
15070 return;
15071 }
15072
15073 output_operand_lossage
15074 ("invalid operand size for operand code 'Z'");
15075 return;
15076
15077 case 'd':
15078 case 'b':
15079 case 'w':
15080 case 'k':
15081 case 'q':
15082 case 'h':
15083 case 't':
15084 case 'g':
15085 case 'y':
15086 case 'x':
15087 case 'X':
15088 case 'P':
15089 case 'p':
15090 break;
15091
15092 case 's':
15093 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
15094 {
15095 ix86_print_operand (file, x, 0);
15096 fputs (", ", file);
15097 }
15098 return;
15099
15100 case 'Y':
15101 switch (GET_CODE (x))
15102 {
15103 case NE:
15104 fputs ("neq", file);
15105 break;
15106 case EQ:
15107 fputs ("eq", file);
15108 break;
15109 case GE:
15110 case GEU:
15111 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
15112 break;
15113 case GT:
15114 case GTU:
15115 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
15116 break;
15117 case LE:
15118 case LEU:
15119 fputs ("le", file);
15120 break;
15121 case LT:
15122 case LTU:
15123 fputs ("lt", file);
15124 break;
15125 case UNORDERED:
15126 fputs ("unord", file);
15127 break;
15128 case ORDERED:
15129 fputs ("ord", file);
15130 break;
15131 case UNEQ:
15132 fputs ("ueq", file);
15133 break;
15134 case UNGE:
15135 fputs ("nlt", file);
15136 break;
15137 case UNGT:
15138 fputs ("nle", file);
15139 break;
15140 case UNLE:
15141 fputs ("ule", file);
15142 break;
15143 case UNLT:
15144 fputs ("ult", file);
15145 break;
15146 case LTGT:
15147 fputs ("une", file);
15148 break;
15149 default:
15150 output_operand_lossage ("operand is not a condition code, "
15151 "invalid operand code 'Y'");
15152 return;
15153 }
15154 return;
15155
15156 case 'D':
15157 /* A bit of braindamage here: the SSE compare instructions
15158 use completely different names for the comparisons than the
15159 fp conditional moves do. */
15160 switch (GET_CODE (x))
15161 {
15162 case UNEQ:
15163 if (TARGET_AVX)
15164 {
15165 fputs ("eq_us", file);
15166 break;
15167 }
15168 case EQ:
15169 fputs ("eq", file);
15170 break;
15171 case UNLT:
15172 if (TARGET_AVX)
15173 {
15174 fputs ("nge", file);
15175 break;
15176 }
15177 case LT:
15178 fputs ("lt", file);
15179 break;
15180 case UNLE:
15181 if (TARGET_AVX)
15182 {
15183 fputs ("ngt", file);
15184 break;
15185 }
15186 case LE:
15187 fputs ("le", file);
15188 break;
15189 case UNORDERED:
15190 fputs ("unord", file);
15191 break;
15192 case LTGT:
15193 if (TARGET_AVX)
15194 {
15195 fputs ("neq_oq", file);
15196 break;
15197 }
15198 case NE:
15199 fputs ("neq", file);
15200 break;
15201 case GE:
15202 if (TARGET_AVX)
15203 {
15204 fputs ("ge", file);
15205 break;
15206 }
15207 case UNGE:
15208 fputs ("nlt", file);
15209 break;
15210 case GT:
15211 if (TARGET_AVX)
15212 {
15213 fputs ("gt", file);
15214 break;
15215 }
15216 case UNGT:
15217 fputs ("nle", file);
15218 break;
15219 case ORDERED:
15220 fputs ("ord", file);
15221 break;
15222 default:
15223 output_operand_lossage ("operand is not a condition code, "
15224 "invalid operand code 'D'");
15225 return;
15226 }
15227 return;
15228
15229 case 'F':
15230 case 'f':
15231 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
15232 if (ASSEMBLER_DIALECT == ASM_ATT)
15233 putc ('.', file);
15234 #endif
15235
15236 case 'C':
15237 case 'c':
15238 if (!COMPARISON_P (x))
15239 {
15240 output_operand_lossage ("operand is not a condition code, "
15241 "invalid operand code '%c'", code);
15242 return;
15243 }
15244 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)),
15245 code == 'c' || code == 'f',
15246 code == 'F' || code == 'f',
15247 file);
15248 return;
15249
15250 case 'H':
15251 if (!offsettable_memref_p (x))
15252 {
15253 output_operand_lossage ("operand is not an offsettable memory "
15254 "reference, invalid operand code 'H'");
15255 return;
15256 }
15257 /* It doesn't actually matter what mode we use here, as we're
15258 only going to use this for printing. */
15259 x = adjust_address_nv (x, DImode, 8);
15260 /* Output 'qword ptr' for the Intel assembler dialect. */
15261 if (ASSEMBLER_DIALECT == ASM_INTEL)
15262 code = 'q';
15263 break;
15264
15265 case 'K':
15266 gcc_assert (CONST_INT_P (x));
15267
15268 if (INTVAL (x) & IX86_HLE_ACQUIRE)
15269 #ifdef HAVE_AS_IX86_HLE
15270 fputs ("xacquire ", file);
15271 #else
15272 fputs ("\n" ASM_BYTE "0xf2\n\t", file);
15273 #endif
15274 else if (INTVAL (x) & IX86_HLE_RELEASE)
15275 #ifdef HAVE_AS_IX86_HLE
15276 fputs ("xrelease ", file);
15277 #else
15278 fputs ("\n" ASM_BYTE "0xf3\n\t", file);
15279 #endif
15280 /* We do not want to print the value of the operand. */
15281 return;
15282
15283 case 'N':
15284 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
15285 fputs ("{z}", file);
15286 return;
15287
15288 case 'r':
15289 gcc_assert (CONST_INT_P (x));
15290 gcc_assert (INTVAL (x) == ROUND_SAE);
15291
15292 if (ASSEMBLER_DIALECT == ASM_INTEL)
15293 fputs (", ", file);
15294
15295 fputs ("{sae}", file);
15296
15297 if (ASSEMBLER_DIALECT == ASM_ATT)
15298 fputs (", ", file);
15299
15300 return;
15301
15302 case 'R':
15303 gcc_assert (CONST_INT_P (x));
15304
15305 if (ASSEMBLER_DIALECT == ASM_INTEL)
15306 fputs (", ", file);
15307
15308 switch (INTVAL (x))
15309 {
15310 case ROUND_NEAREST_INT | ROUND_SAE:
15311 fputs ("{rn-sae}", file);
15312 break;
15313 case ROUND_NEG_INF | ROUND_SAE:
15314 fputs ("{rd-sae}", file);
15315 break;
15316 case ROUND_POS_INF | ROUND_SAE:
15317 fputs ("{ru-sae}", file);
15318 break;
15319 case ROUND_ZERO | ROUND_SAE:
15320 fputs ("{rz-sae}", file);
15321 break;
15322 default:
15323 gcc_unreachable ();
15324 }
15325
15326 if (ASSEMBLER_DIALECT == ASM_ATT)
15327 fputs (", ", file);
15328
15329 return;
15330
15331 case '*':
15332 if (ASSEMBLER_DIALECT == ASM_ATT)
15333 putc ('*', file);
15334 return;
15335
15336 case '&':
15337 {
15338 const char *name = get_some_local_dynamic_name ();
15339 if (name == NULL)
15340 output_operand_lossage ("'%%&' used without any "
15341 "local dynamic TLS references");
15342 else
15343 assemble_name (file, name);
15344 return;
15345 }
15346
15347 case '+':
15348 {
15349 rtx x;
15350
15351 if (!optimize
15352 || optimize_function_for_size_p (cfun)
15353 || !TARGET_BRANCH_PREDICTION_HINTS)
15354 return;
15355
15356 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
15357 if (x)
15358 {
15359 int pred_val = XINT (x, 0);
15360
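/* Only emit a hint when the recorded probability is reasonably far from
   50% (outside the 45%-55% band).  */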
15361 if (pred_val < REG_BR_PROB_BASE * 45 / 100
15362 || pred_val > REG_BR_PROB_BASE * 55 / 100)
15363 {
15364 bool taken = pred_val > REG_BR_PROB_BASE / 2;
15365 bool cputaken
15366 = final_forward_branch_p (current_output_insn) == 0;
15367
15368 /* Emit hints only in the case default branch prediction
15369 heuristics would fail. */
15370 if (taken != cputaken)
15371 {
15372 /* We use 3e (DS) prefix for taken branches and
15373 2e (CS) prefix for not taken branches. */
15374 if (taken)
15375 fputs ("ds ; ", file);
15376 else
15377 fputs ("cs ; ", file);
15378 }
15379 }
15380 }
15381 return;
15382 }
15383
15384 case ';':
15385 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
15386 putc (';', file);
15387 #endif
15388 return;
15389
15390 case '@':
15391 if (ASSEMBLER_DIALECT == ASM_ATT)
15392 putc ('%', file);
15393
15394 /* The kernel uses a different segment register for performance
15395 reasons; a system call would not have to trash the userspace
15396 segment register, which would be expensive. */
15397 if (TARGET_64BIT && ix86_cmodel != CM_KERNEL)
15398 fputs ("fs", file);
15399 else
15400 fputs ("gs", file);
15401 return;
15402
15403 case '~':
15404 putc (TARGET_AVX2 ? 'i' : 'f', file);
15405 return;
15406
15407 case '^':
15408 if (TARGET_64BIT && Pmode != word_mode)
15409 fputs ("addr32 ", file);
15410 return;
15411
15412 default:
15413 output_operand_lossage ("invalid operand code '%c'", code);
15414 }
15415 }
15416
15417 if (REG_P (x))
15418 print_reg (x, code, file);
15419
15420 else if (MEM_P (x))
15421 {
15422 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
15423 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
15424 && GET_MODE (x) != BLKmode)
15425 {
15426 const char * size;
15427 switch (GET_MODE_SIZE (GET_MODE (x)))
15428 {
15429 case 1: size = "BYTE"; break;
15430 case 2: size = "WORD"; break;
15431 case 4: size = "DWORD"; break;
15432 case 8: size = "QWORD"; break;
15433 case 12: size = "TBYTE"; break;
15434 case 16:
15435 if (GET_MODE (x) == XFmode)
15436 size = "TBYTE";
15437 else
15438 size = "XMMWORD";
15439 break;
15440 case 32: size = "YMMWORD"; break;
15441 case 64: size = "ZMMWORD"; break;
15442 default:
15443 gcc_unreachable ();
15444 }
15445
15446 /* Check for explicit size override (codes 'b', 'w', 'k',
15447 'q' and 'x') */
15448 if (code == 'b')
15449 size = "BYTE";
15450 else if (code == 'w')
15451 size = "WORD";
15452 else if (code == 'k')
15453 size = "DWORD";
15454 else if (code == 'q')
15455 size = "QWORD";
15456 else if (code == 'x')
15457 size = "XMMWORD";
15458
15459 fputs (size, file);
15460 fputs (" PTR ", file);
15461 }
15462
15463 x = XEXP (x, 0);
15464 /* Avoid (%rip) for call operands. */
15465 if (CONSTANT_ADDRESS_P (x) && code == 'P'
15466 && !CONST_INT_P (x))
15467 output_addr_const (file, x);
15468 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
15469 output_operand_lossage ("invalid constraints for operand");
15470 else
15471 output_address (x);
15472 }
15473
15474 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
15475 {
15476 REAL_VALUE_TYPE r;
15477 long l;
15478
15479 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
15480 REAL_VALUE_TO_TARGET_SINGLE (r, l);
15481
15482 if (ASSEMBLER_DIALECT == ASM_ATT)
15483 putc ('$', file);
15484 /* Sign-extend the 32-bit SFmode immediate to 8 bytes. */
15485 if (code == 'q')
15486 fprintf (file, "0x%08" HOST_LONG_LONG_FORMAT "x",
15487 (unsigned long long) (int) l);
15488 else
15489 fprintf (file, "0x%08x", (unsigned int) l);
15490 }
15491
15492 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
15493 {
15494 REAL_VALUE_TYPE r;
15495 long l[2];
15496
15497 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
15498 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
15499
15500 if (ASSEMBLER_DIALECT == ASM_ATT)
15501 putc ('$', file);
15502 fprintf (file, "0x%lx%08lx", l[1] & 0xffffffff, l[0] & 0xffffffff);
15503 }
15504
15505 /* These float cases don't actually occur as immediate operands. */
15506 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == XFmode)
15507 {
15508 char dstr[30];
15509
15510 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
15511 fputs (dstr, file);
15512 }
15513
15514 else
15515 {
15516 /* We have patterns that allow zero sets of memory, for instance.
15517 In 64-bit mode, we should probably support all 8-byte vectors,
15518 since we can in fact encode that into an immediate. */
15519 if (GET_CODE (x) == CONST_VECTOR)
15520 {
15521 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
15522 x = const0_rtx;
15523 }
15524
15525 if (code != 'P' && code != 'p')
15526 {
15527 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
15528 {
15529 if (ASSEMBLER_DIALECT == ASM_ATT)
15530 putc ('$', file);
15531 }
15532 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
15533 || GET_CODE (x) == LABEL_REF)
15534 {
15535 if (ASSEMBLER_DIALECT == ASM_ATT)
15536 putc ('$', file);
15537 else
15538 fputs ("OFFSET FLAT:", file);
15539 }
15540 }
15541 if (CONST_INT_P (x))
15542 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
15543 else if (flag_pic || MACHOPIC_INDIRECT)
15544 output_pic_addr_const (file, x, code);
15545 else
15546 output_addr_const (file, x);
15547 }
15548 }
15549
15550 static bool
15551 ix86_print_operand_punct_valid_p (unsigned char code)
15552 {
15553 return (code == '@' || code == '*' || code == '+' || code == '&'
15554 || code == ';' || code == '~' || code == '^');
15555 }
15556 \f
15557 /* Print a memory operand whose address is ADDR. */
15558
15559 static void
15560 ix86_print_operand_address (FILE *file, rtx addr)
15561 {
15562 struct ix86_address parts;
15563 rtx base, index, disp;
15564 int scale;
15565 int ok;
15566 bool vsib = false;
15567 int code = 0;
15568
15569 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_VSIBADDR)
15570 {
15571 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
15572 gcc_assert (parts.index == NULL_RTX);
15573 parts.index = XVECEXP (addr, 0, 1);
15574 parts.scale = INTVAL (XVECEXP (addr, 0, 2));
15575 addr = XVECEXP (addr, 0, 0);
15576 vsib = true;
15577 }
15578 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_LEA_ADDR)
15579 {
15580 gcc_assert (TARGET_64BIT);
15581 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
15582 code = 'q';
15583 }
15584 else
15585 ok = ix86_decompose_address (addr, &parts);
15586
15587 gcc_assert (ok);
15588
15589 base = parts.base;
15590 index = parts.index;
15591 disp = parts.disp;
15592 scale = parts.scale;
15593
15594 switch (parts.seg)
15595 {
15596 case SEG_DEFAULT:
15597 break;
15598 case SEG_FS:
15599 case SEG_GS:
15600 if (ASSEMBLER_DIALECT == ASM_ATT)
15601 putc ('%', file);
15602 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
15603 break;
15604 default:
15605 gcc_unreachable ();
15606 }
15607
15608 /* Use the one-byte-shorter RIP-relative addressing in 64-bit mode. */
15609 if (TARGET_64BIT && !base && !index)
15610 {
15611 rtx symbol = disp;
15612
15613 if (GET_CODE (disp) == CONST
15614 && GET_CODE (XEXP (disp, 0)) == PLUS
15615 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
15616 symbol = XEXP (XEXP (disp, 0), 0);
15617
15618 if (GET_CODE (symbol) == LABEL_REF
15619 || (GET_CODE (symbol) == SYMBOL_REF
15620 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
15621 base = pc_rtx;
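/* With BASE set to pc_rtx the register printing below yields a
   RIP-relative operand, e.g. "foo(%rip)" in AT&T syntax.  */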
15622 }
15623 if (!base && !index)
15624 {
15625 /* A displacement-only address requires special attention. */
15626
15627 if (CONST_INT_P (disp))
15628 {
15629 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
15630 fputs ("ds:", file);
15631 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
15632 }
15633 else if (flag_pic)
15634 output_pic_addr_const (file, disp, 0);
15635 else
15636 output_addr_const (file, disp);
15637 }
15638 else
15639 {
15640 /* Print SImode register names to force addr32 prefix. */
15641 if (SImode_address_operand (addr, VOIDmode))
15642 {
15643 #ifdef ENABLE_CHECKING
15644 gcc_assert (TARGET_64BIT);
15645 switch (GET_CODE (addr))
15646 {
15647 case SUBREG:
15648 gcc_assert (GET_MODE (addr) == SImode);
15649 gcc_assert (GET_MODE (SUBREG_REG (addr)) == DImode);
15650 break;
15651 case ZERO_EXTEND:
15652 case AND:
15653 gcc_assert (GET_MODE (addr) == DImode);
15654 break;
15655 default:
15656 gcc_unreachable ();
15657 }
15658 #endif
15659 gcc_assert (!code);
15660 code = 'k';
15661 }
15662 else if (code == 0
15663 && TARGET_X32
15664 && disp
15665 && CONST_INT_P (disp)
15666 && INTVAL (disp) < -16*1024*1024)
15667 {
15668 /* X32 runs in 64-bit mode, where displacement, DISP, in
15669 address DISP(%r64), is encoded as 32-bit immediate sign-
15670 extended from 32-bit to 64-bit. For -0x40000300(%r64),
15671 address is %r64 + 0xffffffffbffffd00. When %r64 <
15672 0x40000300, like 0x37ffe064, address is 0xfffffffff7ffdd64,
15673 which is invalid for x32. The correct address is %r64
15674 - 0x40000300 == 0xf7ffdd64. To properly encode
15675 -0x40000300(%r64) for x32, we zero-extend negative
15676 displacement by forcing addr32 prefix which truncates
15677 0xfffffffff7ffdd64 to 0xf7ffdd64. In theory, we should
15678 zero-extend all negative displacements, including -1(%rsp).
15679 However, for small negative displacements, sign-extension
15680 won't cause overflow. We only zero-extend negative
15681 displacements if they are < -16*1024*1024, which is also used
15682 to check legitimate address displacements for PIC. */
15683 code = 'k';
15684 }
15685
15686 if (ASSEMBLER_DIALECT == ASM_ATT)
15687 {
15688 if (disp)
15689 {
15690 if (flag_pic)
15691 output_pic_addr_const (file, disp, 0);
15692 else if (GET_CODE (disp) == LABEL_REF)
15693 output_asm_label (disp);
15694 else
15695 output_addr_const (file, disp);
15696 }
15697
15698 putc ('(', file);
15699 if (base)
15700 print_reg (base, code, file);
15701 if (index)
15702 {
15703 putc (',', file);
15704 print_reg (index, vsib ? 0 : code, file);
15705 if (scale != 1 || vsib)
15706 fprintf (file, ",%d", scale);
15707 }
15708 putc (')', file);
15709 }
15710 else
15711 {
15712 rtx offset = NULL_RTX;
15713
15714 if (disp)
15715 {
15716 /* Pull out the offset of a symbol; print any symbol itself. */
15717 if (GET_CODE (disp) == CONST
15718 && GET_CODE (XEXP (disp, 0)) == PLUS
15719 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
15720 {
15721 offset = XEXP (XEXP (disp, 0), 1);
15722 disp = gen_rtx_CONST (VOIDmode,
15723 XEXP (XEXP (disp, 0), 0));
15724 }
15725
15726 if (flag_pic)
15727 output_pic_addr_const (file, disp, 0);
15728 else if (GET_CODE (disp) == LABEL_REF)
15729 output_asm_label (disp);
15730 else if (CONST_INT_P (disp))
15731 offset = disp;
15732 else
15733 output_addr_const (file, disp);
15734 }
15735
15736 putc ('[', file);
15737 if (base)
15738 {
15739 print_reg (base, code, file);
15740 if (offset)
15741 {
15742 if (INTVAL (offset) >= 0)
15743 putc ('+', file);
15744 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
15745 }
15746 }
15747 else if (offset)
15748 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
15749 else
15750 putc ('0', file);
15751
15752 if (index)
15753 {
15754 putc ('+', file);
15755 print_reg (index, vsib ? 0 : code, file);
15756 if (scale != 1 || vsib)
15757 fprintf (file, "*%d", scale);
15758 }
15759 putc (']', file);
15760 }
15761 }
15762 }
15763
15764 /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
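/* For example, a thread-local variable x accessed via the local-exec model
reaches this hook wrapped in UNSPEC_NTPOFF and is printed as "x@tpoff" on
64-bit targets or "x@ntpoff" on 32-bit targets; the assembler turns the
suffix into the matching TLS relocation. */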
15765
15766 static bool
15767 i386_asm_output_addr_const_extra (FILE *file, rtx x)
15768 {
15769 rtx op;
15770
15771 if (GET_CODE (x) != UNSPEC)
15772 return false;
15773
15774 op = XVECEXP (x, 0, 0);
15775 switch (XINT (x, 1))
15776 {
15777 case UNSPEC_GOTTPOFF:
15778 output_addr_const (file, op);
15779 /* FIXME: This might be @TPOFF in Sun ld. */
15780 fputs ("@gottpoff", file);
15781 break;
15782 case UNSPEC_TPOFF:
15783 output_addr_const (file, op);
15784 fputs ("@tpoff", file);
15785 break;
15786 case UNSPEC_NTPOFF:
15787 output_addr_const (file, op);
15788 if (TARGET_64BIT)
15789 fputs ("@tpoff", file);
15790 else
15791 fputs ("@ntpoff", file);
15792 break;
15793 case UNSPEC_DTPOFF:
15794 output_addr_const (file, op);
15795 fputs ("@dtpoff", file);
15796 break;
15797 case UNSPEC_GOTNTPOFF:
15798 output_addr_const (file, op);
15799 if (TARGET_64BIT)
15800 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
15801 "@gottpoff(%rip)" : "@gottpoff[rip]", file);
15802 else
15803 fputs ("@gotntpoff", file);
15804 break;
15805 case UNSPEC_INDNTPOFF:
15806 output_addr_const (file, op);
15807 fputs ("@indntpoff", file);
15808 break;
15809 #if TARGET_MACHO
15810 case UNSPEC_MACHOPIC_OFFSET:
15811 output_addr_const (file, op);
15812 putc ('-', file);
15813 machopic_output_function_base_name (file);
15814 break;
15815 #endif
15816
15817 case UNSPEC_STACK_CHECK:
15818 {
15819 int offset;
15820
15821 gcc_assert (flag_split_stack);
15822
15823 #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
15824 offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
15825 #else
15826 gcc_unreachable ();
15827 #endif
15828
15829 fprintf (file, "%s:%d", TARGET_64BIT ? "%fs" : "%gs", offset);
15830 }
15831 break;
15832
15833 default:
15834 return false;
15835 }
15836
15837 return true;
15838 }
15839 \f
15840 /* Split one or more double-mode RTL references into pairs of half-mode
15841 references. The RTL can be REG, offsettable MEM, integer constant, or
15842 CONST_DOUBLE. "operands" is a pointer to an array of double-mode RTLs to
15843 split and "num" is its length. lo_half and hi_half are output arrays
15844 that parallel "operands". */
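/* For example, splitting a TImode MEM produces two DImode MEMs at byte
offsets 0 and 8; register and constant operands are split with
simplify_gen_subreg instead. */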
15845
15846 void
15847 split_double_mode (enum machine_mode mode, rtx operands[],
15848 int num, rtx lo_half[], rtx hi_half[])
15849 {
15850 enum machine_mode half_mode;
15851 unsigned int byte;
15852
15853 switch (mode)
15854 {
15855 case TImode:
15856 half_mode = DImode;
15857 break;
15858 case DImode:
15859 half_mode = SImode;
15860 break;
15861 default:
15862 gcc_unreachable ();
15863 }
15864
15865 byte = GET_MODE_SIZE (half_mode);
15866
15867 while (num--)
15868 {
15869 rtx op = operands[num];
15870
15871 /* simplify_subreg refuses to split volatile memory addresses,
15872 but we still have to handle them. */
15873 if (MEM_P (op))
15874 {
15875 lo_half[num] = adjust_address (op, half_mode, 0);
15876 hi_half[num] = adjust_address (op, half_mode, byte);
15877 }
15878 else
15879 {
15880 lo_half[num] = simplify_gen_subreg (half_mode, op,
15881 GET_MODE (op) == VOIDmode
15882 ? mode : GET_MODE (op), 0);
15883 hi_half[num] = simplify_gen_subreg (half_mode, op,
15884 GET_MODE (op) == VOIDmode
15885 ? mode : GET_MODE (op), byte);
15886 }
15887 }
15888 }
15889 \f
15890 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
15891 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
15892 is the expression of the binary operation. The output may either be
15893 emitted here, or returned to the caller, like all output_* functions.
15894
15895 There is no guarantee that the operands are the same mode, as they
15896 might be within FLOAT or FLOAT_EXTEND expressions. */
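/* For example, an SFmode PLUS on SSE registers is emitted as
"vaddss\t{%2, %1, %0|%0, %1, %2}" when AVX is enabled and as
"addss\t{%2, %0|%0, %2}" otherwise; the x87 templates are built from the
fadd/fsub/fmul/fdiv mnemonics chosen below. */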
15897
15898 #ifndef SYSV386_COMPAT
15899 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
15900 wants to fix the assemblers because that causes incompatibility
15901 with gcc. No-one wants to fix gcc because that causes
15902 incompatibility with assemblers... You can use the option of
15903 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
15904 #define SYSV386_COMPAT 1
15905 #endif
15906
15907 const char *
15908 output_387_binary_op (rtx insn, rtx *operands)
15909 {
15910 static char buf[40];
15911 const char *p;
15912 const char *ssep;
15913 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
15914
15915 #ifdef ENABLE_CHECKING
15916 /* Even if we do not want to check the inputs, this documents the input
15917 constraints, which helps in understanding the following code. */
15918 if (STACK_REG_P (operands[0])
15919 && ((REG_P (operands[1])
15920 && REGNO (operands[0]) == REGNO (operands[1])
15921 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
15922 || (REG_P (operands[2])
15923 && REGNO (operands[0]) == REGNO (operands[2])
15924 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
15925 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
15926 ; /* ok */
15927 else
15928 gcc_assert (is_sse);
15929 #endif
15930
15931 switch (GET_CODE (operands[3]))
15932 {
15933 case PLUS:
15934 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
15935 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
15936 p = "fiadd";
15937 else
15938 p = "fadd";
15939 ssep = "vadd";
15940 break;
15941
15942 case MINUS:
15943 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
15944 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
15945 p = "fisub";
15946 else
15947 p = "fsub";
15948 ssep = "vsub";
15949 break;
15950
15951 case MULT:
15952 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
15953 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
15954 p = "fimul";
15955 else
15956 p = "fmul";
15957 ssep = "vmul";
15958 break;
15959
15960 case DIV:
15961 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
15962 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
15963 p = "fidiv";
15964 else
15965 p = "fdiv";
15966 ssep = "vdiv";
15967 break;
15968
15969 default:
15970 gcc_unreachable ();
15971 }
15972
15973 if (is_sse)
15974 {
15975 if (TARGET_AVX)
15976 {
15977 strcpy (buf, ssep);
15978 if (GET_MODE (operands[0]) == SFmode)
15979 strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
15980 else
15981 strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
15982 }
15983 else
15984 {
15985 strcpy (buf, ssep + 1);
15986 if (GET_MODE (operands[0]) == SFmode)
15987 strcat (buf, "ss\t{%2, %0|%0, %2}");
15988 else
15989 strcat (buf, "sd\t{%2, %0|%0, %2}");
15990 }
15991 return buf;
15992 }
15993 strcpy (buf, p);
15994
15995 switch (GET_CODE (operands[3]))
15996 {
15997 case MULT:
15998 case PLUS:
15999 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
16000 {
16001 rtx temp = operands[2];
16002 operands[2] = operands[1];
16003 operands[1] = temp;
16004 }
16005
16006 /* We now know that operands[0] == operands[1]. */
16007
16008 if (MEM_P (operands[2]))
16009 {
16010 p = "%Z2\t%2";
16011 break;
16012 }
16013
16014 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
16015 {
16016 if (STACK_TOP_P (operands[0]))
16017 /* How is it that we are storing to a dead operand[2]?
16018 Well, presumably operands[1] is dead too. We can't
16019 store the result to st(0) as st(0) gets popped on this
16020 instruction. Instead store to operands[2] (which I
16021 think has to be st(1)). st(1) will be popped later.
16022 gcc <= 2.8.1 didn't have this check and generated
16023 assembly code that the Unixware assembler rejected. */
16024 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
16025 else
16026 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
16027 break;
16028 }
16029
16030 if (STACK_TOP_P (operands[0]))
16031 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
16032 else
16033 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
16034 break;
16035
16036 case MINUS:
16037 case DIV:
16038 if (MEM_P (operands[1]))
16039 {
16040 p = "r%Z1\t%1";
16041 break;
16042 }
16043
16044 if (MEM_P (operands[2]))
16045 {
16046 p = "%Z2\t%2";
16047 break;
16048 }
16049
16050 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
16051 {
16052 #if SYSV386_COMPAT
16053 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
16054 derived assemblers, confusingly reverse the direction of
16055 the operation for fsub{r} and fdiv{r} when the
16056 destination register is not st(0). The Intel assembler
16057 doesn't have this brain damage. Read !SYSV386_COMPAT to
16058 figure out what the hardware really does. */
16059 if (STACK_TOP_P (operands[0]))
16060 p = "{p\t%0, %2|rp\t%2, %0}";
16061 else
16062 p = "{rp\t%2, %0|p\t%0, %2}";
16063 #else
16064 if (STACK_TOP_P (operands[0]))
16065 /* As above for fmul/fadd, we can't store to st(0). */
16066 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
16067 else
16068 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
16069 #endif
16070 break;
16071 }
16072
16073 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
16074 {
16075 #if SYSV386_COMPAT
16076 if (STACK_TOP_P (operands[0]))
16077 p = "{rp\t%0, %1|p\t%1, %0}";
16078 else
16079 p = "{p\t%1, %0|rp\t%0, %1}";
16080 #else
16081 if (STACK_TOP_P (operands[0]))
16082 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
16083 else
16084 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
16085 #endif
16086 break;
16087 }
16088
16089 if (STACK_TOP_P (operands[0]))
16090 {
16091 if (STACK_TOP_P (operands[1]))
16092 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
16093 else
16094 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
16095 break;
16096 }
16097 else if (STACK_TOP_P (operands[1]))
16098 {
16099 #if SYSV386_COMPAT
16100 p = "{\t%1, %0|r\t%0, %1}";
16101 #else
16102 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
16103 #endif
16104 }
16105 else
16106 {
16107 #if SYSV386_COMPAT
16108 p = "{r\t%2, %0|\t%0, %2}";
16109 #else
16110 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
16111 #endif
16112 }
16113 break;
16114
16115 default:
16116 gcc_unreachable ();
16117 }
16118
16119 strcat (buf, p);
16120 return buf;
16121 }
16122
16123 /* Check if a 256bit AVX register is referenced inside of EXP. */
16124
16125 static bool
16126 ix86_check_avx256_register (const_rtx exp)
16127 {
16128 if (GET_CODE (exp) == SUBREG)
16129 exp = SUBREG_REG (exp);
16130
16131 return (REG_P (exp)
16132 && VALID_AVX256_REG_OR_OI_MODE (GET_MODE (exp)));
16133 }
16134
16135 /* Return needed mode for entity in optimize_mode_switching pass. */
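/* Roughly, the AVX_U128 entity tracks the upper halves of the YMM
registers: AVX_U128_CLEAN means the upper 128 bits are known to be zero,
AVX_U128_DIRTY means they may hold live data, and AVX_U128_ANY means the
insn does not care either way. */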
16136
16137 static int
16138 ix86_avx_u128_mode_needed (rtx_insn *insn)
16139 {
16140 if (CALL_P (insn))
16141 {
16142 rtx link;
16143
16144 /* Needed mode is set to AVX_U128_CLEAN if there are
16145 no 256bit modes used in function arguments. */
16146 for (link = CALL_INSN_FUNCTION_USAGE (insn);
16147 link;
16148 link = XEXP (link, 1))
16149 {
16150 if (GET_CODE (XEXP (link, 0)) == USE)
16151 {
16152 rtx arg = XEXP (XEXP (link, 0), 0);
16153
16154 if (ix86_check_avx256_register (arg))
16155 return AVX_U128_DIRTY;
16156 }
16157 }
16158
16159 return AVX_U128_CLEAN;
16160 }
16161
16162 /* Require DIRTY mode if a 256bit AVX register is referenced. Hardware
16163 changes state only when a 256bit register is written to, but we need
16164 to prevent the compiler from moving the optimal insertion point above
16165 an eventual read from a 256bit register. */
16166 subrtx_iterator::array_type array;
16167 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
16168 if (ix86_check_avx256_register (*iter))
16169 return AVX_U128_DIRTY;
16170
16171 return AVX_U128_ANY;
16172 }
16173
16174 /* Return mode that i387 must be switched into
16175 prior to the execution of insn. */
16176
16177 static int
16178 ix86_i387_mode_needed (int entity, rtx_insn *insn)
16179 {
16180 enum attr_i387_cw mode;
16181
16182 /* The mode UNINITIALIZED is used to store the control word after a
16183 function call or ASM pattern. The mode ANY specifies that the function
16184 has no requirements on the control word and makes no changes in the
16185 bits we are interested in. */
16186
16187 if (CALL_P (insn)
16188 || (NONJUMP_INSN_P (insn)
16189 && (asm_noperands (PATTERN (insn)) >= 0
16190 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
16191 return I387_CW_UNINITIALIZED;
16192
16193 if (recog_memoized (insn) < 0)
16194 return I387_CW_ANY;
16195
16196 mode = get_attr_i387_cw (insn);
16197
16198 switch (entity)
16199 {
16200 case I387_TRUNC:
16201 if (mode == I387_CW_TRUNC)
16202 return mode;
16203 break;
16204
16205 case I387_FLOOR:
16206 if (mode == I387_CW_FLOOR)
16207 return mode;
16208 break;
16209
16210 case I387_CEIL:
16211 if (mode == I387_CW_CEIL)
16212 return mode;
16213 break;
16214
16215 case I387_MASK_PM:
16216 if (mode == I387_CW_MASK_PM)
16217 return mode;
16218 break;
16219
16220 default:
16221 gcc_unreachable ();
16222 }
16223
16224 return I387_CW_ANY;
16225 }
16226
16227 /* Return mode that entity must be switched into
16228 prior to the execution of insn. */
16229
16230 static int
16231 ix86_mode_needed (int entity, rtx_insn *insn)
16232 {
16233 switch (entity)
16234 {
16235 case AVX_U128:
16236 return ix86_avx_u128_mode_needed (insn);
16237 case I387_TRUNC:
16238 case I387_FLOOR:
16239 case I387_CEIL:
16240 case I387_MASK_PM:
16241 return ix86_i387_mode_needed (entity, insn);
16242 default:
16243 gcc_unreachable ();
16244 }
16245 return 0;
16246 }
16247
16248 /* Check if a 256bit AVX register is referenced in stores. */
16249
16250 static void
16251 ix86_check_avx256_stores (rtx dest, const_rtx, void *data)
16252 {
16253 if (ix86_check_avx256_register (dest))
16254 {
16255 bool *used = (bool *) data;
16256 *used = true;
16257 }
16258 }
16259
16260 /* Calculate mode of upper 128bit AVX registers after the insn. */
16261
16262 static int
16263 ix86_avx_u128_mode_after (int mode, rtx_insn *insn)
16264 {
16265 rtx pat = PATTERN (insn);
16266
16267 if (vzeroupper_operation (pat, VOIDmode)
16268 || vzeroall_operation (pat, VOIDmode))
16269 return AVX_U128_CLEAN;
16270
16271 /* We know that the state is clean after a CALL insn if no 256bit
16272 registers are used for the function return value. */
16273 if (CALL_P (insn))
16274 {
16275 bool avx_reg256_found = false;
16276 note_stores (pat, ix86_check_avx256_stores, &avx_reg256_found);
16277
16278 return avx_reg256_found ? AVX_U128_DIRTY : AVX_U128_CLEAN;
16279 }
16280
16281 /* Otherwise, return current mode. Remember that if insn
16282 references AVX 256bit registers, the mode was already changed
16283 to DIRTY from MODE_NEEDED. */
16284 return mode;
16285 }
16286
16287 /* Return the mode that an insn results in. */
16288
16289 int
16290 ix86_mode_after (int entity, int mode, rtx_insn *insn)
16291 {
16292 switch (entity)
16293 {
16294 case AVX_U128:
16295 return ix86_avx_u128_mode_after (mode, insn);
16296 case I387_TRUNC:
16297 case I387_FLOOR:
16298 case I387_CEIL:
16299 case I387_MASK_PM:
16300 return mode;
16301 default:
16302 gcc_unreachable ();
16303 }
16304 }
16305
16306 static int
16307 ix86_avx_u128_mode_entry (void)
16308 {
16309 tree arg;
16310
16311 /* Entry mode is set to AVX_U128_DIRTY if there are
16312 256bit modes used in function arguments. */
16313 for (arg = DECL_ARGUMENTS (current_function_decl); arg;
16314 arg = TREE_CHAIN (arg))
16315 {
16316 rtx incoming = DECL_INCOMING_RTL (arg);
16317
16318 if (incoming && ix86_check_avx256_register (incoming))
16319 return AVX_U128_DIRTY;
16320 }
16321
16322 return AVX_U128_CLEAN;
16323 }
16324
16325 /* Return a mode that ENTITY is assumed to be
16326 switched to at function entry. */
16327
16328 static int
16329 ix86_mode_entry (int entity)
16330 {
16331 switch (entity)
16332 {
16333 case AVX_U128:
16334 return ix86_avx_u128_mode_entry ();
16335 case I387_TRUNC:
16336 case I387_FLOOR:
16337 case I387_CEIL:
16338 case I387_MASK_PM:
16339 return I387_CW_ANY;
16340 default:
16341 gcc_unreachable ();
16342 }
16343 }
16344
16345 static int
16346 ix86_avx_u128_mode_exit (void)
16347 {
16348 rtx reg = crtl->return_rtx;
16349
16350 /* Exit mode is set to AVX_U128_DIRTY if there are
16351 256bit modes used in the function return register. */
16352 if (reg && ix86_check_avx256_register (reg))
16353 return AVX_U128_DIRTY;
16354
16355 return AVX_U128_CLEAN;
16356 }
16357
16358 /* Return a mode that ENTITY is assumed to be
16359 switched to at function exit. */
16360
16361 static int
16362 ix86_mode_exit (int entity)
16363 {
16364 switch (entity)
16365 {
16366 case AVX_U128:
16367 return ix86_avx_u128_mode_exit ();
16368 case I387_TRUNC:
16369 case I387_FLOOR:
16370 case I387_CEIL:
16371 case I387_MASK_PM:
16372 return I387_CW_ANY;
16373 default:
16374 gcc_unreachable ();
16375 }
16376 }
16377
16378 static int
16379 ix86_mode_priority (int, int n)
16380 {
16381 return n;
16382 }
16383
16384 /* Output code to initialize control word copies used by trunc?f?i and
16385 rounding patterns. MODE selects which control word copy (truncation,
16386 floor, ceil or precision-mask) is initialized. */
16387
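/* The x87 control word keeps the rounding control in bits 11:10
(00 = to nearest, 01 = down, 10 = up, 11 = toward zero) and the precision
exception mask in bit 5, hence the 0x0400, 0x0800, 0x0c00 and 0x0020
constants used below. */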
16388 static void
16389 emit_i387_cw_initialization (int mode)
16390 {
16391 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
16392 rtx new_mode;
16393
16394 enum ix86_stack_slot slot;
16395
16396 rtx reg = gen_reg_rtx (HImode);
16397
16398 emit_insn (gen_x86_fnstcw_1 (stored_mode));
16399 emit_move_insn (reg, copy_rtx (stored_mode));
16400
16401 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
16402 || optimize_insn_for_size_p ())
16403 {
16404 switch (mode)
16405 {
16406 case I387_CW_TRUNC:
16407 /* round toward zero (truncate) */
16408 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
16409 slot = SLOT_CW_TRUNC;
16410 break;
16411
16412 case I387_CW_FLOOR:
16413 /* round down toward -oo */
16414 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
16415 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
16416 slot = SLOT_CW_FLOOR;
16417 break;
16418
16419 case I387_CW_CEIL:
16420 /* round up toward +oo */
16421 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
16422 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
16423 slot = SLOT_CW_CEIL;
16424 break;
16425
16426 case I387_CW_MASK_PM:
16427 /* mask precision exception for nearbyint() */
16428 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
16429 slot = SLOT_CW_MASK_PM;
16430 break;
16431
16432 default:
16433 gcc_unreachable ();
16434 }
16435 }
16436 else
16437 {
16438 switch (mode)
16439 {
16440 case I387_CW_TRUNC:
16441 /* round toward zero (truncate) */
16442 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
16443 slot = SLOT_CW_TRUNC;
16444 break;
16445
16446 case I387_CW_FLOOR:
16447 /* round down toward -oo */
16448 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
16449 slot = SLOT_CW_FLOOR;
16450 break;
16451
16452 case I387_CW_CEIL:
16453 /* round up toward +oo */
16454 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
16455 slot = SLOT_CW_CEIL;
16456 break;
16457
16458 case I387_CW_MASK_PM:
16459 /* mask precision exception for nearbyint() */
16460 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
16461 slot = SLOT_CW_MASK_PM;
16462 break;
16463
16464 default:
16465 gcc_unreachable ();
16466 }
16467 }
16468
16469 gcc_assert (slot < MAX_386_STACK_LOCALS);
16470
16471 new_mode = assign_386_stack_local (HImode, slot);
16472 emit_move_insn (new_mode, reg);
16473 }
16474
16475 /* Emit vzeroupper. */
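/* vzeroupper clears the upper 128 bits of all YMM registers. Emitting it
before code that may execute legacy (non-VEX) SSE instructions avoids the
AVX/SSE transition penalty on processors that keep the upper halves as
separately tracked state. */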
16476
16477 void
16478 ix86_avx_emit_vzeroupper (HARD_REG_SET regs_live)
16479 {
16480 int i;
16481
16482 /* Cancel automatic vzeroupper insertion if there are
16483 live call-saved SSE registers at the insertion point. */
16484
16485 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
16486 if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
16487 return;
16488
16489 if (TARGET_64BIT)
16490 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
16491 if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
16492 return;
16493
16494 emit_insn (gen_avx_vzeroupper ());
16495 }
16496
16499 /* Generate one or more insns to set ENTITY to MODE. REGS_LIVE
16500 is the set of hard registers live at the point where the insn(s)
16501 are to be inserted. */
16502
16503 static void
16504 ix86_emit_mode_set (int entity, int mode, int prev_mode ATTRIBUTE_UNUSED,
16505 HARD_REG_SET regs_live)
16506 {
16507 switch (entity)
16508 {
16509 case AVX_U128:
16510 if (mode == AVX_U128_CLEAN)
16511 ix86_avx_emit_vzeroupper (regs_live);
16512 break;
16513 case I387_TRUNC:
16514 case I387_FLOOR:
16515 case I387_CEIL:
16516 case I387_MASK_PM:
16517 if (mode != I387_CW_ANY
16518 && mode != I387_CW_UNINITIALIZED)
16519 emit_i387_cw_initialization (mode);
16520 break;
16521 default:
16522 gcc_unreachable ();
16523 }
16524 }
16525
16526 /* Output code for INSN to convert a float to a signed int. OPERANDS
16527 are the insn operands. The output may be [HSD]Imode and the input
16528 operand may be [SDX]Fmode. */
16529
16530 const char *
16531 output_fix_trunc (rtx_insn *insn, rtx *operands, bool fisttp)
16532 {
16533 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
16534 int dimode_p = GET_MODE (operands[0]) == DImode;
16535 int round_mode = get_attr_i387_cw (insn);
16536
16537 /* Jump through a hoop or two for DImode, since the hardware has no
16538 non-popping instruction. We used to do this a different way, but
16539 that was somewhat fragile and broke with post-reload splitters. */
16540 if ((dimode_p || fisttp) && !stack_top_dies)
16541 output_asm_insn ("fld\t%y1", operands);
16542
16543 gcc_assert (STACK_TOP_P (operands[1]));
16544 gcc_assert (MEM_P (operands[0]));
16545 gcc_assert (GET_MODE (operands[1]) != TFmode);
16546
16547 if (fisttp)
16548 output_asm_insn ("fisttp%Z0\t%0", operands);
16549 else
16550 {
16551 if (round_mode != I387_CW_ANY)
16552 output_asm_insn ("fldcw\t%3", operands);
16553 if (stack_top_dies || dimode_p)
16554 output_asm_insn ("fistp%Z0\t%0", operands);
16555 else
16556 output_asm_insn ("fist%Z0\t%0", operands);
16557 if (round_mode != I387_CW_ANY)
16558 output_asm_insn ("fldcw\t%2", operands);
16559 }
16560
16561 return "";
16562 }
16563
16564 /* Output code for x87 ffreep insn. The OPNO argument, which may only
16565 have the values zero or one, indicates the ffreep insn's operand
16566 from the OPERANDS array. */
16567
16568 static const char *
16569 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
16570 {
16571 if (TARGET_USE_FFREEP)
16572 #ifdef HAVE_AS_IX86_FFREEP
16573 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
16574 #else
16575 {
16576 static char retval[32];
16577 int regno = REGNO (operands[opno]);
16578
16579 gcc_assert (STACK_REGNO_P (regno));
16580
16581 regno -= FIRST_STACK_REG;
16582
16583 snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
16584 return retval;
16585 }
16586 #endif
16587
16588 return opno ? "fstp\t%y1" : "fstp\t%y0";
16589 }
16590
16591
16592 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
16593 should be used. UNORDERED_P is true when fucom should be used. */
16594
16595 const char *
16596 output_fp_compare (rtx insn, rtx *operands, bool eflags_p, bool unordered_p)
16597 {
16598 int stack_top_dies;
16599 rtx cmp_op0, cmp_op1;
16600 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
16601
16602 if (eflags_p)
16603 {
16604 cmp_op0 = operands[0];
16605 cmp_op1 = operands[1];
16606 }
16607 else
16608 {
16609 cmp_op0 = operands[1];
16610 cmp_op1 = operands[2];
16611 }
16612
16613 if (is_sse)
16614 {
16615 if (GET_MODE (operands[0]) == SFmode)
16616 if (unordered_p)
16617 return "%vucomiss\t{%1, %0|%0, %1}";
16618 else
16619 return "%vcomiss\t{%1, %0|%0, %1}";
16620 else
16621 if (unordered_p)
16622 return "%vucomisd\t{%1, %0|%0, %1}";
16623 else
16624 return "%vcomisd\t{%1, %0|%0, %1}";
16625 }
16626
16627 gcc_assert (STACK_TOP_P (cmp_op0));
16628
16629 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
16630
16631 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
16632 {
16633 if (stack_top_dies)
16634 {
16635 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
16636 return output_387_ffreep (operands, 1);
16637 }
16638 else
16639 return "ftst\n\tfnstsw\t%0";
16640 }
16641
16642 if (STACK_REG_P (cmp_op1)
16643 && stack_top_dies
16644 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
16645 && REGNO (cmp_op1) != FIRST_STACK_REG)
16646 {
16647 /* If the top of the 387 stack dies, and the other operand
16648 is also a stack register that dies, then this must be a
16649 `fcompp' float compare. */
16650
16651 if (eflags_p)
16652 {
16653 /* There is no double popping fcomi variant. Fortunately,
16654 eflags is immune from the fstp's cc clobbering. */
16655 if (unordered_p)
16656 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
16657 else
16658 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
16659 return output_387_ffreep (operands, 0);
16660 }
16661 else
16662 {
16663 if (unordered_p)
16664 return "fucompp\n\tfnstsw\t%0";
16665 else
16666 return "fcompp\n\tfnstsw\t%0";
16667 }
16668 }
16669 else
16670 {
16671 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
16672
16673 static const char * const alt[16] =
16674 {
16675 "fcom%Z2\t%y2\n\tfnstsw\t%0",
16676 "fcomp%Z2\t%y2\n\tfnstsw\t%0",
16677 "fucom%Z2\t%y2\n\tfnstsw\t%0",
16678 "fucomp%Z2\t%y2\n\tfnstsw\t%0",
16679
16680 "ficom%Z2\t%y2\n\tfnstsw\t%0",
16681 "ficomp%Z2\t%y2\n\tfnstsw\t%0",
16682 NULL,
16683 NULL,
16684
16685 "fcomi\t{%y1, %0|%0, %y1}",
16686 "fcomip\t{%y1, %0|%0, %y1}",
16687 "fucomi\t{%y1, %0|%0, %y1}",
16688 "fucomip\t{%y1, %0|%0, %y1}",
16689
16690 NULL,
16691 NULL,
16692 NULL,
16693 NULL
16694 };
16695
16696 int mask;
16697 const char *ret;
16698
16699 mask = eflags_p << 3;
16700 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
16701 mask |= unordered_p << 1;
16702 mask |= stack_top_dies;
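/* For example, an unordered fcomi-style compare whose stack top dies gives
mask 0b1011 = 11 and selects "fucomip" from the table above. */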
16703
16704 gcc_assert (mask < 16);
16705 ret = alt[mask];
16706 gcc_assert (ret);
16707
16708 return ret;
16709 }
16710 }
16711
16712 void
16713 ix86_output_addr_vec_elt (FILE *file, int value)
16714 {
16715 const char *directive = ASM_LONG;
16716
16717 #ifdef ASM_QUAD
16718 if (TARGET_LP64)
16719 directive = ASM_QUAD;
16720 #else
16721 gcc_assert (!TARGET_64BIT);
16722 #endif
16723
16724 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
16725 }
16726
16727 void
16728 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
16729 {
16730 const char *directive = ASM_LONG;
16731
16732 #ifdef ASM_QUAD
16733 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
16734 directive = ASM_QUAD;
16735 #else
16736 gcc_assert (!TARGET_64BIT);
16737 #endif
16738 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
16739 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
16740 fprintf (file, "%s%s%d-%s%d\n",
16741 directive, LPREFIX, value, LPREFIX, rel);
16742 else if (HAVE_AS_GOTOFF_IN_DATA)
16743 fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
16744 #if TARGET_MACHO
16745 else if (TARGET_MACHO)
16746 {
16747 fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
16748 machopic_output_function_base_name (file);
16749 putc ('\n', file);
16750 }
16751 #endif
16752 else
16753 asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
16754 GOT_SYMBOL_NAME, LPREFIX, value);
16755 }
16756 \f
16757 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
16758 for the target. */
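/* For example, clearing %eax normally emits "xorl %eax, %eax", which is
shorter but clobbers the flags; "movl $0, %eax" is used only when the
target prefers MOV0 and we are not optimizing for size. */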
16759
16760 void
16761 ix86_expand_clear (rtx dest)
16762 {
16763 rtx tmp;
16764
16765 /* We play register width games, which are only valid after reload. */
16766 gcc_assert (reload_completed);
16767
16768 /* Avoid HImode and its attendant prefix byte. */
16769 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
16770 dest = gen_rtx_REG (SImode, REGNO (dest));
16771 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
16772
16773 if (!TARGET_USE_MOV0 || optimize_insn_for_size_p ())
16774 {
16775 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
16776 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
16777 }
16778
16779 emit_insn (tmp);
16780 }
16781
16782 /* X is an unchanging MEM. If it is a constant pool reference, return
16783 the constant pool rtx, else NULL. */
16784
16785 rtx
16786 maybe_get_pool_constant (rtx x)
16787 {
16788 x = ix86_delegitimize_address (XEXP (x, 0));
16789
16790 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
16791 return get_pool_constant (x);
16792
16793 return NULL_RTX;
16794 }
16795
16796 void
16797 ix86_expand_move (enum machine_mode mode, rtx operands[])
16798 {
16799 rtx op0, op1;
16800 enum tls_model model;
16801
16802 op0 = operands[0];
16803 op1 = operands[1];
16804
16805 if (GET_CODE (op1) == SYMBOL_REF)
16806 {
16807 rtx tmp;
16808
16809 model = SYMBOL_REF_TLS_MODEL (op1);
16810 if (model)
16811 {
16812 op1 = legitimize_tls_address (op1, model, true);
16813 op1 = force_operand (op1, op0);
16814 if (op1 == op0)
16815 return;
16816 op1 = convert_to_mode (mode, op1, 1);
16817 }
16818 else if ((tmp = legitimize_pe_coff_symbol (op1, false)) != NULL_RTX)
16819 op1 = tmp;
16820 }
16821 else if (GET_CODE (op1) == CONST
16822 && GET_CODE (XEXP (op1, 0)) == PLUS
16823 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
16824 {
16825 rtx addend = XEXP (XEXP (op1, 0), 1);
16826 rtx symbol = XEXP (XEXP (op1, 0), 0);
16827 rtx tmp;
16828
16829 model = SYMBOL_REF_TLS_MODEL (symbol);
16830 if (model)
16831 tmp = legitimize_tls_address (symbol, model, true);
16832 else
16833 tmp = legitimize_pe_coff_symbol (symbol, true);
16834
16835 if (tmp)
16836 {
16837 tmp = force_operand (tmp, NULL);
16838 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
16839 op0, 1, OPTAB_DIRECT);
16840 if (tmp == op0)
16841 return;
16842 op1 = convert_to_mode (mode, tmp, 1);
16843 }
16844 }
16845
16846 if ((flag_pic || MACHOPIC_INDIRECT)
16847 && symbolic_operand (op1, mode))
16848 {
16849 if (TARGET_MACHO && !TARGET_64BIT)
16850 {
16851 #if TARGET_MACHO
16852 /* dynamic-no-pic */
16853 if (MACHOPIC_INDIRECT)
16854 {
16855 rtx temp = ((reload_in_progress
16856 || ((op0 && REG_P (op0))
16857 && mode == Pmode))
16858 ? op0 : gen_reg_rtx (Pmode));
16859 op1 = machopic_indirect_data_reference (op1, temp);
16860 if (MACHOPIC_PURE)
16861 op1 = machopic_legitimize_pic_address (op1, mode,
16862 temp == op1 ? 0 : temp);
16863 }
16864 if (op0 != op1 && GET_CODE (op0) != MEM)
16865 {
16866 rtx insn = gen_rtx_SET (VOIDmode, op0, op1);
16867 emit_insn (insn);
16868 return;
16869 }
16870 if (GET_CODE (op0) == MEM)
16871 op1 = force_reg (Pmode, op1);
16872 else
16873 {
16874 rtx temp = op0;
16875 if (GET_CODE (temp) != REG)
16876 temp = gen_reg_rtx (Pmode);
16877 temp = legitimize_pic_address (op1, temp);
16878 if (temp == op0)
16879 return;
16880 op1 = temp;
16881 }
16882 /* dynamic-no-pic */
16883 #endif
16884 }
16885 else
16886 {
16887 if (MEM_P (op0))
16888 op1 = force_reg (mode, op1);
16889 else if (!(TARGET_64BIT && x86_64_movabs_operand (op1, DImode)))
16890 {
16891 rtx reg = can_create_pseudo_p () ? NULL_RTX : op0;
16892 op1 = legitimize_pic_address (op1, reg);
16893 if (op0 == op1)
16894 return;
16895 op1 = convert_to_mode (mode, op1, 1);
16896 }
16897 }
16898 }
16899 else
16900 {
16901 if (MEM_P (op0)
16902 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
16903 || !push_operand (op0, mode))
16904 && MEM_P (op1))
16905 op1 = force_reg (mode, op1);
16906
16907 if (push_operand (op0, mode)
16908 && ! general_no_elim_operand (op1, mode))
16909 op1 = copy_to_mode_reg (mode, op1);
16910
16911 /* Force large constants in 64bit compilation into a register
16912 to get them CSEd. */
16913 if (can_create_pseudo_p ()
16914 && (mode == DImode) && TARGET_64BIT
16915 && immediate_operand (op1, mode)
16916 && !x86_64_zext_immediate_operand (op1, VOIDmode)
16917 && !register_operand (op0, mode)
16918 && optimize)
16919 op1 = copy_to_mode_reg (mode, op1);
16920
16921 if (can_create_pseudo_p ()
16922 && FLOAT_MODE_P (mode)
16923 && GET_CODE (op1) == CONST_DOUBLE)
16924 {
16925 /* If we are loading a floating point constant to a register,
16926 force the value to memory now, since we'll get better code
16927 out of the back end. */
16928
16929 op1 = validize_mem (force_const_mem (mode, op1));
16930 if (!register_operand (op0, mode))
16931 {
16932 rtx temp = gen_reg_rtx (mode);
16933 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
16934 emit_move_insn (op0, temp);
16935 return;
16936 }
16937 }
16938 }
16939
16940 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
16941 }
16942
16943 void
16944 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
16945 {
16946 rtx op0 = operands[0], op1 = operands[1];
16947 unsigned int align = GET_MODE_ALIGNMENT (mode);
16948
16949 if (push_operand (op0, VOIDmode))
16950 op0 = emit_move_resolve_push (mode, op0);
16951
16952 /* Force constants other than zero into memory. We do not know how
16953 the instructions used to build constants modify the upper 64 bits
16954 of the register; once we have that information, we may be able
16955 to handle some of them more efficiently. */
16956 if (can_create_pseudo_p ()
16957 && register_operand (op0, mode)
16958 && (CONSTANT_P (op1)
16959 || (GET_CODE (op1) == SUBREG
16960 && CONSTANT_P (SUBREG_REG (op1))))
16961 && !standard_sse_constant_p (op1))
16962 op1 = validize_mem (force_const_mem (mode, op1));
16963
16964 /* We need to check memory alignment for SSE mode since an attribute
16965 can make operands unaligned. */
16966 if (can_create_pseudo_p ()
16967 && SSE_REG_MODE_P (mode)
16968 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
16969 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
16970 {
16971 rtx tmp[2];
16972
16973 /* ix86_expand_vector_move_misalign() does not like constants ... */
16974 if (CONSTANT_P (op1)
16975 || (GET_CODE (op1) == SUBREG
16976 && CONSTANT_P (SUBREG_REG (op1))))
16977 op1 = validize_mem (force_const_mem (mode, op1));
16978
16979 /* ... nor both arguments in memory. */
16980 if (!register_operand (op0, mode)
16981 && !register_operand (op1, mode))
16982 op1 = force_reg (mode, op1);
16983
16984 tmp[0] = op0; tmp[1] = op1;
16985 ix86_expand_vector_move_misalign (mode, tmp);
16986 return;
16987 }
16988
16989 /* Make operand1 a register if it isn't already. */
16990 if (can_create_pseudo_p ()
16991 && !register_operand (op0, mode)
16992 && !register_operand (op1, mode))
16993 {
16994 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
16995 return;
16996 }
16997
16998 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
16999 }
17000
17001 /* Split 32-byte AVX unaligned load and store if needed. */
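/* When the tuning flags ask for splitting, a 32-byte unaligned load becomes
two 16-byte loads combined with a VEC_CONCAT (vinsertf128) and a 32-byte
unaligned store becomes two vextractf128 stores; otherwise a single
unaligned 256-bit move is emitted. */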
17002
17003 static void
17004 ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1)
17005 {
17006 rtx m;
17007 rtx (*extract) (rtx, rtx, rtx);
17008 rtx (*load_unaligned) (rtx, rtx);
17009 rtx (*store_unaligned) (rtx, rtx);
17010 enum machine_mode mode;
17011
17012 switch (GET_MODE (op0))
17013 {
17014 default:
17015 gcc_unreachable ();
17016 case V32QImode:
17017 extract = gen_avx_vextractf128v32qi;
17018 load_unaligned = gen_avx_loaddquv32qi;
17019 store_unaligned = gen_avx_storedquv32qi;
17020 mode = V16QImode;
17021 break;
17022 case V8SFmode:
17023 extract = gen_avx_vextractf128v8sf;
17024 load_unaligned = gen_avx_loadups256;
17025 store_unaligned = gen_avx_storeups256;
17026 mode = V4SFmode;
17027 break;
17028 case V4DFmode:
17029 extract = gen_avx_vextractf128v4df;
17030 load_unaligned = gen_avx_loadupd256;
17031 store_unaligned = gen_avx_storeupd256;
17032 mode = V2DFmode;
17033 break;
17034 }
17035
17036 if (MEM_P (op1))
17037 {
17038 if (TARGET_AVX256_SPLIT_UNALIGNED_LOAD)
17039 {
17040 rtx r = gen_reg_rtx (mode);
17041 m = adjust_address (op1, mode, 0);
17042 emit_move_insn (r, m);
17043 m = adjust_address (op1, mode, 16);
17044 r = gen_rtx_VEC_CONCAT (GET_MODE (op0), r, m);
17045 emit_move_insn (op0, r);
17046 }
17047 /* Normal *mov<mode>_internal pattern will handle
17048 unaligned loads just fine if misaligned_operand
17049 is true, and without the UNSPEC it can be combined
17050 with arithmetic instructions. */
17051 else if (misaligned_operand (op1, GET_MODE (op1)))
17052 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
17053 else
17054 emit_insn (load_unaligned (op0, op1));
17055 }
17056 else if (MEM_P (op0))
17057 {
17058 if (TARGET_AVX256_SPLIT_UNALIGNED_STORE)
17059 {
17060 m = adjust_address (op0, mode, 0);
17061 emit_insn (extract (m, op1, const0_rtx));
17062 m = adjust_address (op0, mode, 16);
17063 emit_insn (extract (m, op1, const1_rtx));
17064 }
17065 else
17066 emit_insn (store_unaligned (op0, op1));
17067 }
17068 else
17069 gcc_unreachable ();
17070 }
17071
17072 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
17073 straight to ix86_expand_vector_move. */
17074 /* Code generation for scalar reg-reg moves of single and double precision data:
17075 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
17076 movaps reg, reg
17077 else
17078 movss reg, reg
17079 if (x86_sse_partial_reg_dependency == true)
17080 movapd reg, reg
17081 else
17082 movsd reg, reg
17083
17084 Code generation for scalar loads of double precision data:
17085 if (x86_sse_split_regs == true)
17086 movlpd mem, reg (gas syntax)
17087 else
17088 movsd mem, reg
17089
17090 Code generation for unaligned packed loads of single precision data
17091 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
17092 if (x86_sse_unaligned_move_optimal)
17093 movups mem, reg
17094
17095 if (x86_sse_partial_reg_dependency == true)
17096 {
17097 xorps reg, reg
17098 movlps mem, reg
17099 movhps mem+8, reg
17100 }
17101 else
17102 {
17103 movlps mem, reg
17104 movhps mem+8, reg
17105 }
17106
17107 Code generation for unaligned packed loads of double precision data
17108 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
17109 if (x86_sse_unaligned_move_optimal)
17110 movupd mem, reg
17111
17112 if (x86_sse_split_regs == true)
17113 {
17114 movlpd mem, reg
17115 movhpd mem+8, reg
17116 }
17117 else
17118 {
17119 movsd mem, reg
17120 movhpd mem+8, reg
17121 }
17122 */
17123
17124 void
17125 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
17126 {
17127 rtx op0, op1, orig_op0 = NULL_RTX, m;
17128 rtx (*load_unaligned) (rtx, rtx);
17129 rtx (*store_unaligned) (rtx, rtx);
17130
17131 op0 = operands[0];
17132 op1 = operands[1];
17133
17134 if (GET_MODE_SIZE (mode) == 64)
17135 {
17136 switch (GET_MODE_CLASS (mode))
17137 {
17138 case MODE_VECTOR_INT:
17139 case MODE_INT:
17140 if (GET_MODE (op0) != V16SImode)
17141 {
17142 if (!MEM_P (op0))
17143 {
17144 orig_op0 = op0;
17145 op0 = gen_reg_rtx (V16SImode);
17146 }
17147 else
17148 op0 = gen_lowpart (V16SImode, op0);
17149 }
17150 op1 = gen_lowpart (V16SImode, op1);
17151 /* FALLTHRU */
17152
17153 case MODE_VECTOR_FLOAT:
17154 switch (GET_MODE (op0))
17155 {
17156 default:
17157 gcc_unreachable ();
17158 case V16SImode:
17159 load_unaligned = gen_avx512f_loaddquv16si;
17160 store_unaligned = gen_avx512f_storedquv16si;
17161 break;
17162 case V16SFmode:
17163 load_unaligned = gen_avx512f_loadups512;
17164 store_unaligned = gen_avx512f_storeups512;
17165 break;
17166 case V8DFmode:
17167 load_unaligned = gen_avx512f_loadupd512;
17168 store_unaligned = gen_avx512f_storeupd512;
17169 break;
17170 }
17171
17172 if (MEM_P (op1))
17173 emit_insn (load_unaligned (op0, op1));
17174 else if (MEM_P (op0))
17175 emit_insn (store_unaligned (op0, op1));
17176 else
17177 gcc_unreachable ();
17178 if (orig_op0)
17179 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17180 break;
17181
17182 default:
17183 gcc_unreachable ();
17184 }
17185
17186 return;
17187 }
17188
17189 if (TARGET_AVX
17190 && GET_MODE_SIZE (mode) == 32)
17191 {
17192 switch (GET_MODE_CLASS (mode))
17193 {
17194 case MODE_VECTOR_INT:
17195 case MODE_INT:
17196 if (GET_MODE (op0) != V32QImode)
17197 {
17198 if (!MEM_P (op0))
17199 {
17200 orig_op0 = op0;
17201 op0 = gen_reg_rtx (V32QImode);
17202 }
17203 else
17204 op0 = gen_lowpart (V32QImode, op0);
17205 }
17206 op1 = gen_lowpart (V32QImode, op1);
17207 /* FALLTHRU */
17208
17209 case MODE_VECTOR_FLOAT:
17210 ix86_avx256_split_vector_move_misalign (op0, op1);
17211 if (orig_op0)
17212 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17213 break;
17214
17215 default:
17216 gcc_unreachable ();
17217 }
17218
17219 return;
17220 }
17221
17222 if (MEM_P (op1))
17223 {
17224 /* Normal *mov<mode>_internal pattern will handle
17225 unaligned loads just fine if misaligned_operand
17226 is true, and without the UNSPEC it can be combined
17227 with arithmetic instructions. */
17228 if (TARGET_AVX
17229 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
17230 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
17231 && misaligned_operand (op1, GET_MODE (op1)))
17232 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
17233 /* ??? If we have typed data, then it would appear that using
17234 movdqu is the only way to get unaligned data loaded with
17235 integer type. */
17236 else if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
17237 {
17238 if (GET_MODE (op0) != V16QImode)
17239 {
17240 orig_op0 = op0;
17241 op0 = gen_reg_rtx (V16QImode);
17242 }
17243 op1 = gen_lowpart (V16QImode, op1);
17244 /* We will eventually emit movups based on insn attributes. */
17245 emit_insn (gen_sse2_loaddquv16qi (op0, op1));
17246 if (orig_op0)
17247 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17248 }
17249 else if (TARGET_SSE2 && mode == V2DFmode)
17250 {
17251 rtx zero;
17252
17253 if (TARGET_AVX
17254 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
17255 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17256 || optimize_insn_for_size_p ())
17257 {
17258 /* We will eventually emit movups based on insn attributes. */
17259 emit_insn (gen_sse2_loadupd (op0, op1));
17260 return;
17261 }
17262
17263 /* When SSE registers are split into halves, we can avoid
17264 writing to the top half twice. */
17265 if (TARGET_SSE_SPLIT_REGS)
17266 {
17267 emit_clobber (op0);
17268 zero = op0;
17269 }
17270 else
17271 {
17272 /* ??? Not sure about the best option for the Intel chips.
17273 The following would seem to satisfy; the register is
17274 entirely cleared, breaking the dependency chain. We
17275 then store to the upper half, with a dependency depth
17276 of one. A rumor has it that Intel recommends two movsd
17277 followed by an unpacklpd, but this is unconfirmed. And
17278 given that the dependency depth of the unpacklpd would
17279 still be one, I'm not sure why this would be better. */
17280 zero = CONST0_RTX (V2DFmode);
17281 }
17282
17283 m = adjust_address (op1, DFmode, 0);
17284 emit_insn (gen_sse2_loadlpd (op0, zero, m));
17285 m = adjust_address (op1, DFmode, 8);
17286 emit_insn (gen_sse2_loadhpd (op0, op0, m));
17287 }
17288 else
17289 {
17290 rtx t;
17291
17292 if (TARGET_AVX
17293 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
17294 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17295 || optimize_insn_for_size_p ())
17296 {
17297 if (GET_MODE (op0) != V4SFmode)
17298 {
17299 orig_op0 = op0;
17300 op0 = gen_reg_rtx (V4SFmode);
17301 }
17302 op1 = gen_lowpart (V4SFmode, op1);
17303 emit_insn (gen_sse_loadups (op0, op1));
17304 if (orig_op0)
17305 emit_move_insn (orig_op0,
17306 gen_lowpart (GET_MODE (orig_op0), op0));
17307 return;
17308 }
17309
17310 if (mode != V4SFmode)
17311 t = gen_reg_rtx (V4SFmode);
17312 else
17313 t = op0;
17314
17315 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
17316 emit_move_insn (t, CONST0_RTX (V4SFmode));
17317 else
17318 emit_clobber (t);
17319
17320 m = adjust_address (op1, V2SFmode, 0);
17321 emit_insn (gen_sse_loadlps (t, t, m));
17322 m = adjust_address (op1, V2SFmode, 8);
17323 emit_insn (gen_sse_loadhps (t, t, m));
17324 if (mode != V4SFmode)
17325 emit_move_insn (op0, gen_lowpart (mode, t));
17326 }
17327 }
17328 else if (MEM_P (op0))
17329 {
17330 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
17331 {
17332 op0 = gen_lowpart (V16QImode, op0);
17333 op1 = gen_lowpart (V16QImode, op1);
17334 /* We will eventually emit movups based on insn attributes. */
17335 emit_insn (gen_sse2_storedquv16qi (op0, op1));
17336 }
17337 else if (TARGET_SSE2 && mode == V2DFmode)
17338 {
17339 if (TARGET_AVX
17340 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
17341 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17342 || optimize_insn_for_size_p ())
17343 /* We will eventually emit movups based on insn attributes. */
17344 emit_insn (gen_sse2_storeupd (op0, op1));
17345 else
17346 {
17347 m = adjust_address (op0, DFmode, 0);
17348 emit_insn (gen_sse2_storelpd (m, op1));
17349 m = adjust_address (op0, DFmode, 8);
17350 emit_insn (gen_sse2_storehpd (m, op1));
17351 }
17352 }
17353 else
17354 {
17355 if (mode != V4SFmode)
17356 op1 = gen_lowpart (V4SFmode, op1);
17357
17358 if (TARGET_AVX
17359 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
17360 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17361 || optimize_insn_for_size_p ())
17362 {
17363 op0 = gen_lowpart (V4SFmode, op0);
17364 emit_insn (gen_sse_storeups (op0, op1));
17365 }
17366 else
17367 {
17368 m = adjust_address (op0, V2SFmode, 0);
17369 emit_insn (gen_sse_storelps (m, op1));
17370 m = adjust_address (op0, V2SFmode, 8);
17371 emit_insn (gen_sse_storehps (m, op1));
17372 }
17373 }
17374 }
17375 else
17376 gcc_unreachable ();
17377 }
17378
17379 /* Helper function of ix86_fixup_binary_operands to canonicalize
17380 operand order. Returns true if the operands should be swapped. */
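/* For example, (plus:SI (const_int 3) (reg)) is swapped so the immediate
becomes the second source, and (plus:SI (mem) (reg)) is swapped so the
memory reference comes last, unless the destination already matches the
first source. */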
17381
17382 static bool
17383 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
17384 rtx operands[])
17385 {
17386 rtx dst = operands[0];
17387 rtx src1 = operands[1];
17388 rtx src2 = operands[2];
17389
17390 /* If the operation is not commutative, we can't do anything. */
17391 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
17392 return false;
17393
17394 /* Highest priority is that src1 should match dst. */
17395 if (rtx_equal_p (dst, src1))
17396 return false;
17397 if (rtx_equal_p (dst, src2))
17398 return true;
17399
17400 /* Next highest priority is that immediate constants come second. */
17401 if (immediate_operand (src2, mode))
17402 return false;
17403 if (immediate_operand (src1, mode))
17404 return true;
17405
17406 /* Lowest priority is that memory references should come second. */
17407 if (MEM_P (src2))
17408 return false;
17409 if (MEM_P (src1))
17410 return true;
17411
17412 return false;
17413 }
17414
17415
17416 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
17417 destination to use for the operation. If different from the true
17418 destination in operands[0], a copy operation will be required. */
17419
17420 rtx
17421 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
17422 rtx operands[])
17423 {
17424 rtx dst = operands[0];
17425 rtx src1 = operands[1];
17426 rtx src2 = operands[2];
17427
17428 /* Canonicalize operand order. */
17429 if (ix86_swap_binary_operands_p (code, mode, operands))
17430 {
17431 rtx temp;
17432
17433 /* It is invalid to swap operands of different modes. */
17434 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
17435
17436 temp = src1;
17437 src1 = src2;
17438 src2 = temp;
17439 }
17440
17441 /* Both source operands cannot be in memory. */
17442 if (MEM_P (src1) && MEM_P (src2))
17443 {
17444 /* Optimization: Only read from memory once. */
17445 if (rtx_equal_p (src1, src2))
17446 {
17447 src2 = force_reg (mode, src2);
17448 src1 = src2;
17449 }
17450 else if (rtx_equal_p (dst, src1))
17451 src2 = force_reg (mode, src2);
17452 else
17453 src1 = force_reg (mode, src1);
17454 }
17455
17456 /* If the destination is memory, and we do not have matching source
17457 operands, do things in registers. */
17458 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
17459 dst = gen_reg_rtx (mode);
17460
17461 /* Source 1 cannot be a constant. */
17462 if (CONSTANT_P (src1))
17463 src1 = force_reg (mode, src1);
17464
17465 /* Source 1 cannot be a non-matching memory. */
17466 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
17467 src1 = force_reg (mode, src1);
17468
17469 /* Improve address combine. */
17470 if (code == PLUS
17471 && GET_MODE_CLASS (mode) == MODE_INT
17472 && MEM_P (src2))
17473 src2 = force_reg (mode, src2);
17474
17475 operands[1] = src1;
17476 operands[2] = src2;
17477 return dst;
17478 }
17479
17480 /* Similarly, but assume that the destination has already been
17481 set up properly. */
17482
17483 void
17484 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
17485 enum machine_mode mode, rtx operands[])
17486 {
17487 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
17488 gcc_assert (dst == operands[0]);
17489 }
17490
17491 /* Attempt to expand a binary operator. Make the expansion closer to the
17492 actual machine than just general_operand, which would allow 3 separate
17493 memory references (one output, two input) in a single insn. */
17494
17495 void
17496 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
17497 rtx operands[])
17498 {
17499 rtx src1, src2, dst, op, clob;
17500
17501 dst = ix86_fixup_binary_operands (code, mode, operands);
17502 src1 = operands[1];
17503 src2 = operands[2];
17504
17505 /* Emit the instruction. */
17506
17507 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
17508 if (reload_in_progress)
17509 {
17510 /* Reload doesn't know about the flags register, and doesn't know that
17511 it doesn't want to clobber it. We can only do this with PLUS. */
17512 gcc_assert (code == PLUS);
17513 emit_insn (op);
17514 }
17515 else if (reload_completed
17516 && code == PLUS
17517 && !rtx_equal_p (dst, src1))
17518 {
17519 /* This is going to be an LEA; avoid splitting it later. */
17520 emit_insn (op);
17521 }
17522 else
17523 {
17524 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
17525 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
17526 }
17527
17528 /* Fix up the destination if needed. */
17529 if (dst != operands[0])
17530 emit_move_insn (operands[0], dst);
17531 }
17532
17533 /* Expand vector logical operation CODE (AND, IOR, XOR) in MODE with
17534 the given OPERANDS. */
17535
17536 void
17537 ix86_expand_vector_logical_operator (enum rtx_code code, enum machine_mode mode,
17538 rtx operands[])
17539 {
17540 rtx op1 = NULL_RTX, op2 = NULL_RTX;
17541 if (GET_CODE (operands[1]) == SUBREG)
17542 {
17543 op1 = operands[1];
17544 op2 = operands[2];
17545 }
17546 else if (GET_CODE (operands[2]) == SUBREG)
17547 {
17548 op1 = operands[2];
17549 op2 = operands[1];
17550 }
17551 /* Optimize (__m128i) d | (__m128i) e and similar code
17552 when d and e are float vectors into a float vector logical
17553 insn. In C/C++ without using intrinsics there is no other way
17554 to express a vector logical operation on float vectors than
17555 to cast them temporarily to integer vectors. */
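/* For example, (ior:V2DI (subreg:V2DI (reg:V2DF ...)) (subreg:V2DI (reg:V2DF ...)))
is rewritten here as an IOR in V2DFmode so that orpd/vorpd is emitted
instead of por. */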
17556 if (op1
17557 && !TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17558 && ((GET_CODE (op2) == SUBREG || GET_CODE (op2) == CONST_VECTOR))
17559 && GET_MODE_CLASS (GET_MODE (SUBREG_REG (op1))) == MODE_VECTOR_FLOAT
17560 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op1))) == GET_MODE_SIZE (mode)
17561 && SUBREG_BYTE (op1) == 0
17562 && (GET_CODE (op2) == CONST_VECTOR
17563 || (GET_MODE (SUBREG_REG (op1)) == GET_MODE (SUBREG_REG (op2))
17564 && SUBREG_BYTE (op2) == 0))
17565 && can_create_pseudo_p ())
17566 {
17567 rtx dst;
17568 switch (GET_MODE (SUBREG_REG (op1)))
17569 {
17570 case V4SFmode:
17571 case V8SFmode:
17572 case V16SFmode:
17573 case V2DFmode:
17574 case V4DFmode:
17575 case V8DFmode:
17576 dst = gen_reg_rtx (GET_MODE (SUBREG_REG (op1)));
17577 if (GET_CODE (op2) == CONST_VECTOR)
17578 {
17579 op2 = gen_lowpart (GET_MODE (dst), op2);
17580 op2 = force_reg (GET_MODE (dst), op2);
17581 }
17582 else
17583 {
17584 op1 = operands[1];
17585 op2 = SUBREG_REG (operands[2]);
17586 if (!nonimmediate_operand (op2, GET_MODE (dst)))
17587 op2 = force_reg (GET_MODE (dst), op2);
17588 }
17589 op1 = SUBREG_REG (op1);
17590 if (!nonimmediate_operand (op1, GET_MODE (dst)))
17591 op1 = force_reg (GET_MODE (dst), op1);
17592 emit_insn (gen_rtx_SET (VOIDmode, dst,
17593 gen_rtx_fmt_ee (code, GET_MODE (dst),
17594 op1, op2)));
17595 emit_move_insn (operands[0], gen_lowpart (mode, dst));
17596 return;
17597 default:
17598 break;
17599 }
17600 }
17601 if (!nonimmediate_operand (operands[1], mode))
17602 operands[1] = force_reg (mode, operands[1]);
17603 if (!nonimmediate_operand (operands[2], mode))
17604 operands[2] = force_reg (mode, operands[2]);
17605 ix86_fixup_binary_operands_no_copy (code, mode, operands);
17606 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
17607 gen_rtx_fmt_ee (code, mode, operands[1],
17608 operands[2])));
17609 }
17610
17611 /* Return TRUE or FALSE depending on whether the binary operator meets the
17612 appropriate constraints. */
17613
17614 bool
17615 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
17616 rtx operands[3])
17617 {
17618 rtx dst = operands[0];
17619 rtx src1 = operands[1];
17620 rtx src2 = operands[2];
17621
17622 /* Both source operands cannot be in memory. */
17623 if (MEM_P (src1) && MEM_P (src2))
17624 return false;
17625
17626 /* Canonicalize operand order for commutative operators. */
17627 if (ix86_swap_binary_operands_p (code, mode, operands))
17628 {
17629 rtx temp = src1;
17630 src1 = src2;
17631 src2 = temp;
17632 }
17633
17634 /* If the destination is memory, we must have a matching source operand. */
17635 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
17636 return false;
17637
17638 /* Source 1 cannot be a constant. */
17639 if (CONSTANT_P (src1))
17640 return false;
17641
17642 /* Source 1 cannot be a non-matching memory. */
17643 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
17644 /* Support "andhi/andsi/anddi" as a zero-extending move. */
17645 return (code == AND
17646 && (mode == HImode
17647 || mode == SImode
17648 || (TARGET_64BIT && mode == DImode))
17649 && satisfies_constraint_L (src2));
17650
17651 return true;
17652 }
17653
17654 /* Attempt to expand a unary operator. Make the expansion closer to the
17655 actual machine, than just general_operand, which will allow 2 separate
17656 memory references (one output, one input) in a single insn. */
17657
17658 void
17659 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
17660 rtx operands[])
17661 {
17662 int matching_memory;
17663 rtx src, dst, op, clob;
17664
17665 dst = operands[0];
17666 src = operands[1];
17667
17668 /* If the destination is memory, and we do not have matching source
17669 operands, do things in registers. */
17670 matching_memory = 0;
17671 if (MEM_P (dst))
17672 {
17673 if (rtx_equal_p (dst, src))
17674 matching_memory = 1;
17675 else
17676 dst = gen_reg_rtx (mode);
17677 }
17678
17679 /* When source operand is memory, destination must match. */
17680 if (MEM_P (src) && !matching_memory)
17681 src = force_reg (mode, src);
17682
17683 /* Emit the instruction. */
17684
17685 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
17686 if (reload_in_progress || code == NOT)
17687 {
17688 /* Reload doesn't know about the flags register, and doesn't know that
17689 it doesn't want to clobber it. */
17690 gcc_assert (code == NOT);
17691 emit_insn (op);
17692 }
17693 else
17694 {
17695 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
17696 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
17697 }
17698
17699 /* Fix up the destination if needed. */
17700 if (dst != operands[0])
17701 emit_move_insn (operands[0], dst);
17702 }
17703
17704 /* Split 32bit/64bit divmod with 8bit unsigned divmod if dividend and
17705 divisor are within the range [0-255]. */
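/* A rough sketch of the code emitted below (SImode case; register
   choices are illustrative only):

	mov	op2, scratch
	or	op3, scratch
	test	$0xffffff00, scratch
	je	.Lqimode
	{i}divl	...			; full-width divide
	jmp	.Lend
   .Lqimode:
	divb	...			; 8-bit divide, AL = quotient, AH = remainder
   .Lend:				*/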
17706
17707 void
17708 ix86_split_idivmod (enum machine_mode mode, rtx operands[],
17709 bool signed_p)
17710 {
17711 rtx_code_label *end_label, *qimode_label;
17712 rtx insn, div, mod;
17713 rtx scratch, tmp0, tmp1, tmp2;
17714 rtx (*gen_divmod4_1) (rtx, rtx, rtx, rtx);
17715 rtx (*gen_zero_extend) (rtx, rtx);
17716 rtx (*gen_test_ccno_1) (rtx, rtx);
17717
17718 switch (mode)
17719 {
17720 case SImode:
17721 gen_divmod4_1 = signed_p ? gen_divmodsi4_1 : gen_udivmodsi4_1;
17722 gen_test_ccno_1 = gen_testsi_ccno_1;
17723 gen_zero_extend = gen_zero_extendqisi2;
17724 break;
17725 case DImode:
17726 gen_divmod4_1 = signed_p ? gen_divmoddi4_1 : gen_udivmoddi4_1;
17727 gen_test_ccno_1 = gen_testdi_ccno_1;
17728 gen_zero_extend = gen_zero_extendqidi2;
17729 break;
17730 default:
17731 gcc_unreachable ();
17732 }
17733
17734 end_label = gen_label_rtx ();
17735 qimode_label = gen_label_rtx ();
17736
17737 scratch = gen_reg_rtx (mode);
17738
17739 /* Use 8bit unsigned divmod if dividend and divisor are within
17740 the range [0-255]. */
17741 emit_move_insn (scratch, operands[2]);
17742 scratch = expand_simple_binop (mode, IOR, scratch, operands[3],
17743 scratch, 1, OPTAB_DIRECT);
17744 emit_insn (gen_test_ccno_1 (scratch, GEN_INT (-0x100)));
17745 tmp0 = gen_rtx_REG (CCNOmode, FLAGS_REG);
17746 tmp0 = gen_rtx_EQ (VOIDmode, tmp0, const0_rtx);
17747 tmp0 = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp0,
17748 gen_rtx_LABEL_REF (VOIDmode, qimode_label),
17749 pc_rtx);
17750 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp0));
17751 predict_jump (REG_BR_PROB_BASE * 50 / 100);
17752 JUMP_LABEL (insn) = qimode_label;
17753
17754 /* Generate original signed/unsigned divmod. */
17755 div = gen_divmod4_1 (operands[0], operands[1],
17756 operands[2], operands[3]);
17757 emit_insn (div);
17758
17759 /* Branch to the end. */
17760 emit_jump_insn (gen_jump (end_label));
17761 emit_barrier ();
17762
17763 /* Generate 8bit unsigned divide. */
17764 emit_label (qimode_label);
17765 /* Don't use operands[0] for result of 8bit divide since not all
17766 registers support QImode ZERO_EXTRACT. */
17767 tmp0 = simplify_gen_subreg (HImode, scratch, mode, 0);
17768 tmp1 = simplify_gen_subreg (HImode, operands[2], mode, 0);
17769 tmp2 = simplify_gen_subreg (QImode, operands[3], mode, 0);
17770 emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, tmp2));
17771
17772 if (signed_p)
17773 {
17774 div = gen_rtx_DIV (SImode, operands[2], operands[3]);
17775 mod = gen_rtx_MOD (SImode, operands[2], operands[3]);
17776 }
17777 else
17778 {
17779 div = gen_rtx_UDIV (SImode, operands[2], operands[3]);
17780 mod = gen_rtx_UMOD (SImode, operands[2], operands[3]);
17781 }
17782
17783 /* Extract remainder from AH. */
17784 tmp1 = gen_rtx_ZERO_EXTRACT (mode, tmp0, GEN_INT (8), GEN_INT (8));
17785 if (REG_P (operands[1]))
17786 insn = emit_move_insn (operands[1], tmp1);
17787 else
17788 {
17789 /* Need a new scratch register since the old one has result
17790 of 8bit divide. */
17791 scratch = gen_reg_rtx (mode);
17792 emit_move_insn (scratch, tmp1);
17793 insn = emit_move_insn (operands[1], scratch);
17794 }
17795 set_unique_reg_note (insn, REG_EQUAL, mod);
17796
17797 /* Zero extend quotient from AL. */
17798 tmp1 = gen_lowpart (QImode, tmp0);
17799 insn = emit_insn (gen_zero_extend (operands[0], tmp1));
17800 set_unique_reg_note (insn, REG_EQUAL, div);
17801
17802 emit_label (end_label);
17803 }
17804
17805 #define LEA_MAX_STALL (3)
17806 #define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1)
17807
17808 /* Increase given DISTANCE in half-cycles according to
17809 dependencies between PREV and NEXT instructions.
17810 Add 1 half-cycle if there is no dependency and
17811 go to the next cycle if there is some dependency. */
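/* For example (illustrative only): with DISTANCE == 3 half-cycles, a
   dependency between PREV and NEXT rounds up to the next full cycle,
   3 -> 3 + (3 & 1) + 2 = 6, while no dependency just gives 3 -> 4.  */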
17812
17813 static unsigned int
17814 increase_distance (rtx_insn *prev, rtx_insn *next, unsigned int distance)
17815 {
17816 df_ref def, use;
17817
17818 if (!prev || !next)
17819 return distance + (distance & 1) + 2;
17820
17821 if (!DF_INSN_USES (next) || !DF_INSN_DEFS (prev))
17822 return distance + 1;
17823
17824 FOR_EACH_INSN_USE (use, next)
17825 FOR_EACH_INSN_DEF (def, prev)
17826 if (!DF_REF_IS_ARTIFICIAL (def)
17827 && DF_REF_REGNO (use) == DF_REF_REGNO (def))
17828 return distance + (distance & 1) + 2;
17829
17830 return distance + 1;
17831 }
17832
17833 /* Return true if instruction INSN defines register number
17834 REGNO1 or REGNO2. */
17835
17836 static bool
17837 insn_defines_reg (unsigned int regno1, unsigned int regno2,
17838 rtx insn)
17839 {
17840 df_ref def;
17841
17842 FOR_EACH_INSN_DEF (def, insn)
17843 if (DF_REF_REG_DEF_P (def)
17844 && !DF_REF_IS_ARTIFICIAL (def)
17845 && (regno1 == DF_REF_REGNO (def)
17846 || regno2 == DF_REF_REGNO (def)))
17847 return true;
17848
17849 return false;
17850 }
17851
17852 /* Return true if instruction INSN uses register number
17853 REGNO as part of an address expression. */
17854
17855 static bool
17856 insn_uses_reg_mem (unsigned int regno, rtx insn)
17857 {
17858 df_ref use;
17859
17860 FOR_EACH_INSN_USE (use, insn)
17861 if (DF_REF_REG_MEM_P (use) && regno == DF_REF_REGNO (use))
17862 return true;
17863
17864 return false;
17865 }
17866
17867 /* Search backward for non-agu definition of register number REGNO1
17868 or register number REGNO2 in basic block starting from instruction
17869 START up to head of basic block or instruction INSN.
17870
17871 Set *FOUND to true if a definition was found and to false
17872 otherwise.
17873
17874 Distance in half-cycles between START and found instruction or head
17875 of BB is added to DISTANCE and returned. */
17876
17877 static int
17878 distance_non_agu_define_in_bb (unsigned int regno1, unsigned int regno2,
17879 rtx_insn *insn, int distance,
17880 rtx_insn *start, bool *found)
17881 {
17882 basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL;
17883 rtx_insn *prev = start;
17884 rtx_insn *next = NULL;
17885
17886 *found = false;
17887
17888 while (prev
17889 && prev != insn
17890 && distance < LEA_SEARCH_THRESHOLD)
17891 {
17892 if (NONDEBUG_INSN_P (prev) && NONJUMP_INSN_P (prev))
17893 {
17894 distance = increase_distance (prev, next, distance);
17895 if (insn_defines_reg (regno1, regno2, prev))
17896 {
17897 if (recog_memoized (prev) < 0
17898 || get_attr_type (prev) != TYPE_LEA)
17899 {
17900 *found = true;
17901 return distance;
17902 }
17903 }
17904
17905 next = prev;
17906 }
17907 if (prev == BB_HEAD (bb))
17908 break;
17909
17910 prev = PREV_INSN (prev);
17911 }
17912
17913 return distance;
17914 }
17915
17916 /* Search backward for non-agu definition of register number REGNO1
17917 or register number REGNO2 in INSN's basic block until
17918 1. Pass LEA_SEARCH_THRESHOLD instructions, or
17919 2. Reach a neighbouring BB's boundary, or
17920 3. Reach agu definition.
17921 Returns the distance between the non-agu definition point and INSN.
17922 If no definition point, returns -1. */
17923
17924 static int
17925 distance_non_agu_define (unsigned int regno1, unsigned int regno2,
17926 rtx_insn *insn)
17927 {
17928 basic_block bb = BLOCK_FOR_INSN (insn);
17929 int distance = 0;
17930 bool found = false;
17931
17932 if (insn != BB_HEAD (bb))
17933 distance = distance_non_agu_define_in_bb (regno1, regno2, insn,
17934 distance, PREV_INSN (insn),
17935 &found);
17936
17937 if (!found && distance < LEA_SEARCH_THRESHOLD)
17938 {
17939 edge e;
17940 edge_iterator ei;
17941 bool simple_loop = false;
17942
17943 FOR_EACH_EDGE (e, ei, bb->preds)
17944 if (e->src == bb)
17945 {
17946 simple_loop = true;
17947 break;
17948 }
17949
17950 if (simple_loop)
17951 distance = distance_non_agu_define_in_bb (regno1, regno2,
17952 insn, distance,
17953 BB_END (bb), &found);
17954 else
17955 {
17956 int shortest_dist = -1;
17957 bool found_in_bb = false;
17958
17959 FOR_EACH_EDGE (e, ei, bb->preds)
17960 {
17961 int bb_dist
17962 = distance_non_agu_define_in_bb (regno1, regno2,
17963 insn, distance,
17964 BB_END (e->src),
17965 &found_in_bb);
17966 if (found_in_bb)
17967 {
17968 if (shortest_dist < 0)
17969 shortest_dist = bb_dist;
17970 else if (bb_dist > 0)
17971 shortest_dist = MIN (bb_dist, shortest_dist);
17972
17973 found = true;
17974 }
17975 }
17976
17977 distance = shortest_dist;
17978 }
17979 }
17980
17981 /* get_attr_type may modify recog data. We want to make sure
17982 that recog data is valid for instruction INSN, on which
17983 distance_non_agu_define is called. INSN is unchanged here. */
17984 extract_insn_cached (insn);
17985
17986 if (!found)
17987 return -1;
17988
17989 return distance >> 1;
17990 }
17991
17992 /* Return the distance in half-cycles, added to DISTANCE, between INSN
17993 and the next insn that uses register number REGNO in a memory
17994 address. Return -1 if REGNO is set.
17995
17996 Set *FOUND to true if a register usage was found and to false
17997 otherwise.
17998 Set *REDEFINED to true if a register redefinition was found and
17999 to false otherwise. */
18000
18001 static int
18002 distance_agu_use_in_bb (unsigned int regno,
18003 rtx_insn *insn, int distance, rtx_insn *start,
18004 bool *found, bool *redefined)
18005 {
18006 basic_block bb = NULL;
18007 rtx_insn *next = start;
18008 rtx_insn *prev = NULL;
18009
18010 *found = false;
18011 *redefined = false;
18012
18013 if (start != NULL_RTX)
18014 {
18015 bb = BLOCK_FOR_INSN (start);
18016 if (start != BB_HEAD (bb))
18017 /* If insn and start belong to the same bb, set prev to insn,
18018 so the call to increase_distance will increase the distance
18019 between insns by 1. */
18020 prev = insn;
18021 }
18022
18023 while (next
18024 && next != insn
18025 && distance < LEA_SEARCH_THRESHOLD)
18026 {
18027 if (NONDEBUG_INSN_P (next) && NONJUMP_INSN_P (next))
18028 {
18029 distance = increase_distance (prev, next, distance);
18030 if (insn_uses_reg_mem (regno, next))
18031 {
18032 /* Return DISTANCE if OP0 is used in memory
18033 address in NEXT. */
18034 *found = true;
18035 return distance;
18036 }
18037
18038 if (insn_defines_reg (regno, INVALID_REGNUM, next))
18039 {
18040 /* Return -1 if OP0 is set in NEXT. */
18041 *redefined = true;
18042 return -1;
18043 }
18044
18045 prev = next;
18046 }
18047
18048 if (next == BB_END (bb))
18049 break;
18050
18051 next = NEXT_INSN (next);
18052 }
18053
18054 return distance;
18055 }
18056
18057 /* Return the distance between INSN and the next insn that uses
18058 register number REGNO0 in memory address. Return -1 if no such
18059 a use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set. */
18060
18061 static int
18062 distance_agu_use (unsigned int regno0, rtx_insn *insn)
18063 {
18064 basic_block bb = BLOCK_FOR_INSN (insn);
18065 int distance = 0;
18066 bool found = false;
18067 bool redefined = false;
18068
18069 if (insn != BB_END (bb))
18070 distance = distance_agu_use_in_bb (regno0, insn, distance,
18071 NEXT_INSN (insn),
18072 &found, &redefined);
18073
18074 if (!found && !redefined && distance < LEA_SEARCH_THRESHOLD)
18075 {
18076 edge e;
18077 edge_iterator ei;
18078 bool simple_loop = false;
18079
18080 FOR_EACH_EDGE (e, ei, bb->succs)
18081 if (e->dest == bb)
18082 {
18083 simple_loop = true;
18084 break;
18085 }
18086
18087 if (simple_loop)
18088 distance = distance_agu_use_in_bb (regno0, insn,
18089 distance, BB_HEAD (bb),
18090 &found, &redefined);
18091 else
18092 {
18093 int shortest_dist = -1;
18094 bool found_in_bb = false;
18095 bool redefined_in_bb = false;
18096
18097 FOR_EACH_EDGE (e, ei, bb->succs)
18098 {
18099 int bb_dist
18100 = distance_agu_use_in_bb (regno0, insn,
18101 distance, BB_HEAD (e->dest),
18102 &found_in_bb, &redefined_in_bb);
18103 if (found_in_bb)
18104 {
18105 if (shortest_dist < 0)
18106 shortest_dist = bb_dist;
18107 else if (bb_dist > 0)
18108 shortest_dist = MIN (bb_dist, shortest_dist);
18109
18110 found = true;
18111 }
18112 }
18113
18114 distance = shortest_dist;
18115 }
18116 }
18117
18118 if (!found || redefined)
18119 return -1;
18120
18121 return distance >> 1;
18122 }
18123
18124 /* Define this macro to tune LEA priority vs ADD; it takes effect when
18125 there is a dilemma of choosing LEA or ADD.
18126 Negative value: ADD is preferred over LEA
18127 Zero: Neutral
18128 Positive value: LEA is preferred over ADD. */
18129 #define IX86_LEA_PRIORITY 0
18130
18131 /* Return true if using the lea INSN has a performance advantage
18132 over a sequence of instructions. The instruction sequence has
18133 SPLIT_COST cycles higher latency than the lea latency. */
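/* For example (illustrative only): with dist_define == 1, dist_use == 2
   and split_cost == 0, the adjusted definition distance 1 is smaller than
   the use distance 2, so splitting into mov+add is preferred over lea.  */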
18134
18135 static bool
18136 ix86_lea_outperforms (rtx_insn *insn, unsigned int regno0, unsigned int regno1,
18137 unsigned int regno2, int split_cost, bool has_scale)
18138 {
18139 int dist_define, dist_use;
18140
18141 /* For Silvermont, if a 2-source or 3-source LEA is used for a
18142 non-destructive destination, or because we want the ability
18143 to use SCALE, the use of LEA is justified. */
18144 if (TARGET_SILVERMONT || TARGET_INTEL)
18145 {
18146 if (has_scale)
18147 return true;
18148 if (split_cost < 1)
18149 return false;
18150 if (regno0 == regno1 || regno0 == regno2)
18151 return false;
18152 return true;
18153 }
18154
18155 dist_define = distance_non_agu_define (regno1, regno2, insn);
18156 dist_use = distance_agu_use (regno0, insn);
18157
18158 if (dist_define < 0 || dist_define >= LEA_MAX_STALL)
18159 {
18160 /* If there is no non-AGU operand definition, no AGU
18161 operand usage and the split cost is 0, then both the lea
18162 and non-lea variants have the same priority. Currently
18163 we prefer lea for 64-bit code and non-lea for 32-bit
18164 code. */
18165 if (dist_use < 0 && split_cost == 0)
18166 return TARGET_64BIT || IX86_LEA_PRIORITY;
18167 else
18168 return true;
18169 }
18170
18171 /* With a longer definition distance, lea is preferable.
18172 Here we adjust it to take into account the splitting cost
18173 and lea priority. */
18174 dist_define += split_cost + IX86_LEA_PRIORITY;
18175
18176 /* If there is no use in a memory address then we just check
18177 that the split cost exceeds the AGU stall. */
18178 if (dist_use < 0)
18179 return dist_define > LEA_MAX_STALL;
18180
18181 /* If this insn has both a backward non-agu dependence and a forward
18182 agu dependence, the one with the shorter distance takes effect. */
18183 return dist_define >= dist_use;
18184 }
18185
18186 /* Return true if it is legal to clobber flags by INSN and
18187 false otherwise. */
18188
18189 static bool
18190 ix86_ok_to_clobber_flags (rtx_insn *insn)
18191 {
18192 basic_block bb = BLOCK_FOR_INSN (insn);
18193 df_ref use;
18194 bitmap live;
18195
18196 while (insn)
18197 {
18198 if (NONDEBUG_INSN_P (insn))
18199 {
18200 FOR_EACH_INSN_USE (use, insn)
18201 if (DF_REF_REG_USE_P (use) && DF_REF_REGNO (use) == FLAGS_REG)
18202 return false;
18203
18204 if (insn_defines_reg (FLAGS_REG, INVALID_REGNUM, insn))
18205 return true;
18206 }
18207
18208 if (insn == BB_END (bb))
18209 break;
18210
18211 insn = NEXT_INSN (insn);
18212 }
18213
18214 live = df_get_live_out (bb);
18215 return !REGNO_REG_SET_P (live, FLAGS_REG);
18216 }
18217
18218 /* Return true if we need to split op0 = op1 + op2 into a sequence of
18219 move and add to avoid AGU stalls. */
18220
18221 bool
18222 ix86_avoid_lea_for_add (rtx_insn *insn, rtx operands[])
18223 {
18224 unsigned int regno0, regno1, regno2;
18225
18226 /* Check if we need to optimize. */
18227 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18228 return false;
18229
18230 /* Check it is correct to split here. */
18231 if (!ix86_ok_to_clobber_flags (insn))
18232 return false;
18233
18234 regno0 = true_regnum (operands[0]);
18235 regno1 = true_regnum (operands[1]);
18236 regno2 = true_regnum (operands[2]);
18237
18238 /* We only need to split adds with a non-destructive
18239 destination operand. */
18240 if (regno0 == regno1 || regno0 == regno2)
18241 return false;
18242 else
18243 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, 1, false);
18244 }
18245
18246 /* Return true if we should emit lea instruction instead of mov
18247 instruction. */
18248
18249 bool
18250 ix86_use_lea_for_mov (rtx_insn *insn, rtx operands[])
18251 {
18252 unsigned int regno0, regno1;
18253
18254 /* Check if we need to optimize. */
18255 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18256 return false;
18257
18258 /* Use lea for reg to reg moves only. */
18259 if (!REG_P (operands[0]) || !REG_P (operands[1]))
18260 return false;
18261
18262 regno0 = true_regnum (operands[0]);
18263 regno1 = true_regnum (operands[1]);
18264
18265 return ix86_lea_outperforms (insn, regno0, regno1, INVALID_REGNUM, 0, false);
18266 }
18267
18268 /* Return true if we need to split lea into a sequence of
18269 instructions to avoid AGU stalls. */
18270
18271 bool
18272 ix86_avoid_lea_for_addr (rtx_insn *insn, rtx operands[])
18273 {
18274 unsigned int regno0, regno1, regno2;
18275 int split_cost;
18276 struct ix86_address parts;
18277 int ok;
18278
18279 /* Check we need to optimize. */
18280 if (!TARGET_AVOID_LEA_FOR_ADDR || optimize_function_for_size_p (cfun))
18281 return false;
18282
18283 /* The "at least two components" test below might not catch simple
18284 move or zero extension insns if parts.base is non-NULL and parts.disp
18285 is const0_rtx as the only components in the address, e.g. if the
18286 register is %rbp or %r13. As this test is much cheaper and moves or
18287 zero extensions are the common case, do this check first. */
18288 if (REG_P (operands[1])
18289 || (SImode_address_operand (operands[1], VOIDmode)
18290 && REG_P (XEXP (operands[1], 0))))
18291 return false;
18292
18293 /* Check if it is OK to split here. */
18294 if (!ix86_ok_to_clobber_flags (insn))
18295 return false;
18296
18297 ok = ix86_decompose_address (operands[1], &parts);
18298 gcc_assert (ok);
18299
18300 /* There should be at least two components in the address. */
18301 if ((parts.base != NULL_RTX) + (parts.index != NULL_RTX)
18302 + (parts.disp != NULL_RTX) + (parts.scale > 1) < 2)
18303 return false;
18304
18305 /* We should not split into add if a non-legitimate PIC
18306 operand is used as the displacement. */
18307 if (parts.disp && flag_pic && !LEGITIMATE_PIC_OPERAND_P (parts.disp))
18308 return false;
18309
18310 regno0 = true_regnum (operands[0]);
18311 regno1 = INVALID_REGNUM;
18312 regno2 = INVALID_REGNUM;
18313
18314 if (parts.base)
18315 regno1 = true_regnum (parts.base);
18316 if (parts.index)
18317 regno2 = true_regnum (parts.index);
18318
18319 split_cost = 0;
18320
18321 /* Compute how many cycles we will add to the execution time
18322 if we split the lea into a sequence of instructions. */
18323 if (parts.base || parts.index)
18324 {
18325 /* Have to use a mov instruction if the non-destructive
18326 destination form is used. */
18327 if (regno1 != regno0 && regno2 != regno0)
18328 split_cost += 1;
18329
18330 /* Have to add index to base if both exist. */
18331 if (parts.base && parts.index)
18332 split_cost += 1;
18333
18334 /* Have to use shift and adds if scale is 2 or greater. */
18335 if (parts.scale > 1)
18336 {
18337 if (regno0 != regno1)
18338 split_cost += 1;
18339 else if (regno2 == regno0)
18340 split_cost += 4;
18341 else
18342 split_cost += parts.scale;
18343 }
18344
18345 /* Have to use an add instruction with an immediate if
18346 disp is nonzero. */
18347 if (parts.disp && parts.disp != const0_rtx)
18348 split_cost += 1;
18349
18350 /* Subtract the price of lea. */
18351 split_cost -= 1;
18352 }
18353
18354 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost,
18355 parts.scale > 1);
18356 }
18357
18358 /* Emit x86 binary operator CODE in mode MODE, where the first operand
18359 matches the destination. The emitted RTX includes a clobber of FLAGS_REG. */
18360
18361 static void
18362 ix86_emit_binop (enum rtx_code code, enum machine_mode mode,
18363 rtx dst, rtx src)
18364 {
18365 rtx op, clob;
18366
18367 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, dst, src));
18368 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
18369
18370 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
18371 }
18372
18373 /* Return true if REGNO1's definition is nearer to INSN than REGNO2's,
scanning backward within INSN's basic block. */
18374
18375 static bool
18376 find_nearest_reg_def (rtx_insn *insn, int regno1, int regno2)
18377 {
18378 rtx_insn *prev = insn;
18379 rtx_insn *start = BB_HEAD (BLOCK_FOR_INSN (insn));
18380
18381 if (insn == start)
18382 return false;
18383 while (prev && prev != start)
18384 {
18385 if (!INSN_P (prev) || !NONDEBUG_INSN_P (prev))
18386 {
18387 prev = PREV_INSN (prev);
18388 continue;
18389 }
18390 if (insn_defines_reg (regno1, INVALID_REGNUM, prev))
18391 return true;
18392 else if (insn_defines_reg (regno2, INVALID_REGNUM, prev))
18393 return false;
18394 prev = PREV_INSN (prev);
18395 }
18396
18397 /* None of the regs is defined in the bb. */
18398 return false;
18399 }
18400
18401 /* Split a lea instruction into a sequence of instructions
18402 which are executed on the ALU to avoid AGU stalls.
18403 It is assumed that it is allowed to clobber the flags register
18404 at the lea position. */
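/* For example (illustrative only), the lea

	lea	0x4(%rbx,%rcx,2), %rax

   is split into roughly

	mov	%rcx, %rax
	shl	$1, %rax
	add	%rbx, %rax
	add	$0x4, %rax			*/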
18405
18406 void
18407 ix86_split_lea_for_addr (rtx_insn *insn, rtx operands[], enum machine_mode mode)
18408 {
18409 unsigned int regno0, regno1, regno2;
18410 struct ix86_address parts;
18411 rtx target, tmp;
18412 int ok, adds;
18413
18414 ok = ix86_decompose_address (operands[1], &parts);
18415 gcc_assert (ok);
18416
18417 target = gen_lowpart (mode, operands[0]);
18418
18419 regno0 = true_regnum (target);
18420 regno1 = INVALID_REGNUM;
18421 regno2 = INVALID_REGNUM;
18422
18423 if (parts.base)
18424 {
18425 parts.base = gen_lowpart (mode, parts.base);
18426 regno1 = true_regnum (parts.base);
18427 }
18428
18429 if (parts.index)
18430 {
18431 parts.index = gen_lowpart (mode, parts.index);
18432 regno2 = true_regnum (parts.index);
18433 }
18434
18435 if (parts.disp)
18436 parts.disp = gen_lowpart (mode, parts.disp);
18437
18438 if (parts.scale > 1)
18439 {
18440 /* Case r1 = r1 + ... */
18441 if (regno1 == regno0)
18442 {
18443 /* If we have a case r1 = r1 + C * r2 then we
18444 would have to use multiplication, which is very
18445 expensive. Assume the cost model is wrong if we
18446 get such a case here. */
18447 gcc_assert (regno2 != regno0);
18448
18449 for (adds = parts.scale; adds > 0; adds--)
18450 ix86_emit_binop (PLUS, mode, target, parts.index);
18451 }
18452 else
18453 {
18454 /* r1 = r2 + r3 * C case. Need to move r3 into r1. */
18455 if (regno0 != regno2)
18456 emit_insn (gen_rtx_SET (VOIDmode, target, parts.index));
18457
18458 /* Use shift for scaling. */
18459 ix86_emit_binop (ASHIFT, mode, target,
18460 GEN_INT (exact_log2 (parts.scale)));
18461
18462 if (parts.base)
18463 ix86_emit_binop (PLUS, mode, target, parts.base);
18464
18465 if (parts.disp && parts.disp != const0_rtx)
18466 ix86_emit_binop (PLUS, mode, target, parts.disp);
18467 }
18468 }
18469 else if (!parts.base && !parts.index)
18470 {
18471 gcc_assert (parts.disp);
18472 emit_insn (gen_rtx_SET (VOIDmode, target, parts.disp));
18473 }
18474 else
18475 {
18476 if (!parts.base)
18477 {
18478 if (regno0 != regno2)
18479 emit_insn (gen_rtx_SET (VOIDmode, target, parts.index));
18480 }
18481 else if (!parts.index)
18482 {
18483 if (regno0 != regno1)
18484 emit_insn (gen_rtx_SET (VOIDmode, target, parts.base));
18485 }
18486 else
18487 {
18488 if (regno0 == regno1)
18489 tmp = parts.index;
18490 else if (regno0 == regno2)
18491 tmp = parts.base;
18492 else
18493 {
18494 rtx tmp1;
18495
18496 /* Find better operand for SET instruction, depending
18497 on which definition is farther from the insn. */
18498 if (find_nearest_reg_def (insn, regno1, regno2))
18499 tmp = parts.index, tmp1 = parts.base;
18500 else
18501 tmp = parts.base, tmp1 = parts.index;
18502
18503 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
18504
18505 if (parts.disp && parts.disp != const0_rtx)
18506 ix86_emit_binop (PLUS, mode, target, parts.disp);
18507
18508 ix86_emit_binop (PLUS, mode, target, tmp1);
18509 return;
18510 }
18511
18512 ix86_emit_binop (PLUS, mode, target, tmp);
18513 }
18514
18515 if (parts.disp && parts.disp != const0_rtx)
18516 ix86_emit_binop (PLUS, mode, target, parts.disp);
18517 }
18518 }
18519
18520 /* Return true if it is ok to optimize an ADD operation to LEA
18521 operation to avoid flag register consumption. For most processors,
18522 ADD is faster than LEA. For processors like BONNELL, if the
18523 destination register of LEA holds an actual address which will be
18524 used soon, LEA is better; otherwise ADD is better. */
18525
18526 bool
18527 ix86_lea_for_add_ok (rtx_insn *insn, rtx operands[])
18528 {
18529 unsigned int regno0 = true_regnum (operands[0]);
18530 unsigned int regno1 = true_regnum (operands[1]);
18531 unsigned int regno2 = true_regnum (operands[2]);
18532
18533 /* If a = b + c, (a!=b && a!=c), must use lea form. */
18534 if (regno0 != regno1 && regno0 != regno2)
18535 return true;
18536
18537 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18538 return false;
18539
18540 return ix86_lea_outperforms (insn, regno0, regno1, regno2, 0, false);
18541 }
18542
18543 /* Return true if destination reg of SET_BODY is shift count of
18544 USE_BODY. */
18545
18546 static bool
18547 ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
18548 {
18549 rtx set_dest;
18550 rtx shift_rtx;
18551 int i;
18552
18553 /* Retrieve destination of SET_BODY. */
18554 switch (GET_CODE (set_body))
18555 {
18556 case SET:
18557 set_dest = SET_DEST (set_body);
18558 if (!set_dest || !REG_P (set_dest))
18559 return false;
18560 break;
18561 case PARALLEL:
18562 for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
18563 if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
18564 use_body))
18565 return true;
18566 default:
18567 return false;
18568 break;
18569 }
18570
18571 /* Retrieve shift count of USE_BODY. */
18572 switch (GET_CODE (use_body))
18573 {
18574 case SET:
18575 shift_rtx = XEXP (use_body, 1);
18576 break;
18577 case PARALLEL:
18578 for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
18579 if (ix86_dep_by_shift_count_body (set_body,
18580 XVECEXP (use_body, 0, i)))
18581 return true;
18582 default:
18583 return false;
18584 break;
18585 }
18586
18587 if (shift_rtx
18588 && (GET_CODE (shift_rtx) == ASHIFT
18589 || GET_CODE (shift_rtx) == LSHIFTRT
18590 || GET_CODE (shift_rtx) == ASHIFTRT
18591 || GET_CODE (shift_rtx) == ROTATE
18592 || GET_CODE (shift_rtx) == ROTATERT))
18593 {
18594 rtx shift_count = XEXP (shift_rtx, 1);
18595
18596 /* Return true if shift count is dest of SET_BODY. */
18597 if (REG_P (shift_count))
18598 {
18599 /* Add check since it can be invoked before register
18600 allocation in pre-reload schedule. */
18601 if (reload_completed
18602 && true_regnum (set_dest) == true_regnum (shift_count))
18603 return true;
18604 else if (REGNO (set_dest) == REGNO (shift_count))
18605 return true;
18606 }
18607 }
18608
18609 return false;
18610 }
18611
18612 /* Return true if destination reg of SET_INSN is shift count of
18613 USE_INSN. */
18614
18615 bool
18616 ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
18617 {
18618 return ix86_dep_by_shift_count_body (PATTERN (set_insn),
18619 PATTERN (use_insn));
18620 }
18621
18622 /* Return TRUE or FALSE depending on whether the unary operator meets the
18623 appropriate constraints. */
18624
18625 bool
18626 ix86_unary_operator_ok (enum rtx_code,
18627 enum machine_mode,
18628 rtx operands[2])
18629 {
18630 /* If one of operands is memory, source and destination must match. */
18631 if ((MEM_P (operands[0])
18632 || MEM_P (operands[1]))
18633 && ! rtx_equal_p (operands[0], operands[1]))
18634 return false;
18635 return true;
18636 }
18637
18638 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
18639 are ok, keeping in mind the possible movddup alternative. */
18640
18641 bool
18642 ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
18643 {
18644 if (MEM_P (operands[0]))
18645 return rtx_equal_p (operands[0], operands[1 + high]);
18646 if (MEM_P (operands[1]) && MEM_P (operands[2]))
18647 return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
18648 return true;
18649 }
18650
18651 /* Post-reload splitter for converting an SF or DFmode value in an
18652 SSE register into an unsigned SImode. */
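/* Roughly (illustrative only): values below 0x1p31 are converted with the
   ordinary signed cvttps2dq/cvttpd2dq; larger values first have 0x1p31
   subtracted and then get bit 31 xor'ed back in after the truncation.  */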
18653
18654 void
18655 ix86_split_convert_uns_si_sse (rtx operands[])
18656 {
18657 enum machine_mode vecmode;
18658 rtx value, large, zero_or_two31, input, two31, x;
18659
18660 large = operands[1];
18661 zero_or_two31 = operands[2];
18662 input = operands[3];
18663 two31 = operands[4];
18664 vecmode = GET_MODE (large);
18665 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
18666
18667 /* Load up the value into the low element. We must ensure that the other
18668 elements are valid floats -- zero is the easiest such value. */
18669 if (MEM_P (input))
18670 {
18671 if (vecmode == V4SFmode)
18672 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
18673 else
18674 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
18675 }
18676 else
18677 {
18678 input = gen_rtx_REG (vecmode, REGNO (input));
18679 emit_move_insn (value, CONST0_RTX (vecmode));
18680 if (vecmode == V4SFmode)
18681 emit_insn (gen_sse_movss (value, value, input));
18682 else
18683 emit_insn (gen_sse2_movsd (value, value, input));
18684 }
18685
18686 emit_move_insn (large, two31);
18687 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
18688
18689 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
18690 emit_insn (gen_rtx_SET (VOIDmode, large, x));
18691
18692 x = gen_rtx_AND (vecmode, zero_or_two31, large);
18693 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
18694
18695 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
18696 emit_insn (gen_rtx_SET (VOIDmode, value, x));
18697
18698 large = gen_rtx_REG (V4SImode, REGNO (large));
18699 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
18700
18701 x = gen_rtx_REG (V4SImode, REGNO (value));
18702 if (vecmode == V4SFmode)
18703 emit_insn (gen_fix_truncv4sfv4si2 (x, value));
18704 else
18705 emit_insn (gen_sse2_cvttpd2dq (x, value));
18706 value = x;
18707
18708 emit_insn (gen_xorv4si3 (value, value, large));
18709 }
18710
18711 /* Convert an unsigned DImode value into a DFmode, using only SSE.
18712 Expects the 64-bit DImode to be supplied in a pair of integral
18713 registers. Requires SSE2; will use SSE3 if available. For x86_32,
18714 -mfpmath=sse, !optimize_size only. */
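/* Worked example (illustrative only): for input 2^40 + 7 the low word 7
   becomes 0x1.0p52 + 7 and the high word 0x100 becomes 0x1.0p84 + 2^40;
   subtracting the two biases and adding the halves yields 2^40 + 7.0.  */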
18715
18716 void
18717 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
18718 {
18719 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
18720 rtx int_xmm, fp_xmm;
18721 rtx biases, exponents;
18722 rtx x;
18723
18724 int_xmm = gen_reg_rtx (V4SImode);
18725 if (TARGET_INTER_UNIT_MOVES_TO_VEC)
18726 emit_insn (gen_movdi_to_sse (int_xmm, input));
18727 else if (TARGET_SSE_SPLIT_REGS)
18728 {
18729 emit_clobber (int_xmm);
18730 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
18731 }
18732 else
18733 {
18734 x = gen_reg_rtx (V2DImode);
18735 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
18736 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
18737 }
18738
18739 x = gen_rtx_CONST_VECTOR (V4SImode,
18740 gen_rtvec (4, GEN_INT (0x43300000UL),
18741 GEN_INT (0x45300000UL),
18742 const0_rtx, const0_rtx));
18743 exponents = validize_mem (force_const_mem (V4SImode, x));
18744
18745 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
18746 emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents));
18747
18748 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
18749 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
18750 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
18751 (0x1.0p84 + double(fp_value_hi_xmm)).
18752 Note these exponents differ by 32. */
18753
18754 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
18755
18756 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
18757 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
18758 real_ldexp (&bias_lo_rvt, &dconst1, 52);
18759 real_ldexp (&bias_hi_rvt, &dconst1, 84);
18760 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
18761 x = const_double_from_real_value (bias_hi_rvt, DFmode);
18762 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
18763 biases = validize_mem (force_const_mem (V2DFmode, biases));
18764 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
18765
18766 /* Add the upper and lower DFmode values together. */
18767 if (TARGET_SSE3)
18768 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
18769 else
18770 {
18771 x = copy_to_mode_reg (V2DFmode, fp_xmm);
18772 emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm));
18773 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
18774 }
18775
18776 ix86_expand_vector_extract (false, target, fp_xmm, 0);
18777 }
18778
18779 /* Not used, but eases macroization of patterns. */
18780 void
18781 ix86_expand_convert_uns_sixf_sse (rtx, rtx)
18782 {
18783 gcc_unreachable ();
18784 }
18785
18786 /* Convert an unsigned SImode value into a DFmode. Only currently used
18787 for SSE, but applicable anywhere. */
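/* The trick used (illustrative only): the expander flips the sign bit by
   adding -2^31 in SImode, converts the result as a signed value, and then
   adds 0x1.0p31 back; e.g. 3000000000 -> 852516352 -> 852516352.0 ->
   3000000000.0.  */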
18788
18789 void
18790 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
18791 {
18792 REAL_VALUE_TYPE TWO31r;
18793 rtx x, fp;
18794
18795 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
18796 NULL, 1, OPTAB_DIRECT);
18797
18798 fp = gen_reg_rtx (DFmode);
18799 emit_insn (gen_floatsidf2 (fp, x));
18800
18801 real_ldexp (&TWO31r, &dconst1, 31);
18802 x = const_double_from_real_value (TWO31r, DFmode);
18803
18804 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
18805 if (x != target)
18806 emit_move_insn (target, x);
18807 }
18808
18809 /* Convert a signed DImode value into a DFmode. Only used for SSE in
18810 32-bit mode; otherwise we have a direct convert instruction. */
18811
18812 void
18813 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
18814 {
18815 REAL_VALUE_TYPE TWO32r;
18816 rtx fp_lo, fp_hi, x;
18817
18818 fp_lo = gen_reg_rtx (DFmode);
18819 fp_hi = gen_reg_rtx (DFmode);
18820
18821 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
18822
18823 real_ldexp (&TWO32r, &dconst1, 32);
18824 x = const_double_from_real_value (TWO32r, DFmode);
18825 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
18826
18827 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
18828
18829 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
18830 0, OPTAB_DIRECT);
18831 if (x != target)
18832 emit_move_insn (target, x);
18833 }
18834
18835 /* Convert an unsigned SImode value into a SFmode, using only SSE.
18836 For x86_32, -mfpmath=sse, !optimize_size only. */
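/* Roughly (illustrative only): INPUT = hi * 2^16 + lo is converted as
   (float) lo + (float) hi * 0x1.0p16, each 16-bit half being exactly
   representable in SFmode.  */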
18837 void
18838 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
18839 {
18840 REAL_VALUE_TYPE ONE16r;
18841 rtx fp_hi, fp_lo, int_hi, int_lo, x;
18842
18843 real_ldexp (&ONE16r, &dconst1, 16);
18844 x = const_double_from_real_value (ONE16r, SFmode);
18845 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT (0xffff),
18846 NULL, 0, OPTAB_DIRECT);
18847 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT (16),
18848 NULL, 0, OPTAB_DIRECT);
18849 fp_hi = gen_reg_rtx (SFmode);
18850 fp_lo = gen_reg_rtx (SFmode);
18851 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
18852 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
18853 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
18854 0, OPTAB_DIRECT);
18855 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
18856 0, OPTAB_DIRECT);
18857 if (!rtx_equal_p (target, fp_hi))
18858 emit_move_insn (target, fp_hi);
18859 }
18860
18861 /* floatunsv{4,8}siv{4,8}sf2 expander. Expand code to convert
18862 a vector of unsigned ints VAL to vector of floats TARGET. */
18863
18864 void
18865 ix86_expand_vector_convert_uns_vsivsf (rtx target, rtx val)
18866 {
18867 rtx tmp[8];
18868 REAL_VALUE_TYPE TWO16r;
18869 enum machine_mode intmode = GET_MODE (val);
18870 enum machine_mode fltmode = GET_MODE (target);
18871 rtx (*cvt) (rtx, rtx);
18872
18873 if (intmode == V4SImode)
18874 cvt = gen_floatv4siv4sf2;
18875 else
18876 cvt = gen_floatv8siv8sf2;
18877 tmp[0] = ix86_build_const_vector (intmode, 1, GEN_INT (0xffff));
18878 tmp[0] = force_reg (intmode, tmp[0]);
18879 tmp[1] = expand_simple_binop (intmode, AND, val, tmp[0], NULL_RTX, 1,
18880 OPTAB_DIRECT);
18881 tmp[2] = expand_simple_binop (intmode, LSHIFTRT, val, GEN_INT (16),
18882 NULL_RTX, 1, OPTAB_DIRECT);
18883 tmp[3] = gen_reg_rtx (fltmode);
18884 emit_insn (cvt (tmp[3], tmp[1]));
18885 tmp[4] = gen_reg_rtx (fltmode);
18886 emit_insn (cvt (tmp[4], tmp[2]));
18887 real_ldexp (&TWO16r, &dconst1, 16);
18888 tmp[5] = const_double_from_real_value (TWO16r, SFmode);
18889 tmp[5] = force_reg (fltmode, ix86_build_const_vector (fltmode, 1, tmp[5]));
18890 tmp[6] = expand_simple_binop (fltmode, MULT, tmp[4], tmp[5], NULL_RTX, 1,
18891 OPTAB_DIRECT);
18892 tmp[7] = expand_simple_binop (fltmode, PLUS, tmp[3], tmp[6], target, 1,
18893 OPTAB_DIRECT);
18894 if (tmp[7] != target)
18895 emit_move_insn (target, tmp[7]);
18896 }
18897
18898 /* Adjust a V*SFmode/V*DFmode value VAL so that *sfix_trunc* resp. fix_trunc*
18899 pattern can be used on it instead of *ufix_trunc* resp. fixuns_trunc*.
18900 This is done by doing just signed conversion if < 0x1p31, and otherwise by
18901 subtracting 0x1p31 first and xoring in 0x80000000 from *XORP afterwards. */
18902
18903 rtx
18904 ix86_expand_adjust_ufix_to_sfix_si (rtx val, rtx *xorp)
18905 {
18906 REAL_VALUE_TYPE TWO31r;
18907 rtx two31r, tmp[4];
18908 enum machine_mode mode = GET_MODE (val);
18909 enum machine_mode scalarmode = GET_MODE_INNER (mode);
18910 enum machine_mode intmode = GET_MODE_SIZE (mode) == 32 ? V8SImode : V4SImode;
18911 rtx (*cmp) (rtx, rtx, rtx, rtx);
18912 int i;
18913
18914 for (i = 0; i < 3; i++)
18915 tmp[i] = gen_reg_rtx (mode);
18916 real_ldexp (&TWO31r, &dconst1, 31);
18917 two31r = const_double_from_real_value (TWO31r, scalarmode);
18918 two31r = ix86_build_const_vector (mode, 1, two31r);
18919 two31r = force_reg (mode, two31r);
18920 switch (mode)
18921 {
18922 case V8SFmode: cmp = gen_avx_maskcmpv8sf3; break;
18923 case V4SFmode: cmp = gen_sse_maskcmpv4sf3; break;
18924 case V4DFmode: cmp = gen_avx_maskcmpv4df3; break;
18925 case V2DFmode: cmp = gen_sse2_maskcmpv2df3; break;
18926 default: gcc_unreachable ();
18927 }
18928 tmp[3] = gen_rtx_LE (mode, two31r, val);
18929 emit_insn (cmp (tmp[0], two31r, val, tmp[3]));
18930 tmp[1] = expand_simple_binop (mode, AND, tmp[0], two31r, tmp[1],
18931 0, OPTAB_DIRECT);
18932 if (intmode == V4SImode || TARGET_AVX2)
18933 *xorp = expand_simple_binop (intmode, ASHIFT,
18934 gen_lowpart (intmode, tmp[0]),
18935 GEN_INT (31), NULL_RTX, 0,
18936 OPTAB_DIRECT);
18937 else
18938 {
18939 rtx two31 = GEN_INT ((unsigned HOST_WIDE_INT) 1 << 31);
18940 two31 = ix86_build_const_vector (intmode, 1, two31);
18941 *xorp = expand_simple_binop (intmode, AND,
18942 gen_lowpart (intmode, tmp[0]),
18943 two31, NULL_RTX, 0,
18944 OPTAB_DIRECT);
18945 }
18946 return expand_simple_binop (mode, MINUS, val, tmp[1], tmp[2],
18947 0, OPTAB_DIRECT);
18948 }
18949
18950 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
18951 then replicate the value for all elements of the vector
18952 register. */
18953
18954 rtx
18955 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
18956 {
18957 int i, n_elt;
18958 rtvec v;
18959 enum machine_mode scalar_mode;
18960
18961 switch (mode)
18962 {
18963 case V64QImode:
18964 case V32QImode:
18965 case V16QImode:
18966 case V32HImode:
18967 case V16HImode:
18968 case V8HImode:
18969 case V16SImode:
18970 case V8SImode:
18971 case V4SImode:
18972 case V8DImode:
18973 case V4DImode:
18974 case V2DImode:
18975 gcc_assert (vect);
18976 case V16SFmode:
18977 case V8SFmode:
18978 case V4SFmode:
18979 case V8DFmode:
18980 case V4DFmode:
18981 case V2DFmode:
18982 n_elt = GET_MODE_NUNITS (mode);
18983 v = rtvec_alloc (n_elt);
18984 scalar_mode = GET_MODE_INNER (mode);
18985
18986 RTVEC_ELT (v, 0) = value;
18987
18988 for (i = 1; i < n_elt; ++i)
18989 RTVEC_ELT (v, i) = vect ? value : CONST0_RTX (scalar_mode);
18990
18991 return gen_rtx_CONST_VECTOR (mode, v);
18992
18993 default:
18994 gcc_unreachable ();
18995 }
18996 }
18997
18998 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
18999 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
19000 for an SSE register. If VECT is true, then replicate the mask for
19001 all elements of the vector register. If INVERT is true, then create
19002 a mask excluding the sign bit. */
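/* For example (illustrative only), for V4SFmode with VECT == true and
   INVERT == false this returns a register holding { -0.0, -0.0, -0.0, -0.0 },
   i.e. 0x80000000 in each lane; with INVERT == true each lane holds
   0x7fffffff instead.  */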
19003
19004 rtx
19005 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
19006 {
19007 enum machine_mode vec_mode, imode;
19008 HOST_WIDE_INT hi, lo;
19009 int shift = 63;
19010 rtx v;
19011 rtx mask;
19012
19013 /* Find the sign bit, sign extended to 2*HWI. */
19014 switch (mode)
19015 {
19016 case V16SImode:
19017 case V16SFmode:
19018 case V8SImode:
19019 case V4SImode:
19020 case V8SFmode:
19021 case V4SFmode:
19022 vec_mode = mode;
19023 mode = GET_MODE_INNER (mode);
19024 imode = SImode;
19025 lo = 0x80000000, hi = lo < 0;
19026 break;
19027
19028 case V8DImode:
19029 case V4DImode:
19030 case V2DImode:
19031 case V8DFmode:
19032 case V4DFmode:
19033 case V2DFmode:
19034 vec_mode = mode;
19035 mode = GET_MODE_INNER (mode);
19036 imode = DImode;
19037 if (HOST_BITS_PER_WIDE_INT >= 64)
19038 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
19039 else
19040 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
19041 break;
19042
19043 case TImode:
19044 case TFmode:
19045 vec_mode = VOIDmode;
19046 if (HOST_BITS_PER_WIDE_INT >= 64)
19047 {
19048 imode = TImode;
19049 lo = 0, hi = (HOST_WIDE_INT)1 << shift;
19050 }
19051 else
19052 {
19053 rtvec vec;
19054
19055 imode = DImode;
19056 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
19057
19058 if (invert)
19059 {
19060 lo = ~lo, hi = ~hi;
19061 v = constm1_rtx;
19062 }
19063 else
19064 v = const0_rtx;
19065
19066 mask = immed_double_const (lo, hi, imode);
19067
19068 vec = gen_rtvec (2, v, mask);
19069 v = gen_rtx_CONST_VECTOR (V2DImode, vec);
19070 v = copy_to_mode_reg (mode, gen_lowpart (mode, v));
19071
19072 return v;
19073 }
19074 break;
19075
19076 default:
19077 gcc_unreachable ();
19078 }
19079
19080 if (invert)
19081 lo = ~lo, hi = ~hi;
19082
19083 /* Force this value into the low part of a fp vector constant. */
19084 mask = immed_double_const (lo, hi, imode);
19085 mask = gen_lowpart (mode, mask);
19086
19087 if (vec_mode == VOIDmode)
19088 return force_reg (mode, mask);
19089
19090 v = ix86_build_const_vector (vec_mode, vect, mask);
19091 return force_reg (vec_mode, v);
19092 }
19093
19094 /* Generate code for floating point ABS or NEG. */
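/* With SSE this becomes a bitwise mask operation (illustrative only):
   NEG is an XOR with the sign-bit mask (e.g. xorps with { -0.0, ... })
   and ABS is an AND with the inverted mask (e.g. andps).  */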
19095
19096 void
19097 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
19098 rtx operands[])
19099 {
19100 rtx mask, set, dst, src;
19101 bool use_sse = false;
19102 bool vector_mode = VECTOR_MODE_P (mode);
19103 enum machine_mode vmode = mode;
19104
19105 if (vector_mode)
19106 use_sse = true;
19107 else if (mode == TFmode)
19108 use_sse = true;
19109 else if (TARGET_SSE_MATH)
19110 {
19111 use_sse = SSE_FLOAT_MODE_P (mode);
19112 if (mode == SFmode)
19113 vmode = V4SFmode;
19114 else if (mode == DFmode)
19115 vmode = V2DFmode;
19116 }
19117
19118 /* NEG and ABS performed with SSE use bitwise mask operations.
19119 Create the appropriate mask now. */
19120 if (use_sse)
19121 mask = ix86_build_signbit_mask (vmode, vector_mode, code == ABS);
19122 else
19123 mask = NULL_RTX;
19124
19125 dst = operands[0];
19126 src = operands[1];
19127
19128 set = gen_rtx_fmt_e (code, mode, src);
19129 set = gen_rtx_SET (VOIDmode, dst, set);
19130
19131 if (mask)
19132 {
19133 rtx use, clob;
19134 rtvec par;
19135
19136 use = gen_rtx_USE (VOIDmode, mask);
19137 if (vector_mode)
19138 par = gen_rtvec (2, set, use);
19139 else
19140 {
19141 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
19142 par = gen_rtvec (3, set, use, clob);
19143 }
19144 emit_insn (gen_rtx_PARALLEL (VOIDmode, par));
19145 }
19146 else
19147 emit_insn (set);
19148 }
19149
19150 /* Expand a copysign operation. Special case operand 0 being a constant. */
19151
19152 void
19153 ix86_expand_copysign (rtx operands[])
19154 {
19155 enum machine_mode mode, vmode;
19156 rtx dest, op0, op1, mask, nmask;
19157
19158 dest = operands[0];
19159 op0 = operands[1];
19160 op1 = operands[2];
19161
19162 mode = GET_MODE (dest);
19163
19164 if (mode == SFmode)
19165 vmode = V4SFmode;
19166 else if (mode == DFmode)
19167 vmode = V2DFmode;
19168 else
19169 vmode = mode;
19170
19171 if (GET_CODE (op0) == CONST_DOUBLE)
19172 {
19173 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
19174
19175 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
19176 op0 = simplify_unary_operation (ABS, mode, op0, mode);
19177
19178 if (mode == SFmode || mode == DFmode)
19179 {
19180 if (op0 == CONST0_RTX (mode))
19181 op0 = CONST0_RTX (vmode);
19182 else
19183 {
19184 rtx v = ix86_build_const_vector (vmode, false, op0);
19185
19186 op0 = force_reg (vmode, v);
19187 }
19188 }
19189 else if (op0 != CONST0_RTX (mode))
19190 op0 = force_reg (mode, op0);
19191
19192 mask = ix86_build_signbit_mask (vmode, 0, 0);
19193
19194 if (mode == SFmode)
19195 copysign_insn = gen_copysignsf3_const;
19196 else if (mode == DFmode)
19197 copysign_insn = gen_copysigndf3_const;
19198 else
19199 copysign_insn = gen_copysigntf3_const;
19200
19201 emit_insn (copysign_insn (dest, op0, op1, mask));
19202 }
19203 else
19204 {
19205 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
19206
19207 nmask = ix86_build_signbit_mask (vmode, 0, 1);
19208 mask = ix86_build_signbit_mask (vmode, 0, 0);
19209
19210 if (mode == SFmode)
19211 copysign_insn = gen_copysignsf3_var;
19212 else if (mode == DFmode)
19213 copysign_insn = gen_copysigndf3_var;
19214 else
19215 copysign_insn = gen_copysigntf3_var;
19216
19217 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
19218 }
19219 }
19220
19221 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
19222 be a constant, and so has already been expanded into a vector constant. */
19223
19224 void
19225 ix86_split_copysign_const (rtx operands[])
19226 {
19227 enum machine_mode mode, vmode;
19228 rtx dest, op0, mask, x;
19229
19230 dest = operands[0];
19231 op0 = operands[1];
19232 mask = operands[3];
19233
19234 mode = GET_MODE (dest);
19235 vmode = GET_MODE (mask);
19236
19237 dest = simplify_gen_subreg (vmode, dest, mode, 0);
19238 x = gen_rtx_AND (vmode, dest, mask);
19239 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19240
19241 if (op0 != CONST0_RTX (vmode))
19242 {
19243 x = gen_rtx_IOR (vmode, dest, op0);
19244 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19245 }
19246 }
19247
19248 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
19249 so we have to do two masks. */
19250
19251 void
19252 ix86_split_copysign_var (rtx operands[])
19253 {
19254 enum machine_mode mode, vmode;
19255 rtx dest, scratch, op0, op1, mask, nmask, x;
19256
19257 dest = operands[0];
19258 scratch = operands[1];
19259 op0 = operands[2];
19260 op1 = operands[3];
19261 nmask = operands[4];
19262 mask = operands[5];
19263
19264 mode = GET_MODE (dest);
19265 vmode = GET_MODE (mask);
19266
19267 if (rtx_equal_p (op0, op1))
19268 {
19269 /* Shouldn't happen often (it's useless, obviously), but when it does
19270 we'd generate incorrect code if we continue below. */
19271 emit_move_insn (dest, op0);
19272 return;
19273 }
19274
19275 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
19276 {
19277 gcc_assert (REGNO (op1) == REGNO (scratch));
19278
19279 x = gen_rtx_AND (vmode, scratch, mask);
19280 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
19281
19282 dest = mask;
19283 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
19284 x = gen_rtx_NOT (vmode, dest);
19285 x = gen_rtx_AND (vmode, x, op0);
19286 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19287 }
19288 else
19289 {
19290 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
19291 {
19292 x = gen_rtx_AND (vmode, scratch, mask);
19293 }
19294 else /* alternative 2,4 */
19295 {
19296 gcc_assert (REGNO (mask) == REGNO (scratch));
19297 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
19298 x = gen_rtx_AND (vmode, scratch, op1);
19299 }
19300 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
19301
19302 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
19303 {
19304 dest = simplify_gen_subreg (vmode, op0, mode, 0);
19305 x = gen_rtx_AND (vmode, dest, nmask);
19306 }
19307 else /* alternative 3,4 */
19308 {
19309 gcc_assert (REGNO (nmask) == REGNO (dest));
19310 dest = nmask;
19311 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
19312 x = gen_rtx_AND (vmode, dest, op0);
19313 }
19314 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19315 }
19316
19317 x = gen_rtx_IOR (vmode, dest, scratch);
19318 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19319 }
19320
19321 /* Return TRUE or FALSE depending on whether the first SET in INSN
19322 has source and destination with matching CC modes, and that the
19323 CC mode is at least as constrained as REQ_MODE. */
19324
19325 bool
19326 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
19327 {
19328 rtx set;
19329 enum machine_mode set_mode;
19330
19331 set = PATTERN (insn);
19332 if (GET_CODE (set) == PARALLEL)
19333 set = XVECEXP (set, 0, 0);
19334 gcc_assert (GET_CODE (set) == SET);
19335 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
19336
19337 set_mode = GET_MODE (SET_DEST (set));
19338 switch (set_mode)
19339 {
19340 case CCNOmode:
19341 if (req_mode != CCNOmode
19342 && (req_mode != CCmode
19343 || XEXP (SET_SRC (set), 1) != const0_rtx))
19344 return false;
19345 break;
19346 case CCmode:
19347 if (req_mode == CCGCmode)
19348 return false;
19349 /* FALLTHRU */
19350 case CCGCmode:
19351 if (req_mode == CCGOCmode || req_mode == CCNOmode)
19352 return false;
19353 /* FALLTHRU */
19354 case CCGOCmode:
19355 if (req_mode == CCZmode)
19356 return false;
19357 /* FALLTHRU */
19358 case CCZmode:
19359 break;
19360
19361 case CCAmode:
19362 case CCCmode:
19363 case CCOmode:
19364 case CCSmode:
19365 if (set_mode != req_mode)
19366 return false;
19367 break;
19368
19369 default:
19370 gcc_unreachable ();
19371 }
19372
19373 return GET_MODE (SET_SRC (set)) == set_mode;
19374 }
19375
19376 /* Generate insn patterns to do an integer compare of OPERANDS. */
19377
19378 static rtx
19379 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
19380 {
19381 enum machine_mode cmpmode;
19382 rtx tmp, flags;
19383
19384 cmpmode = SELECT_CC_MODE (code, op0, op1);
19385 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
19386
19387 /* This is very simple, but making the interface the same as in the
19388 FP case makes the rest of the code easier. */
19389 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
19390 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
19391
19392 /* Return the test that should be put into the flags user, i.e.
19393 the bcc, scc, or cmov instruction. */
19394 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
19395 }
19396
19397 /* Figure out whether to use ordered or unordered fp comparisons.
19398 Return the appropriate mode to use. */
19399
19400 enum machine_mode
19401 ix86_fp_compare_mode (enum rtx_code)
19402 {
19403 /* ??? In order to make all comparisons reversible, we do all comparisons
19404 non-trapping when compiling for IEEE. Once gcc is able to distinguish
19405 between trapping and nontrapping forms of comparisons, we can make inequality
19406 comparisons trapping again, since it results in better code when using
19407 FCOM based compares. */
19408 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
19409 }
19410
19411 enum machine_mode
19412 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
19413 {
19414 enum machine_mode mode = GET_MODE (op0);
19415
19416 if (SCALAR_FLOAT_MODE_P (mode))
19417 {
19418 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
19419 return ix86_fp_compare_mode (code);
19420 }
19421
19422 switch (code)
19423 {
19424 /* Only zero flag is needed. */
19425 case EQ: /* ZF=0 */
19426 case NE: /* ZF!=0 */
19427 return CCZmode;
19428 /* Codes needing carry flag. */
19429 case GEU: /* CF=0 */
19430 case LTU: /* CF=1 */
19431 /* Detect overflow checks. They need just the carry flag. */
19432 if (GET_CODE (op0) == PLUS
19433 && rtx_equal_p (op1, XEXP (op0, 0)))
19434 return CCCmode;
19435 else
19436 return CCmode;
19437 case GTU: /* CF=0 & ZF=0 */
19438 case LEU: /* CF=1 | ZF=1 */
19439 return CCmode;
19440 /* Codes possibly doable only with sign flag when
19441 comparing against zero. */
19442 case GE: /* SF=OF or SF=0 */
19443 case LT: /* SF<>OF or SF=1 */
19444 if (op1 == const0_rtx)
19445 return CCGOCmode;
19446 else
19447 /* For other cases Carry flag is not required. */
19448 return CCGCmode;
19449 /* Codes doable only with the sign flag when comparing
19450 against zero, but we lack a jump instruction for it,
19451 so we need to use relational tests against overflow,
19452 which thus needs to be zero. */
19453 case GT: /* ZF=0 & SF=OF */
19454 case LE: /* ZF=1 | SF<>OF */
19455 if (op1 == const0_rtx)
19456 return CCNOmode;
19457 else
19458 return CCGCmode;
19459 /* The strcmp pattern does (use flags) and combine may ask us for the
19460 proper mode. */
19461 case USE:
19462 return CCmode;
19463 default:
19464 gcc_unreachable ();
19465 }
19466 }
19467
19468 /* Return the fixed registers used for condition codes. */
19469
19470 static bool
19471 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
19472 {
19473 *p1 = FLAGS_REG;
19474 *p2 = FPSR_REG;
19475 return true;
19476 }
19477
19478 /* If two condition code modes are compatible, return a condition code
19479 mode which is compatible with both. Otherwise, return
19480 VOIDmode. */
19481
19482 static enum machine_mode
19483 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
19484 {
19485 if (m1 == m2)
19486 return m1;
19487
19488 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
19489 return VOIDmode;
19490
19491 if ((m1 == CCGCmode && m2 == CCGOCmode)
19492 || (m1 == CCGOCmode && m2 == CCGCmode))
19493 return CCGCmode;
19494
19495 if (m1 == CCZmode && (m2 == CCGCmode || m2 == CCGOCmode))
19496 return m2;
19497 else if (m2 == CCZmode && (m1 == CCGCmode || m1 == CCGOCmode))
19498 return m1;
19499
19500 switch (m1)
19501 {
19502 default:
19503 gcc_unreachable ();
19504
19505 case CCmode:
19506 case CCGCmode:
19507 case CCGOCmode:
19508 case CCNOmode:
19509 case CCAmode:
19510 case CCCmode:
19511 case CCOmode:
19512 case CCSmode:
19513 case CCZmode:
19514 switch (m2)
19515 {
19516 default:
19517 return VOIDmode;
19518
19519 case CCmode:
19520 case CCGCmode:
19521 case CCGOCmode:
19522 case CCNOmode:
19523 case CCAmode:
19524 case CCCmode:
19525 case CCOmode:
19526 case CCSmode:
19527 case CCZmode:
19528 return CCmode;
19529 }
19530
19531 case CCFPmode:
19532 case CCFPUmode:
19533 /* These are only compatible with themselves, which we already
19534 checked above. */
19535 return VOIDmode;
19536 }
19537 }
19538
19539
19540 /* Return a comparison we can do that is equivalent to
19541 swap_condition (code), apart possibly from orderedness.
19542 Never change orderedness if TARGET_IEEE_FP, returning
19543 UNKNOWN in that case if necessary. */
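/* For example, swapping GT would nominally give LT, which has no direct
flags encoding (see ix86_fp_compare_code_to_integer below); UNLT does,
so we return UNLT unless TARGET_IEEE_FP forbids changing orderedness. */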
19544
19545 static enum rtx_code
19546 ix86_fp_swap_condition (enum rtx_code code)
19547 {
19548 switch (code)
19549 {
19550 case GT: /* GTU - CF=0 & ZF=0 */
19551 return TARGET_IEEE_FP ? UNKNOWN : UNLT;
19552 case GE: /* GEU - CF=0 */
19553 return TARGET_IEEE_FP ? UNKNOWN : UNLE;
19554 case UNLT: /* LTU - CF=1 */
19555 return TARGET_IEEE_FP ? UNKNOWN : GT;
19556 case UNLE: /* LEU - CF=1 | ZF=1 */
19557 return TARGET_IEEE_FP ? UNKNOWN : GE;
19558 default:
19559 return swap_condition (code);
19560 }
19561 }
19562
19563 /* Return the cost of comparison CODE using the best strategy for performance.
19564 All of the following functions use the number of instructions as a cost metric.
19565 In the future this should be tweaked to compute bytes for optimize_size and
19566 to take into account the performance of various instructions on various CPUs. */
19567
19568 static int
19569 ix86_fp_comparison_cost (enum rtx_code code)
19570 {
19571 int arith_cost;
19572
19573 /* The cost of code using bit-twiddling on %ah. */
19574 switch (code)
19575 {
19576 case UNLE:
19577 case UNLT:
19578 case LTGT:
19579 case GT:
19580 case GE:
19581 case UNORDERED:
19582 case ORDERED:
19583 case UNEQ:
19584 arith_cost = 4;
19585 break;
19586 case LT:
19587 case NE:
19588 case EQ:
19589 case UNGE:
19590 arith_cost = TARGET_IEEE_FP ? 5 : 4;
19591 break;
19592 case LE:
19593 case UNGT:
19594 arith_cost = TARGET_IEEE_FP ? 6 : 4;
19595 break;
19596 default:
19597 gcc_unreachable ();
19598 }
19599
19600 switch (ix86_fp_comparison_strategy (code))
19601 {
19602 case IX86_FPCMP_COMI:
19603 return arith_cost > 4 ? 3 : 2;
19604 case IX86_FPCMP_SAHF:
19605 return arith_cost > 4 ? 4 : 3;
19606 default:
19607 return arith_cost;
19608 }
19609 }
19610
19611 /* Return the strategy to use for floating-point compares. We assume that fcomi
19612 is always preferable where available, since that is also true when looking at size
19613 (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */
19614
19615 enum ix86_fpcmp_strategy
19616 ix86_fp_comparison_strategy (enum rtx_code)
19617 {
19618 /* Do fcomi/sahf based test when profitable. */
19619
19620 if (TARGET_CMOVE)
19621 return IX86_FPCMP_COMI;
19622
19623 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
19624 return IX86_FPCMP_SAHF;
19625
19626 return IX86_FPCMP_ARITH;
19627 }
19628
19629 /* Swap, force into registers, or otherwise massage the two operands
19630 to a fp comparison. The operands are updated in place; the new
19631 comparison code is returned. */
19632
19633 static enum rtx_code
19634 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
19635 {
19636 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
19637 rtx op0 = *pop0, op1 = *pop1;
19638 enum machine_mode op_mode = GET_MODE (op0);
19639 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
19640
19641 /* All of the unordered compare instructions only work on registers.
19642 The same is true of the fcomi compare instructions. The XFmode
19643 compare instructions require registers except when comparing
19644 against zero or when converting operand 1 from fixed point to
19645 floating point. */
19646
19647 if (!is_sse
19648 && (fpcmp_mode == CCFPUmode
19649 || (op_mode == XFmode
19650 && ! (standard_80387_constant_p (op0) == 1
19651 || standard_80387_constant_p (op1) == 1)
19652 && GET_CODE (op1) != FLOAT)
19653 || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI))
19654 {
19655 op0 = force_reg (op_mode, op0);
19656 op1 = force_reg (op_mode, op1);
19657 }
19658 else
19659 {
19660 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
19661 things around if they appear profitable, otherwise force op0
19662 into a register. */
19663
19664 if (standard_80387_constant_p (op0) == 0
19665 || (MEM_P (op0)
19666 && ! (standard_80387_constant_p (op1) == 0
19667 || MEM_P (op1))))
19668 {
19669 enum rtx_code new_code = ix86_fp_swap_condition (code);
19670 if (new_code != UNKNOWN)
19671 {
19672 rtx tmp;
19673 tmp = op0, op0 = op1, op1 = tmp;
19674 code = new_code;
19675 }
19676 }
19677
19678 if (!REG_P (op0))
19679 op0 = force_reg (op_mode, op0);
19680
19681 if (CONSTANT_P (op1))
19682 {
19683 int tmp = standard_80387_constant_p (op1);
19684 if (tmp == 0)
19685 op1 = validize_mem (force_const_mem (op_mode, op1));
19686 else if (tmp == 1)
19687 {
19688 if (TARGET_CMOVE)
19689 op1 = force_reg (op_mode, op1);
19690 }
19691 else
19692 op1 = force_reg (op_mode, op1);
19693 }
19694 }
19695
19696 /* Try to rearrange the comparison to make it cheaper. */
19697 if (ix86_fp_comparison_cost (code)
19698 > ix86_fp_comparison_cost (swap_condition (code))
19699 && (REG_P (op1) || can_create_pseudo_p ()))
19700 {
19701 rtx tmp;
19702 tmp = op0, op0 = op1, op1 = tmp;
19703 code = swap_condition (code);
19704 if (!REG_P (op0))
19705 op0 = force_reg (op_mode, op0);
19706 }
19707
19708 *pop0 = op0;
19709 *pop1 = op1;
19710 return code;
19711 }
19712
19713 /* Convert comparison codes we use to represent FP comparison to integer
19714 code that will result in proper branch. Return UNKNOWN if no such code
19715 is available. */
19716
19717 enum rtx_code
19718 ix86_fp_compare_code_to_integer (enum rtx_code code)
19719 {
19720 switch (code)
19721 {
19722 case GT:
19723 return GTU;
19724 case GE:
19725 return GEU;
19726 case ORDERED:
19727 case UNORDERED:
19728 return code;
19729 break;
19730 case UNEQ:
19731 return EQ;
19732 break;
19733 case UNLT:
19734 return LTU;
19735 break;
19736 case UNLE:
19737 return LEU;
19738 break;
19739 case LTGT:
19740 return NE;
19741 break;
19742 default:
19743 return UNKNOWN;
19744 }
19745 }
19746
19747 /* Generate insn patterns to do a floating point compare of OPERANDS. */
19748
19749 static rtx
19750 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch)
19751 {
19752 enum machine_mode fpcmp_mode, intcmp_mode;
19753 rtx tmp, tmp2;
19754
19755 fpcmp_mode = ix86_fp_compare_mode (code);
19756 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
19757
19758 /* Do fcomi/sahf based test when profitable. */
19759 switch (ix86_fp_comparison_strategy (code))
19760 {
19761 case IX86_FPCMP_COMI:
19762 intcmp_mode = fpcmp_mode;
19763 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
19764 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
19765 tmp);
19766 emit_insn (tmp);
19767 break;
19768
19769 case IX86_FPCMP_SAHF:
19770 intcmp_mode = fpcmp_mode;
19771 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
19772 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
19773 tmp);
19774
19775 if (!scratch)
19776 scratch = gen_reg_rtx (HImode);
19777 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
19778 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
19779 break;
19780
19781 case IX86_FPCMP_ARITH:
19782 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
19783 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
19784 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
19785 if (!scratch)
19786 scratch = gen_reg_rtx (HImode);
19787 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
19788
19789 /* In the unordered case, we have to check C2 for NaNs, which
19790 doesn't happen to work out to anything nice combination-wise.
19791 So do some bit twiddling on the value we've got in AH to come
19792 up with an appropriate set of condition codes. */
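/* After fnstsw, AH holds the FPU condition bits: C0 is 0x01, C2 is 0x04
and C3 is 0x40. An fcom sets them to 0/0/0 for >, C0 for <, C3 for ==
and C0|C2|C3 for unordered, which is what the 0x45, 0x44, 0x40, 0x05,
0x04 and 0x01 masks below are testing. */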
19793
19794 intcmp_mode = CCNOmode;
19795 switch (code)
19796 {
19797 case GT:
19798 case UNGT:
19799 if (code == GT || !TARGET_IEEE_FP)
19800 {
19801 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
19802 code = EQ;
19803 }
19804 else
19805 {
19806 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19807 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
19808 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
19809 intcmp_mode = CCmode;
19810 code = GEU;
19811 }
19812 break;
19813 case LT:
19814 case UNLT:
19815 if (code == LT && TARGET_IEEE_FP)
19816 {
19817 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19818 emit_insn (gen_cmpqi_ext_3 (scratch, const1_rtx));
19819 intcmp_mode = CCmode;
19820 code = EQ;
19821 }
19822 else
19823 {
19824 emit_insn (gen_testqi_ext_ccno_0 (scratch, const1_rtx));
19825 code = NE;
19826 }
19827 break;
19828 case GE:
19829 case UNGE:
19830 if (code == GE || !TARGET_IEEE_FP)
19831 {
19832 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
19833 code = EQ;
19834 }
19835 else
19836 {
19837 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19838 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, const1_rtx));
19839 code = NE;
19840 }
19841 break;
19842 case LE:
19843 case UNLE:
19844 if (code == LE && TARGET_IEEE_FP)
19845 {
19846 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19847 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
19848 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
19849 intcmp_mode = CCmode;
19850 code = LTU;
19851 }
19852 else
19853 {
19854 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
19855 code = NE;
19856 }
19857 break;
19858 case EQ:
19859 case UNEQ:
19860 if (code == EQ && TARGET_IEEE_FP)
19861 {
19862 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19863 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
19864 intcmp_mode = CCmode;
19865 code = EQ;
19866 }
19867 else
19868 {
19869 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
19870 code = NE;
19871 }
19872 break;
19873 case NE:
19874 case LTGT:
19875 if (code == NE && TARGET_IEEE_FP)
19876 {
19877 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19878 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
19879 GEN_INT (0x40)));
19880 code = NE;
19881 }
19882 else
19883 {
19884 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
19885 code = EQ;
19886 }
19887 break;
19888
19889 case UNORDERED:
19890 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
19891 code = NE;
19892 break;
19893 case ORDERED:
19894 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
19895 code = EQ;
19896 break;
19897
19898 default:
19899 gcc_unreachable ();
19900 }
19901 break;
19902
19903 default:
19904 gcc_unreachable();
19905 }
19906
19907 /* Return the test that should be put into the flags user, i.e.
19908 the bcc, scc, or cmov instruction. */
19909 return gen_rtx_fmt_ee (code, VOIDmode,
19910 gen_rtx_REG (intcmp_mode, FLAGS_REG),
19911 const0_rtx);
19912 }
19913
19914 static rtx
19915 ix86_expand_compare (enum rtx_code code, rtx op0, rtx op1)
19916 {
19917 rtx ret;
19918
19919 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
19920 ret = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
19921
19922 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
19923 {
19924 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
19925 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
19926 }
19927 else
19928 ret = ix86_expand_int_compare (code, op0, op1);
19929
19930 return ret;
19931 }
19932
19933 void
19934 ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label)
19935 {
19936 enum machine_mode mode = GET_MODE (op0);
19937 rtx tmp;
19938
19939 switch (mode)
19940 {
19941 case SFmode:
19942 case DFmode:
19943 case XFmode:
19944 case QImode:
19945 case HImode:
19946 case SImode:
19947 simple:
19948 tmp = ix86_expand_compare (code, op0, op1);
19949 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
19950 gen_rtx_LABEL_REF (VOIDmode, label),
19951 pc_rtx);
19952 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
19953 return;
19954
19955 case DImode:
19956 if (TARGET_64BIT)
19957 goto simple;
19958 case TImode:
19959 /* Expand a double-word branch into multiple compare+branch. */
19960 {
19961 rtx lo[2], hi[2];
19962 rtx_code_label *label2;
19963 enum rtx_code code1, code2, code3;
19964 enum machine_mode submode;
19965
19966 if (CONSTANT_P (op0) && !CONSTANT_P (op1))
19967 {
19968 tmp = op0, op0 = op1, op1 = tmp;
19969 code = swap_condition (code);
19970 }
19971
19972 split_double_mode (mode, &op0, 1, lo+0, hi+0);
19973 split_double_mode (mode, &op1, 1, lo+1, hi+1);
19974
19975 submode = mode == DImode ? SImode : DImode;
19976
19977 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
19978 avoid two branches. This costs one extra insn, so disable when
19979 optimizing for size. */
19980
19981 if ((code == EQ || code == NE)
19982 && (!optimize_insn_for_size_p ()
19983 || hi[1] == const0_rtx || lo[1] == const0_rtx))
19984 {
19985 rtx xor0, xor1;
19986
19987 xor1 = hi[0];
19988 if (hi[1] != const0_rtx)
19989 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
19990 NULL_RTX, 0, OPTAB_WIDEN);
19991
19992 xor0 = lo[0];
19993 if (lo[1] != const0_rtx)
19994 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
19995 NULL_RTX, 0, OPTAB_WIDEN);
19996
19997 tmp = expand_binop (submode, ior_optab, xor1, xor0,
19998 NULL_RTX, 0, OPTAB_WIDEN);
19999
20000 ix86_expand_branch (code, tmp, const0_rtx, label);
20001 return;
20002 }
20003
20004 /* Otherwise, if we are doing a less-than or greater-or-equal comparison,
20005 op1 is a constant, and its low word is zero, then we can just
20006 examine the high word. Similarly for a low word of -1 with
20007 less-or-equal or greater-than. */
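/* E.g. the DImode test "a < 0x300000000" reduces to the SImode test
"hi(a) < 3", since the constant's low word is zero. */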
20008
20009 if (CONST_INT_P (hi[1]))
20010 switch (code)
20011 {
20012 case LT: case LTU: case GE: case GEU:
20013 if (lo[1] == const0_rtx)
20014 {
20015 ix86_expand_branch (code, hi[0], hi[1], label);
20016 return;
20017 }
20018 break;
20019 case LE: case LEU: case GT: case GTU:
20020 if (lo[1] == constm1_rtx)
20021 {
20022 ix86_expand_branch (code, hi[0], hi[1], label);
20023 return;
20024 }
20025 break;
20026 default:
20027 break;
20028 }
20029
20030 /* Otherwise, we need two or three jumps. */
20031
20032 label2 = gen_label_rtx ();
20033
20034 code1 = code;
20035 code2 = swap_condition (code);
20036 code3 = unsigned_condition (code);
20037
20038 switch (code)
20039 {
20040 case LT: case GT: case LTU: case GTU:
20041 break;
20042
20043 case LE: code1 = LT; code2 = GT; break;
20044 case GE: code1 = GT; code2 = LT; break;
20045 case LEU: code1 = LTU; code2 = GTU; break;
20046 case GEU: code1 = GTU; code2 = LTU; break;
20047
20048 case EQ: code1 = UNKNOWN; code2 = NE; break;
20049 case NE: code2 = UNKNOWN; break;
20050
20051 default:
20052 gcc_unreachable ();
20053 }
20054
20055 /*
20056 * a < b =>
20057 * if (hi(a) < hi(b)) goto true;
20058 * if (hi(a) > hi(b)) goto false;
20059 * if (lo(a) < lo(b)) goto true;
20060 * false:
20061 */
20062
20063 if (code1 != UNKNOWN)
20064 ix86_expand_branch (code1, hi[0], hi[1], label);
20065 if (code2 != UNKNOWN)
20066 ix86_expand_branch (code2, hi[0], hi[1], label2);
20067
20068 ix86_expand_branch (code3, lo[0], lo[1], label);
20069
20070 if (code2 != UNKNOWN)
20071 emit_label (label2);
20072 return;
20073 }
20074
20075 default:
20076 gcc_assert (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC);
20077 goto simple;
20078 }
20079 }
20080
20081 /* Split branch based on floating point condition. */
20082 void
20083 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
20084 rtx target1, rtx target2, rtx tmp)
20085 {
20086 rtx condition;
20087 rtx i;
20088
20089 if (target2 != pc_rtx)
20090 {
20091 rtx tmp = target2;
20092 code = reverse_condition_maybe_unordered (code);
20093 target2 = target1;
20094 target1 = tmp;
20095 }
20096
20097 condition = ix86_expand_fp_compare (code, op1, op2,
20098 tmp);
20099
20100 i = emit_jump_insn (gen_rtx_SET
20101 (VOIDmode, pc_rtx,
20102 gen_rtx_IF_THEN_ELSE (VOIDmode,
20103 condition, target1, target2)));
20104 if (split_branch_probability >= 0)
20105 add_int_reg_note (i, REG_BR_PROB, split_branch_probability);
20106 }
20107
20108 void
20109 ix86_expand_setcc (rtx dest, enum rtx_code code, rtx op0, rtx op1)
20110 {
20111 rtx ret;
20112
20113 gcc_assert (GET_MODE (dest) == QImode);
20114
20115 ret = ix86_expand_compare (code, op0, op1);
20116 PUT_MODE (ret, QImode);
20117 emit_insn (gen_rtx_SET (VOIDmode, dest, ret));
20118 }
20119
20120 /* Expand a comparison setting or clearing the carry flag. Return true when
20121 successful and set *POP to the comparison operation. */
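/* The resulting LTU/GEU test can then be consumed directly by sbb-style
sequences such as those built in ix86_expand_int_movcc below. */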
20122 static bool
20123 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
20124 {
20125 enum machine_mode mode =
20126 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
20127
20128 /* Do not handle double-mode compares, which go through a special path. */
20129 if (mode == (TARGET_64BIT ? TImode : DImode))
20130 return false;
20131
20132 if (SCALAR_FLOAT_MODE_P (mode))
20133 {
20134 rtx compare_op;
20135 rtx_insn *compare_seq;
20136
20137 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
20138
20139 /* Shortcut: the following common codes never translate
20140 into carry flag compares. */
20141 if (code == EQ || code == NE || code == UNEQ || code == LTGT
20142 || code == ORDERED || code == UNORDERED)
20143 return false;
20144
20145 /* These comparisons would require the zero flag; swap the operands so they won't. */
20146 if ((code == GT || code == UNLE || code == LE || code == UNGT)
20147 && !TARGET_IEEE_FP)
20148 {
20149 rtx tmp = op0;
20150 op0 = op1;
20151 op1 = tmp;
20152 code = swap_condition (code);
20153 }
20154
20155 /* Try to expand the comparison and verify that we end up with
20156 a carry flag based comparison. This fails only when we decide
20157 to expand the comparison using arithmetic, which is not a
20158 common scenario. */
20159 start_sequence ();
20160 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
20161 compare_seq = get_insns ();
20162 end_sequence ();
20163
20164 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
20165 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
20166 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
20167 else
20168 code = GET_CODE (compare_op);
20169
20170 if (code != LTU && code != GEU)
20171 return false;
20172
20173 emit_insn (compare_seq);
20174 *pop = compare_op;
20175 return true;
20176 }
20177
20178 if (!INTEGRAL_MODE_P (mode))
20179 return false;
20180
20181 switch (code)
20182 {
20183 case LTU:
20184 case GEU:
20185 break;
20186
20187 /* Convert a==0 into (unsigned)a<1. */
20188 case EQ:
20189 case NE:
20190 if (op1 != const0_rtx)
20191 return false;
20192 op1 = const1_rtx;
20193 code = (code == EQ ? LTU : GEU);
20194 break;
20195
20196 /* Convert a>b into b<a or a>=b+1. */
20197 case GTU:
20198 case LEU:
20199 if (CONST_INT_P (op1))
20200 {
20201 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
20202 /* Bail out on overflow. We could still swap the operands, but that
20203 would force loading the constant into a register. */
20204 if (op1 == const0_rtx
20205 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
20206 return false;
20207 code = (code == GTU ? GEU : LTU);
20208 }
20209 else
20210 {
20211 rtx tmp = op1;
20212 op1 = op0;
20213 op0 = tmp;
20214 code = (code == GTU ? LTU : GEU);
20215 }
20216 break;
20217
20218 /* Convert a>=0 into (unsigned)a<0x80000000. */
20219 case LT:
20220 case GE:
20221 if (mode == DImode || op1 != const0_rtx)
20222 return false;
20223 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
20224 code = (code == LT ? GEU : LTU);
20225 break;
20226 case LE:
20227 case GT:
20228 if (mode == DImode || op1 != constm1_rtx)
20229 return false;
20230 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
20231 code = (code == LE ? GEU : LTU);
20232 break;
20233
20234 default:
20235 return false;
20236 }
20237 /* Swapping operands may cause a constant to appear as the first operand. */
20238 if (!nonimmediate_operand (op0, VOIDmode))
20239 {
20240 if (!can_create_pseudo_p ())
20241 return false;
20242 op0 = force_reg (mode, op0);
20243 }
20244 *pop = ix86_expand_compare (code, op0, op1);
20245 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
20246 return true;
20247 }
20248
20249 bool
20250 ix86_expand_int_movcc (rtx operands[])
20251 {
20252 enum rtx_code code = GET_CODE (operands[1]), compare_code;
20253 rtx_insn *compare_seq;
20254 rtx compare_op;
20255 enum machine_mode mode = GET_MODE (operands[0]);
20256 bool sign_bit_compare_p = false;
20257 rtx op0 = XEXP (operands[1], 0);
20258 rtx op1 = XEXP (operands[1], 1);
20259
20260 if (GET_MODE (op0) == TImode
20261 || (GET_MODE (op0) == DImode
20262 && !TARGET_64BIT))
20263 return false;
20264
20265 start_sequence ();
20266 compare_op = ix86_expand_compare (code, op0, op1);
20267 compare_seq = get_insns ();
20268 end_sequence ();
20269
20270 compare_code = GET_CODE (compare_op);
20271
20272 if ((op1 == const0_rtx && (code == GE || code == LT))
20273 || (op1 == constm1_rtx && (code == GT || code == LE)))
20274 sign_bit_compare_p = true;
20275
20276 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
20277 HImode insns, we'd be swallowed in word prefix ops. */
20278
20279 if ((mode != HImode || TARGET_FAST_PREFIX)
20280 && (mode != (TARGET_64BIT ? TImode : DImode))
20281 && CONST_INT_P (operands[2])
20282 && CONST_INT_P (operands[3]))
20283 {
20284 rtx out = operands[0];
20285 HOST_WIDE_INT ct = INTVAL (operands[2]);
20286 HOST_WIDE_INT cf = INTVAL (operands[3]);
20287 HOST_WIDE_INT diff;
20288
20289 diff = ct - cf;
20290 /* Sign bit compares are better done using shifts than by using
20291 sbb. */
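/* E.g. "x < 0 ? -1 : 0" typically becomes a single arithmetic right shift
of x by (bits - 1), whereas the sbb path first materializes 0/-1 from
the carry flag and then adjusts it with the add/or/not/and steps below. */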
20292 if (sign_bit_compare_p
20293 || ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
20294 {
20295 /* Detect overlap between destination and compare sources. */
20296 rtx tmp = out;
20297
20298 if (!sign_bit_compare_p)
20299 {
20300 rtx flags;
20301 bool fpcmp = false;
20302
20303 compare_code = GET_CODE (compare_op);
20304
20305 flags = XEXP (compare_op, 0);
20306
20307 if (GET_MODE (flags) == CCFPmode
20308 || GET_MODE (flags) == CCFPUmode)
20309 {
20310 fpcmp = true;
20311 compare_code
20312 = ix86_fp_compare_code_to_integer (compare_code);
20313 }
20314
20315 /* To simplify the rest of the code, restrict to the GEU case. */
20316 if (compare_code == LTU)
20317 {
20318 HOST_WIDE_INT tmp = ct;
20319 ct = cf;
20320 cf = tmp;
20321 compare_code = reverse_condition (compare_code);
20322 code = reverse_condition (code);
20323 }
20324 else
20325 {
20326 if (fpcmp)
20327 PUT_CODE (compare_op,
20328 reverse_condition_maybe_unordered
20329 (GET_CODE (compare_op)));
20330 else
20331 PUT_CODE (compare_op,
20332 reverse_condition (GET_CODE (compare_op)));
20333 }
20334 diff = ct - cf;
20335
20336 if (reg_overlap_mentioned_p (out, op0)
20337 || reg_overlap_mentioned_p (out, op1))
20338 tmp = gen_reg_rtx (mode);
20339
20340 if (mode == DImode)
20341 emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op));
20342 else
20343 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp),
20344 flags, compare_op));
20345 }
20346 else
20347 {
20348 if (code == GT || code == GE)
20349 code = reverse_condition (code);
20350 else
20351 {
20352 HOST_WIDE_INT tmp = ct;
20353 ct = cf;
20354 cf = tmp;
20355 diff = ct - cf;
20356 }
20357 tmp = emit_store_flag (tmp, code, op0, op1, VOIDmode, 0, -1);
20358 }
20359
20360 if (diff == 1)
20361 {
20362 /*
20363 * cmpl op0,op1
20364 * sbbl dest,dest
20365 * [addl dest, ct]
20366 *
20367 * Size 5 - 8.
20368 */
20369 if (ct)
20370 tmp = expand_simple_binop (mode, PLUS,
20371 tmp, GEN_INT (ct),
20372 copy_rtx (tmp), 1, OPTAB_DIRECT);
20373 }
20374 else if (cf == -1)
20375 {
20376 /*
20377 * cmpl op0,op1
20378 * sbbl dest,dest
20379 * orl $ct, dest
20380 *
20381 * Size 8.
20382 */
20383 tmp = expand_simple_binop (mode, IOR,
20384 tmp, GEN_INT (ct),
20385 copy_rtx (tmp), 1, OPTAB_DIRECT);
20386 }
20387 else if (diff == -1 && ct)
20388 {
20389 /*
20390 * cmpl op0,op1
20391 * sbbl dest,dest
20392 * notl dest
20393 * [addl dest, cf]
20394 *
20395 * Size 8 - 11.
20396 */
20397 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
20398 if (cf)
20399 tmp = expand_simple_binop (mode, PLUS,
20400 copy_rtx (tmp), GEN_INT (cf),
20401 copy_rtx (tmp), 1, OPTAB_DIRECT);
20402 }
20403 else
20404 {
20405 /*
20406 * cmpl op0,op1
20407 * sbbl dest,dest
20408 * [notl dest]
20409 * andl cf - ct, dest
20410 * [addl dest, ct]
20411 *
20412 * Size 8 - 11.
20413 */
20414
20415 if (cf == 0)
20416 {
20417 cf = ct;
20418 ct = 0;
20419 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
20420 }
20421
20422 tmp = expand_simple_binop (mode, AND,
20423 copy_rtx (tmp),
20424 gen_int_mode (cf - ct, mode),
20425 copy_rtx (tmp), 1, OPTAB_DIRECT);
20426 if (ct)
20427 tmp = expand_simple_binop (mode, PLUS,
20428 copy_rtx (tmp), GEN_INT (ct),
20429 copy_rtx (tmp), 1, OPTAB_DIRECT);
20430 }
20431
20432 if (!rtx_equal_p (tmp, out))
20433 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
20434
20435 return true;
20436 }
20437
20438 if (diff < 0)
20439 {
20440 enum machine_mode cmp_mode = GET_MODE (op0);
20441
20442 HOST_WIDE_INT tmp;
20443 tmp = ct, ct = cf, cf = tmp;
20444 diff = -diff;
20445
20446 if (SCALAR_FLOAT_MODE_P (cmp_mode))
20447 {
20448 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
20449
20450 /* We may be reversing an unordered compare to a normal compare, which
20451 is not valid in general (we may convert a non-trapping condition
20452 to a trapping one); however, on i386 we currently emit all
20453 comparisons unordered. */
20454 compare_code = reverse_condition_maybe_unordered (compare_code);
20455 code = reverse_condition_maybe_unordered (code);
20456 }
20457 else
20458 {
20459 compare_code = reverse_condition (compare_code);
20460 code = reverse_condition (code);
20461 }
20462 }
20463
20464 compare_code = UNKNOWN;
20465 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT
20466 && CONST_INT_P (op1))
20467 {
20468 if (op1 == const0_rtx
20469 && (code == LT || code == GE))
20470 compare_code = code;
20471 else if (op1 == constm1_rtx)
20472 {
20473 if (code == LE)
20474 compare_code = LT;
20475 else if (code == GT)
20476 compare_code = GE;
20477 }
20478 }
20479
20480 /* Optimize dest = (op0 < 0) ? -1 : cf. */
20481 if (compare_code != UNKNOWN
20482 && GET_MODE (op0) == GET_MODE (out)
20483 && (cf == -1 || ct == -1))
20484 {
20485 /* If the lea code below could be used, only optimize
20486 if it results in a 2-insn sequence. */
20487
20488 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
20489 || diff == 3 || diff == 5 || diff == 9)
20490 || (compare_code == LT && ct == -1)
20491 || (compare_code == GE && cf == -1))
20492 {
20493 /*
20494 * notl op1 (if necessary)
20495 * sarl $31, op1
20496 * orl cf, op1
20497 */
20498 if (ct != -1)
20499 {
20500 cf = ct;
20501 ct = -1;
20502 code = reverse_condition (code);
20503 }
20504
20505 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
20506
20507 out = expand_simple_binop (mode, IOR,
20508 out, GEN_INT (cf),
20509 out, 1, OPTAB_DIRECT);
20510 if (out != operands[0])
20511 emit_move_insn (operands[0], out);
20512
20513 return true;
20514 }
20515 }
20516
20517
20518 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
20519 || diff == 3 || diff == 5 || diff == 9)
20520 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
20521 && (mode != DImode
20522 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
20523 {
20524 /*
20525 * xorl dest,dest
20526 * cmpl op1,op2
20527 * setcc dest
20528 * lea cf(dest*(ct-cf)),dest
20529 *
20530 * Size 14.
20531 *
20532 * This also catches the degenerate setcc-only case.
20533 */
20534
20535 rtx tmp;
20536 int nops;
20537
20538 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
20539
20540 nops = 0;
20541 /* On x86_64 the lea instruction operates on Pmode, so we need
20542 to do the arithmetic in the proper mode to match. */
20543 if (diff == 1)
20544 tmp = copy_rtx (out);
20545 else
20546 {
20547 rtx out1;
20548 out1 = copy_rtx (out);
20549 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
20550 nops++;
20551 if (diff & 1)
20552 {
20553 tmp = gen_rtx_PLUS (mode, tmp, out1);
20554 nops++;
20555 }
20556 }
20557 if (cf != 0)
20558 {
20559 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
20560 nops++;
20561 }
20562 if (!rtx_equal_p (tmp, out))
20563 {
20564 if (nops == 1)
20565 out = force_operand (tmp, copy_rtx (out));
20566 else
20567 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
20568 }
20569 if (!rtx_equal_p (out, operands[0]))
20570 emit_move_insn (operands[0], copy_rtx (out));
20571
20572 return true;
20573 }
20574
20575 /*
20576 * General case: Jumpful:
20577 * xorl dest,dest cmpl op1, op2
20578 * cmpl op1, op2 movl ct, dest
20579 * setcc dest jcc 1f
20580 * decl dest movl cf, dest
20581 * andl (cf-ct),dest 1:
20582 * addl ct,dest
20583 *
20584 * Size 20. Size 14.
20585 *
20586 * This is reasonably steep, but branch mispredict costs are
20587 * high on modern CPUs, so consider failing only if optimizing
20588 * for space.
20589 */
20590
20591 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
20592 && BRANCH_COST (optimize_insn_for_speed_p (),
20593 false) >= 2)
20594 {
20595 if (cf == 0)
20596 {
20597 enum machine_mode cmp_mode = GET_MODE (op0);
20598
20599 cf = ct;
20600 ct = 0;
20601
20602 if (SCALAR_FLOAT_MODE_P (cmp_mode))
20603 {
20604 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
20605
20606 /* We may be reversing an unordered compare to a normal compare,
20607 which is not valid in general (we may convert a non-trapping
20608 condition to a trapping one); however, on i386 we currently
20609 emit all comparisons unordered. */
20610 code = reverse_condition_maybe_unordered (code);
20611 }
20612 else
20613 {
20614 code = reverse_condition (code);
20615 if (compare_code != UNKNOWN)
20616 compare_code = reverse_condition (compare_code);
20617 }
20618 }
20619
20620 if (compare_code != UNKNOWN)
20621 {
20622 /* notl op1 (if needed)
20623 sarl $31, op1
20624 andl (cf-ct), op1
20625 addl ct, op1
20626
20627 For x < 0 (resp. x <= -1) there will be no notl,
20628 so if possible swap the constants to get rid of the
20629 complement.
20630 True/false will be -1/0 while code below (store flag
20631 followed by decrement) is 0/-1, so the constants need
20632 to be exchanged once more. */
20633
20634 if (compare_code == GE || !cf)
20635 {
20636 code = reverse_condition (code);
20637 compare_code = LT;
20638 }
20639 else
20640 {
20641 HOST_WIDE_INT tmp = cf;
20642 cf = ct;
20643 ct = tmp;
20644 }
20645
20646 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
20647 }
20648 else
20649 {
20650 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
20651
20652 out = expand_simple_binop (mode, PLUS, copy_rtx (out),
20653 constm1_rtx,
20654 copy_rtx (out), 1, OPTAB_DIRECT);
20655 }
20656
20657 out = expand_simple_binop (mode, AND, copy_rtx (out),
20658 gen_int_mode (cf - ct, mode),
20659 copy_rtx (out), 1, OPTAB_DIRECT);
20660 if (ct)
20661 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
20662 copy_rtx (out), 1, OPTAB_DIRECT);
20663 if (!rtx_equal_p (out, operands[0]))
20664 emit_move_insn (operands[0], copy_rtx (out));
20665
20666 return true;
20667 }
20668 }
20669
20670 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
20671 {
20672 /* Try a few things more with specific constants and a variable. */
20673
20674 optab op;
20675 rtx var, orig_out, out, tmp;
20676
20677 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
20678 return false;
20679
20680 /* If one of the two operands is an interesting constant, load a
20681 constant with the code above and mask the variable in with a logical operation. */
20682
20683 if (CONST_INT_P (operands[2]))
20684 {
20685 var = operands[3];
20686 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
20687 operands[3] = constm1_rtx, op = and_optab;
20688 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
20689 operands[3] = const0_rtx, op = ior_optab;
20690 else
20691 return false;
20692 }
20693 else if (CONST_INT_P (operands[3]))
20694 {
20695 var = operands[2];
20696 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
20697 operands[2] = constm1_rtx, op = and_optab;
20698 else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
20699 operands[2] = const0_rtx, op = ior_optab;
20700 else
20701 return false;
20702 }
20703 else
20704 return false;
20705
20706 orig_out = operands[0];
20707 tmp = gen_reg_rtx (mode);
20708 operands[0] = tmp;
20709
20710 /* Recurse to get the constant loaded. */
20711 if (ix86_expand_int_movcc (operands) == 0)
20712 return false;
20713
20714 /* Mask in the interesting variable. */
20715 out = expand_binop (mode, op, var, tmp, orig_out, 0,
20716 OPTAB_WIDEN);
20717 if (!rtx_equal_p (out, orig_out))
20718 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
20719
20720 return true;
20721 }
20722
20723 /*
20724 * For comparison with above,
20725 *
20726 * movl cf,dest
20727 * movl ct,tmp
20728 * cmpl op1,op2
20729 * cmovcc tmp,dest
20730 *
20731 * Size 15.
20732 */
20733
20734 if (! nonimmediate_operand (operands[2], mode))
20735 operands[2] = force_reg (mode, operands[2]);
20736 if (! nonimmediate_operand (operands[3], mode))
20737 operands[3] = force_reg (mode, operands[3]);
20738
20739 if (! register_operand (operands[2], VOIDmode)
20740 && (mode == QImode
20741 || ! register_operand (operands[3], VOIDmode)))
20742 operands[2] = force_reg (mode, operands[2]);
20743
20744 if (mode == QImode
20745 && ! register_operand (operands[3], VOIDmode))
20746 operands[3] = force_reg (mode, operands[3]);
20747
20748 emit_insn (compare_seq);
20749 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
20750 gen_rtx_IF_THEN_ELSE (mode,
20751 compare_op, operands[2],
20752 operands[3])));
20753 return true;
20754 }
20755
20756 /* Swap, force into registers, or otherwise massage the two operands
20757 to an sse comparison with a mask result. Thus we differ a bit from
20758 ix86_prepare_fp_compare_args which expects to produce a flags result.
20759
20760 The DEST operand exists to help determine whether to commute commutative
20761 operators. The POP0/POP1 operands are updated in place. The new
20762 comparison code is returned, or UNKNOWN if not implementable. */
20763
20764 static enum rtx_code
20765 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
20766 rtx *pop0, rtx *pop1)
20767 {
20768 rtx tmp;
20769
20770 switch (code)
20771 {
20772 case LTGT:
20773 case UNEQ:
20774 /* AVX supports all the needed comparisons. */
20775 if (TARGET_AVX)
20776 break;
20777 /* We have no LTGT as an operator. We could implement it with
20778 NE & ORDERED, but this requires an extra temporary. It's
20779 not clear that it's worth it. */
20780 return UNKNOWN;
20781
20782 case LT:
20783 case LE:
20784 case UNGT:
20785 case UNGE:
20786 /* These are supported directly. */
20787 break;
20788
20789 case EQ:
20790 case NE:
20791 case UNORDERED:
20792 case ORDERED:
20793 /* AVX has 3 operand comparisons, no need to swap anything. */
20794 if (TARGET_AVX)
20795 break;
20796 /* For commutative operators, try to canonicalize the destination
20797 operand to be first in the comparison - this helps reload to
20798 avoid extra moves. */
20799 if (!dest || !rtx_equal_p (dest, *pop1))
20800 break;
20801 /* FALLTHRU */
20802
20803 case GE:
20804 case GT:
20805 case UNLE:
20806 case UNLT:
20807 /* These are not supported directly before AVX, and furthermore
20808 ix86_expand_sse_fp_minmax only optimizes LT/UNGE. Swap the
20809 comparison operands to transform into something that is
20810 supported. */
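/* E.g. GE a b is rewritten as LE b a, which maps directly onto the SSE
cmple* comparison instructions. */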
20811 tmp = *pop0;
20812 *pop0 = *pop1;
20813 *pop1 = tmp;
20814 code = swap_condition (code);
20815 break;
20816
20817 default:
20818 gcc_unreachable ();
20819 }
20820
20821 return code;
20822 }
20823
20824 /* Detect conditional moves that exactly match min/max operational
20825 semantics. Note that this is IEEE safe, as long as we don't
20826 interchange the operands.
20827
20828 Returns FALSE if this conditional move doesn't match a MIN/MAX,
20829 and TRUE if the operation is successful and instructions are emitted. */
20830
20831 static bool
20832 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
20833 rtx cmp_op1, rtx if_true, rtx if_false)
20834 {
20835 enum machine_mode mode;
20836 bool is_min;
20837 rtx tmp;
20838
20839 if (code == LT)
20840 ;
20841 else if (code == UNGE)
20842 {
20843 tmp = if_true;
20844 if_true = if_false;
20845 if_false = tmp;
20846 }
20847 else
20848 return false;
20849
20850 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
20851 is_min = true;
20852 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
20853 is_min = false;
20854 else
20855 return false;
20856
20857 mode = GET_MODE (dest);
20858
20859 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
20860 but MODE may be a vector mode and thus not appropriate. */
20861 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
20862 {
20863 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
20864 rtvec v;
20865
20866 if_true = force_reg (mode, if_true);
20867 v = gen_rtvec (2, if_true, if_false);
20868 tmp = gen_rtx_UNSPEC (mode, v, u);
20869 }
20870 else
20871 {
20872 code = is_min ? SMIN : SMAX;
20873 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
20874 }
20875
20876 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
20877 return true;
20878 }
20879
20880 /* Expand an sse vector comparison. Return the register with the result. */
20881
20882 static rtx
20883 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
20884 rtx op_true, rtx op_false)
20885 {
20886 enum machine_mode mode = GET_MODE (dest);
20887 enum machine_mode cmp_ops_mode = GET_MODE (cmp_op0);
20888
20889 /* In the general case the result of the comparison can differ from the operands' type. */
20890 enum machine_mode cmp_mode;
20891
20892 /* In AVX512F the result of comparison is an integer mask. */
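/* E.g. comparing two V16SImode operands yields a 16-bit (HImode) mask
with one bit per element. */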
20893 bool maskcmp = false;
20894 rtx x;
20895
20896 if (GET_MODE_SIZE (cmp_ops_mode) == 64)
20897 {
20898 cmp_mode = mode_for_size (GET_MODE_NUNITS (cmp_ops_mode), MODE_INT, 0);
20899 gcc_assert (cmp_mode != BLKmode);
20900
20901 maskcmp = true;
20902 }
20903 else
20904 cmp_mode = cmp_ops_mode;
20905
20906
20907 cmp_op0 = force_reg (cmp_ops_mode, cmp_op0);
20908 if (!nonimmediate_operand (cmp_op1, cmp_ops_mode))
20909 cmp_op1 = force_reg (cmp_ops_mode, cmp_op1);
20910
20911 if (optimize
20912 || reg_overlap_mentioned_p (dest, op_true)
20913 || reg_overlap_mentioned_p (dest, op_false))
20914 dest = gen_reg_rtx (maskcmp ? cmp_mode : mode);
20915
20916 /* Compare patterns for int modes are unspec in AVX512F only. */
20917 if (maskcmp && (code == GT || code == EQ))
20918 {
20919 rtx (*gen)(rtx, rtx, rtx);
20920
20921 switch (cmp_ops_mode)
20922 {
20923 case V16SImode:
20924 gen = code == GT ? gen_avx512f_gtv16si3 : gen_avx512f_eqv16si3_1;
20925 break;
20926 case V8DImode:
20927 gen = code == GT ? gen_avx512f_gtv8di3 : gen_avx512f_eqv8di3_1;
20928 break;
20929 default:
20930 gen = NULL;
20931 }
20932
20933 if (gen)
20934 {
20935 emit_insn (gen (dest, cmp_op0, cmp_op1));
20936 return dest;
20937 }
20938 }
20939 x = gen_rtx_fmt_ee (code, cmp_mode, cmp_op0, cmp_op1);
20940
20941 if (cmp_mode != mode && !maskcmp)
20942 {
20943 x = force_reg (cmp_ops_mode, x);
20944 convert_move (dest, x, false);
20945 }
20946 else
20947 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
20948
20949 return dest;
20950 }
20951
20952 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
20953 operations. This is used for both scalar and vector conditional moves. */
20954
20955 static void
20956 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
20957 {
20958 enum machine_mode mode = GET_MODE (dest);
20959 enum machine_mode cmpmode = GET_MODE (cmp);
20960
20961 /* In AVX512F the result of comparison is an integer mask. */
20962 bool maskcmp = (mode != cmpmode && TARGET_AVX512F);
20963
20964 rtx t2, t3, x;
20965
20966 if (vector_all_ones_operand (op_true, mode)
20967 && rtx_equal_p (op_false, CONST0_RTX (mode))
20968 && !maskcmp)
20969 {
20970 emit_insn (gen_rtx_SET (VOIDmode, dest, cmp));
20971 }
20972 else if (op_false == CONST0_RTX (mode)
20973 && !maskcmp)
20974 {
20975 op_true = force_reg (mode, op_true);
20976 x = gen_rtx_AND (mode, cmp, op_true);
20977 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
20978 }
20979 else if (op_true == CONST0_RTX (mode)
20980 && !maskcmp)
20981 {
20982 op_false = force_reg (mode, op_false);
20983 x = gen_rtx_NOT (mode, cmp);
20984 x = gen_rtx_AND (mode, x, op_false);
20985 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
20986 }
20987 else if (INTEGRAL_MODE_P (mode) && op_true == CONSTM1_RTX (mode)
20988 && !maskcmp)
20989 {
20990 op_false = force_reg (mode, op_false);
20991 x = gen_rtx_IOR (mode, cmp, op_false);
20992 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
20993 }
20994 else if (TARGET_XOP
20995 && !maskcmp)
20996 {
20997 op_true = force_reg (mode, op_true);
20998
20999 if (!nonimmediate_operand (op_false, mode))
21000 op_false = force_reg (mode, op_false);
21001
21002 emit_insn (gen_rtx_SET (mode, dest,
21003 gen_rtx_IF_THEN_ELSE (mode, cmp,
21004 op_true,
21005 op_false)));
21006 }
21007 else
21008 {
21009 rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;
21010 rtx d = dest;
21011
21012 if (!nonimmediate_operand (op_true, mode))
21013 op_true = force_reg (mode, op_true);
21014
21015 op_false = force_reg (mode, op_false);
21016
21017 switch (mode)
21018 {
21019 case V4SFmode:
21020 if (TARGET_SSE4_1)
21021 gen = gen_sse4_1_blendvps;
21022 break;
21023 case V2DFmode:
21024 if (TARGET_SSE4_1)
21025 gen = gen_sse4_1_blendvpd;
21026 break;
21027 case V16QImode:
21028 case V8HImode:
21029 case V4SImode:
21030 case V2DImode:
21031 if (TARGET_SSE4_1)
21032 {
21033 gen = gen_sse4_1_pblendvb;
21034 if (mode != V16QImode)
21035 d = gen_reg_rtx (V16QImode);
21036 op_false = gen_lowpart (V16QImode, op_false);
21037 op_true = gen_lowpart (V16QImode, op_true);
21038 cmp = gen_lowpart (V16QImode, cmp);
21039 }
21040 break;
21041 case V8SFmode:
21042 if (TARGET_AVX)
21043 gen = gen_avx_blendvps256;
21044 break;
21045 case V4DFmode:
21046 if (TARGET_AVX)
21047 gen = gen_avx_blendvpd256;
21048 break;
21049 case V32QImode:
21050 case V16HImode:
21051 case V8SImode:
21052 case V4DImode:
21053 if (TARGET_AVX2)
21054 {
21055 gen = gen_avx2_pblendvb;
21056 if (mode != V32QImode)
21057 d = gen_reg_rtx (V32QImode);
21058 op_false = gen_lowpart (V32QImode, op_false);
21059 op_true = gen_lowpart (V32QImode, op_true);
21060 cmp = gen_lowpart (V32QImode, cmp);
21061 }
21062 break;
21063
21064 case V64QImode:
21065 gen = gen_avx512bw_blendmv64qi;
21066 break;
21067 case V32HImode:
21068 gen = gen_avx512bw_blendmv32hi;
21069 break;
21070 case V16SImode:
21071 gen = gen_avx512f_blendmv16si;
21072 break;
21073 case V8DImode:
21074 gen = gen_avx512f_blendmv8di;
21075 break;
21076 case V8DFmode:
21077 gen = gen_avx512f_blendmv8df;
21078 break;
21079 case V16SFmode:
21080 gen = gen_avx512f_blendmv16sf;
21081 break;
21082
21083 default:
21084 break;
21085 }
21086
21087 if (gen != NULL)
21088 {
21089 emit_insn (gen (d, op_false, op_true, cmp));
21090 if (d != dest)
21091 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
21092 }
21093 else
21094 {
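/* No blend instruction is available, so emulate the conditional move
as dest = (op_true & cmp) | (op_false & ~cmp). */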
21095 op_true = force_reg (mode, op_true);
21096
21097 t2 = gen_reg_rtx (mode);
21098 if (optimize)
21099 t3 = gen_reg_rtx (mode);
21100 else
21101 t3 = dest;
21102
21103 x = gen_rtx_AND (mode, op_true, cmp);
21104 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
21105
21106 x = gen_rtx_NOT (mode, cmp);
21107 x = gen_rtx_AND (mode, x, op_false);
21108 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
21109
21110 x = gen_rtx_IOR (mode, t3, t2);
21111 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
21112 }
21113 }
21114 }
21115
21116 /* Expand a floating-point conditional move. Return true if successful. */
21117
21118 bool
21119 ix86_expand_fp_movcc (rtx operands[])
21120 {
21121 enum machine_mode mode = GET_MODE (operands[0]);
21122 enum rtx_code code = GET_CODE (operands[1]);
21123 rtx tmp, compare_op;
21124 rtx op0 = XEXP (operands[1], 0);
21125 rtx op1 = XEXP (operands[1], 1);
21126
21127 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
21128 {
21129 enum machine_mode cmode;
21130
21131 /* Since we have no cmove for SSE registers, don't force bad register
21132 allocation just to gain access to it. Deny movcc when the
21133 comparison mode doesn't match the move mode. */
21134 cmode = GET_MODE (op0);
21135 if (cmode == VOIDmode)
21136 cmode = GET_MODE (op1);
21137 if (cmode != mode)
21138 return false;
21139
21140 code = ix86_prepare_sse_fp_compare_args (operands[0], code, &op0, &op1);
21141 if (code == UNKNOWN)
21142 return false;
21143
21144 if (ix86_expand_sse_fp_minmax (operands[0], code, op0, op1,
21145 operands[2], operands[3]))
21146 return true;
21147
21148 tmp = ix86_expand_sse_cmp (operands[0], code, op0, op1,
21149 operands[2], operands[3]);
21150 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
21151 return true;
21152 }
21153
21154 if (GET_MODE (op0) == TImode
21155 || (GET_MODE (op0) == DImode
21156 && !TARGET_64BIT))
21157 return false;
21158
21159 /* The floating point conditional move instructions don't directly
21160 support conditions resulting from a signed integer comparison. */
21161
21162 compare_op = ix86_expand_compare (code, op0, op1);
21163 if (!fcmov_comparison_operator (compare_op, VOIDmode))
21164 {
21165 tmp = gen_reg_rtx (QImode);
21166 ix86_expand_setcc (tmp, code, op0, op1);
21167
21168 compare_op = ix86_expand_compare (NE, tmp, const0_rtx);
21169 }
21170
21171 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
21172 gen_rtx_IF_THEN_ELSE (mode, compare_op,
21173 operands[2], operands[3])));
21174
21175 return true;
21176 }
21177
21178 /* Expand a floating-point vector conditional move; a vcond operation
21179 rather than a movcc operation. */
21180
21181 bool
21182 ix86_expand_fp_vcond (rtx operands[])
21183 {
21184 enum rtx_code code = GET_CODE (operands[3]);
21185 rtx cmp;
21186
21187 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
21188 &operands[4], &operands[5]);
21189 if (code == UNKNOWN)
21190 {
21191 rtx temp;
21192 switch (GET_CODE (operands[3]))
21193 {
21194 case LTGT:
21195 temp = ix86_expand_sse_cmp (operands[0], ORDERED, operands[4],
21196 operands[5], operands[0], operands[0]);
21197 cmp = ix86_expand_sse_cmp (operands[0], NE, operands[4],
21198 operands[5], operands[1], operands[2]);
21199 code = AND;
21200 break;
21201 case UNEQ:
21202 temp = ix86_expand_sse_cmp (operands[0], UNORDERED, operands[4],
21203 operands[5], operands[0], operands[0]);
21204 cmp = ix86_expand_sse_cmp (operands[0], EQ, operands[4],
21205 operands[5], operands[1], operands[2]);
21206 code = IOR;
21207 break;
21208 default:
21209 gcc_unreachable ();
21210 }
21211 cmp = expand_simple_binop (GET_MODE (cmp), code, temp, cmp, cmp, 1,
21212 OPTAB_DIRECT);
21213 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
21214 return true;
21215 }
21216
21217 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
21218 operands[5], operands[1], operands[2]))
21219 return true;
21220
21221 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
21222 operands[1], operands[2]);
21223 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
21224 return true;
21225 }
21226
21227 /* Expand a signed/unsigned integral vector conditional move. */
21228
21229 bool
21230 ix86_expand_int_vcond (rtx operands[])
21231 {
21232 enum machine_mode data_mode = GET_MODE (operands[0]);
21233 enum machine_mode mode = GET_MODE (operands[4]);
21234 enum rtx_code code = GET_CODE (operands[3]);
21235 bool negate = false;
21236 rtx x, cop0, cop1;
21237
21238 cop0 = operands[4];
21239 cop1 = operands[5];
21240
21241 /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31
21242 and x < 0 ? 1 : 0 into (unsigned) x >> 31. */
21243 if ((code == LT || code == GE)
21244 && data_mode == mode
21245 && cop1 == CONST0_RTX (mode)
21246 && operands[1 + (code == LT)] == CONST0_RTX (data_mode)
21247 && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) > 1
21248 && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) <= 8
21249 && (GET_MODE_SIZE (data_mode) == 16
21250 || (TARGET_AVX2 && GET_MODE_SIZE (data_mode) == 32)))
21251 {
21252 rtx negop = operands[2 - (code == LT)];
21253 int shift = GET_MODE_BITSIZE (GET_MODE_INNER (data_mode)) - 1;
21254 if (negop == CONST1_RTX (data_mode))
21255 {
21256 rtx res = expand_simple_binop (mode, LSHIFTRT, cop0, GEN_INT (shift),
21257 operands[0], 1, OPTAB_DIRECT);
21258 if (res != operands[0])
21259 emit_move_insn (operands[0], res);
21260 return true;
21261 }
21262 else if (GET_MODE_INNER (data_mode) != DImode
21263 && vector_all_ones_operand (negop, data_mode))
21264 {
21265 rtx res = expand_simple_binop (mode, ASHIFTRT, cop0, GEN_INT (shift),
21266 operands[0], 0, OPTAB_DIRECT);
21267 if (res != operands[0])
21268 emit_move_insn (operands[0], res);
21269 return true;
21270 }
21271 }
21272
21273 if (!nonimmediate_operand (cop1, mode))
21274 cop1 = force_reg (mode, cop1);
21275 if (!general_operand (operands[1], data_mode))
21276 operands[1] = force_reg (data_mode, operands[1]);
21277 if (!general_operand (operands[2], data_mode))
21278 operands[2] = force_reg (data_mode, operands[2]);
21279
21280 /* XOP supports all of the comparisons on all 128-bit vector int types. */
21281 if (TARGET_XOP
21282 && (mode == V16QImode || mode == V8HImode
21283 || mode == V4SImode || mode == V2DImode))
21284 ;
21285 else
21286 {
21287 /* Canonicalize the comparison to EQ, GT, GTU. */
21288 switch (code)
21289 {
21290 case EQ:
21291 case GT:
21292 case GTU:
21293 break;
21294
21295 case NE:
21296 case LE:
21297 case LEU:
21298 code = reverse_condition (code);
21299 negate = true;
21300 break;
21301
21302 case GE:
21303 case GEU:
21304 code = reverse_condition (code);
21305 negate = true;
21306 /* FALLTHRU */
21307
21308 case LT:
21309 case LTU:
21310 code = swap_condition (code);
21311 x = cop0, cop0 = cop1, cop1 = x;
21312 break;
21313
21314 default:
21315 gcc_unreachable ();
21316 }
21317
21318 /* Only SSE4.1/SSE4.2 supports V2DImode. */
21319 if (mode == V2DImode)
21320 {
21321 switch (code)
21322 {
21323 case EQ:
21324 /* SSE4.1 supports EQ. */
21325 if (!TARGET_SSE4_1)
21326 return false;
21327 break;
21328
21329 case GT:
21330 case GTU:
21331 /* SSE4.2 supports GT/GTU. */
21332 if (!TARGET_SSE4_2)
21333 return false;
21334 break;
21335
21336 default:
21337 gcc_unreachable ();
21338 }
21339 }
21340
21341 /* Unsigned parallel compare is not supported by the hardware.
21342 Play some tricks to turn this into a signed comparison,
21343 or into an equality test against zero. */
21344 if (code == GTU)
21345 {
21346 cop0 = force_reg (mode, cop0);
21347
21348 switch (mode)
21349 {
21350 case V16SImode:
21351 case V8DImode:
21352 case V8SImode:
21353 case V4DImode:
21354 case V4SImode:
21355 case V2DImode:
21356 {
21357 rtx t1, t2, mask;
21358 rtx (*gen_sub3) (rtx, rtx, rtx);
21359
21360 switch (mode)
21361 {
21362 case V16SImode: gen_sub3 = gen_subv16si3; break;
21363 case V8DImode: gen_sub3 = gen_subv8di3; break;
21364 case V8SImode: gen_sub3 = gen_subv8si3; break;
21365 case V4DImode: gen_sub3 = gen_subv4di3; break;
21366 case V4SImode: gen_sub3 = gen_subv4si3; break;
21367 case V2DImode: gen_sub3 = gen_subv2di3; break;
21368 default:
21369 gcc_unreachable ();
21370 }
21371 /* Subtract (-(INT MAX) - 1) from both operands to make
21372 them signed. */
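/* Flipping the sign bit this way preserves the unsigned ordering as a
signed ordering, so the unsigned GTU becomes a plain signed GT. */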
21373 mask = ix86_build_signbit_mask (mode, true, false);
21374 t1 = gen_reg_rtx (mode);
21375 emit_insn (gen_sub3 (t1, cop0, mask));
21376
21377 t2 = gen_reg_rtx (mode);
21378 emit_insn (gen_sub3 (t2, cop1, mask));
21379
21380 cop0 = t1;
21381 cop1 = t2;
21382 code = GT;
21383 }
21384 break;
21385
21386 case V64QImode:
21387 case V32HImode:
21388 case V32QImode:
21389 case V16HImode:
21390 case V16QImode:
21391 case V8HImode:
21392 /* Perform a parallel unsigned saturating subtraction. */
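/* x >u y exactly when the saturating difference (x -us y) is nonzero,
so compare that difference for equality with zero and invert the
sense of the result via NEGATE. */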
21393 x = gen_reg_rtx (mode);
21394 emit_insn (gen_rtx_SET (VOIDmode, x,
21395 gen_rtx_US_MINUS (mode, cop0, cop1)));
21396
21397 cop0 = x;
21398 cop1 = CONST0_RTX (mode);
21399 code = EQ;
21400 negate = !negate;
21401 break;
21402
21403 default:
21404 gcc_unreachable ();
21405 }
21406 }
21407 }
21408
21409 /* Allow the comparison to be done in one mode, but the movcc to
21410 happen in another mode. */
21411 if (data_mode == mode)
21412 {
21413 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
21414 operands[1+negate], operands[2-negate]);
21415 }
21416 else
21417 {
21418 gcc_assert (GET_MODE_SIZE (data_mode) == GET_MODE_SIZE (mode));
21419 x = ix86_expand_sse_cmp (gen_reg_rtx (mode), code, cop0, cop1,
21420 operands[1+negate], operands[2-negate]);
21421 if (GET_MODE (x) == mode)
21422 x = gen_lowpart (data_mode, x);
21423 }
21424
21425 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
21426 operands[2-negate]);
21427 return true;
21428 }
21429
21430 /* AVX512F does support 64-byte integer vector operations,
21431 thus the longest vector we are faced with is V64QImode. */
21432 #define MAX_VECT_LEN 64
21433
21434 struct expand_vec_perm_d
21435 {
21436 rtx target, op0, op1;
21437 unsigned char perm[MAX_VECT_LEN];
21438 enum machine_mode vmode;
21439 unsigned char nelt;
21440 bool one_operand_p;
21441 bool testing_p;
21442 };
21443
21444 static bool
21445 ix86_expand_vec_perm_vpermi2 (rtx target, rtx op0, rtx mask, rtx op1,
21446 struct expand_vec_perm_d *d)
21447 {
21448 /* ix86_expand_vec_perm_vpermi2 is called from both const and non-const
21449 expander, so args are either in d, or in op0, op1 etc. */
21450 enum machine_mode mode = GET_MODE (d ? d->op0 : op0);
21451 enum machine_mode maskmode = mode;
21452 rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;
21453
21454 switch (mode)
21455 {
21456 case V8HImode:
21457 if (TARGET_AVX512VL && TARGET_AVX512BW)
21458 gen = gen_avx512vl_vpermi2varv8hi3;
21459 break;
21460 case V16HImode:
21461 if (TARGET_AVX512VL && TARGET_AVX512BW)
21462 gen = gen_avx512vl_vpermi2varv16hi3;
21463 break;
21464 case V32HImode:
21465 if (TARGET_AVX512BW)
21466 gen = gen_avx512bw_vpermi2varv32hi3;
21467 break;
21468 case V4SImode:
21469 if (TARGET_AVX512VL)
21470 gen = gen_avx512vl_vpermi2varv4si3;
21471 break;
21472 case V8SImode:
21473 if (TARGET_AVX512VL)
21474 gen = gen_avx512vl_vpermi2varv8si3;
21475 break;
21476 case V16SImode:
21477 if (TARGET_AVX512F)
21478 gen = gen_avx512f_vpermi2varv16si3;
21479 break;
21480 case V4SFmode:
21481 if (TARGET_AVX512VL)
21482 {
21483 gen = gen_avx512vl_vpermi2varv4sf3;
21484 maskmode = V4SImode;
21485 }
21486 break;
21487 case V8SFmode:
21488 if (TARGET_AVX512VL)
21489 {
21490 gen = gen_avx512vl_vpermi2varv8sf3;
21491 maskmode = V8SImode;
21492 }
21493 break;
21494 case V16SFmode:
21495 if (TARGET_AVX512F)
21496 {
21497 gen = gen_avx512f_vpermi2varv16sf3;
21498 maskmode = V16SImode;
21499 }
21500 break;
21501 case V2DImode:
21502 if (TARGET_AVX512VL)
21503 gen = gen_avx512vl_vpermi2varv2di3;
21504 break;
21505 case V4DImode:
21506 if (TARGET_AVX512VL)
21507 gen = gen_avx512vl_vpermi2varv4di3;
21508 break;
21509 case V8DImode:
21510 if (TARGET_AVX512F)
21511 gen = gen_avx512f_vpermi2varv8di3;
21512 break;
21513 case V2DFmode:
21514 if (TARGET_AVX512VL)
21515 {
21516 gen = gen_avx512vl_vpermi2varv2df3;
21517 maskmode = V2DImode;
21518 }
21519 break;
21520 case V4DFmode:
21521 if (TARGET_AVX512VL)
21522 {
21523 gen = gen_avx512vl_vpermi2varv4df3;
21524 maskmode = V4DImode;
21525 }
21526 break;
21527 case V8DFmode:
21528 if (TARGET_AVX512F)
21529 {
21530 gen = gen_avx512f_vpermi2varv8df3;
21531 maskmode = V8DImode;
21532 }
21533 break;
21534 default:
21535 break;
21536 }
21537
21538 if (gen == NULL)
21539 return false;
21540
21541 /* ix86_expand_vec_perm_vpermi2 is called from both const and non-const
21542 expander, so args are either in d, or in op0, op1 etc. */
21543 if (d)
21544 {
21545 rtx vec[64];
21546 target = d->target;
21547 op0 = d->op0;
21548 op1 = d->op1;
21549 for (int i = 0; i < d->nelt; ++i)
21550 vec[i] = GEN_INT (d->perm[i]);
21551 mask = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (d->nelt, vec));
21552 }
21553
21554 emit_insn (gen (target, op0, force_reg (maskmode, mask), op1));
21555 return true;
21556 }
21557
21558 /* Expand a variable vector permutation. */
21559
21560 void
21561 ix86_expand_vec_perm (rtx operands[])
21562 {
21563 rtx target = operands[0];
21564 rtx op0 = operands[1];
21565 rtx op1 = operands[2];
21566 rtx mask = operands[3];
21567 rtx t1, t2, t3, t4, t5, t6, t7, t8, vt, vt2, vec[32];
21568 enum machine_mode mode = GET_MODE (op0);
21569 enum machine_mode maskmode = GET_MODE (mask);
21570 int w, e, i;
21571 bool one_operand_shuffle = rtx_equal_p (op0, op1);
21572
21573 /* Number of elements in the vector, and size of one element in bytes. */
21574 w = GET_MODE_NUNITS (mode);
21575 e = GET_MODE_UNIT_SIZE (mode);
21576 gcc_assert (w <= 64);
21577
21578 if (ix86_expand_vec_perm_vpermi2 (target, op0, mask, op1, NULL))
21579 return;
21580
21581 if (TARGET_AVX2)
21582 {
21583 if (mode == V4DImode || mode == V4DFmode || mode == V16HImode)
21584 {
21585 /* Unfortunately, the VPERMQ and VPERMPD instructions only support
21586 a constant shuffle operand. With a tiny bit of effort we can
21587 use VPERMD instead. A re-interpretation stall for V4DFmode is
21588 unfortunate but there's no avoiding it.
21589 Similarly, V16HImode has no instructions for variable shuffling,
21590 while for V32QImode we can use vpshufb; vpshufb; vpermq; vpor
21591 after preparing suitable masks. */
21592
21593 if (mode == V16HImode)
21594 {
21595 maskmode = mode = V32QImode;
21596 w = 32;
21597 e = 1;
21598 }
21599 else
21600 {
21601 maskmode = mode = V8SImode;
21602 w = 8;
21603 e = 4;
21604 }
21605 t1 = gen_reg_rtx (maskmode);
21606
21607 /* Replicate the low bits of the V4DImode mask into V8SImode:
21608 mask = { A B C D }
21609 t1 = { A A B B C C D D }. */
21610 for (i = 0; i < w / 2; ++i)
21611 vec[i*2 + 1] = vec[i*2] = GEN_INT (i * 2);
21612 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
21613 vt = force_reg (maskmode, vt);
21614 mask = gen_lowpart (maskmode, mask);
21615 if (maskmode == V8SImode)
21616 emit_insn (gen_avx2_permvarv8si (t1, mask, vt));
21617 else
21618 emit_insn (gen_avx2_pshufbv32qi3 (t1, mask, vt));
21619
21620 /* Multiply the shuffle indices by two. */
21621 t1 = expand_simple_binop (maskmode, PLUS, t1, t1, t1, 1,
21622 OPTAB_DIRECT);
21623
21624 /* Add one to the odd shuffle indices:
21625 t1 = { A*2, A*2+1, B*2, B*2+1, ... }. */
21626 for (i = 0; i < w / 2; ++i)
21627 {
21628 vec[i * 2] = const0_rtx;
21629 vec[i * 2 + 1] = const1_rtx;
21630 }
21631 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
21632 vt = validize_mem (force_const_mem (maskmode, vt));
21633 t1 = expand_simple_binop (maskmode, PLUS, t1, vt, t1, 1,
21634 OPTAB_DIRECT);
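
/* Worked example (illustrative only, V4DImode case): if the original
   V4DImode mask is { 1 3 0 2 }, the replication above gives
   t1 = { 1 1 3 3 0 0 2 2 }, doubling gives { 2 2 6 6 0 0 4 4 }, and adding
   { 0 1 0 1 0 1 0 1 } yields { 2 3 6 7 0 1 4 5 }, i.e. the V8SImode
   permutation selecting the dword pairs that make up qwords 1, 3, 0, 2.  */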
21635
21636 /* Continue as if V8SImode (resp. V32QImode) was used initially. */
21637 operands[3] = mask = t1;
21638 target = gen_reg_rtx (mode);
21639 op0 = gen_lowpart (mode, op0);
21640 op1 = gen_lowpart (mode, op1);
21641 }
21642
21643 switch (mode)
21644 {
21645 case V8SImode:
21646 /* The VPERMD and VPERMPS instructions already properly ignore
21647 the high bits of the shuffle elements. No need for us to
21648 perform an AND ourselves. */
21649 if (one_operand_shuffle)
21650 {
21651 emit_insn (gen_avx2_permvarv8si (target, op0, mask));
21652 if (target != operands[0])
21653 emit_move_insn (operands[0],
21654 gen_lowpart (GET_MODE (operands[0]), target));
21655 }
21656 else
21657 {
21658 t1 = gen_reg_rtx (V8SImode);
21659 t2 = gen_reg_rtx (V8SImode);
21660 emit_insn (gen_avx2_permvarv8si (t1, op0, mask));
21661 emit_insn (gen_avx2_permvarv8si (t2, op1, mask));
21662 goto merge_two;
21663 }
21664 return;
21665
21666 case V8SFmode:
21667 mask = gen_lowpart (V8SImode, mask);
21668 if (one_operand_shuffle)
21669 emit_insn (gen_avx2_permvarv8sf (target, op0, mask));
21670 else
21671 {
21672 t1 = gen_reg_rtx (V8SFmode);
21673 t2 = gen_reg_rtx (V8SFmode);
21674 emit_insn (gen_avx2_permvarv8sf (t1, op0, mask));
21675 emit_insn (gen_avx2_permvarv8sf (t2, op1, mask));
21676 goto merge_two;
21677 }
21678 return;
21679
21680 case V4SImode:
21681 /* By combining the two 128-bit input vectors into one 256-bit
21682 input vector, we can use VPERMD and VPERMPS for the full
21683 two-operand shuffle. */
21684 t1 = gen_reg_rtx (V8SImode);
21685 t2 = gen_reg_rtx (V8SImode);
21686 emit_insn (gen_avx_vec_concatv8si (t1, op0, op1));
21687 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
21688 emit_insn (gen_avx2_permvarv8si (t1, t1, t2));
21689 emit_insn (gen_avx_vextractf128v8si (target, t1, const0_rtx));
21690 return;
21691
21692 case V4SFmode:
21693 t1 = gen_reg_rtx (V8SFmode);
21694 t2 = gen_reg_rtx (V8SImode);
21695 mask = gen_lowpart (V4SImode, mask);
21696 emit_insn (gen_avx_vec_concatv8sf (t1, op0, op1));
21697 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
21698 emit_insn (gen_avx2_permvarv8sf (t1, t1, t2));
21699 emit_insn (gen_avx_vextractf128v8sf (target, t1, const0_rtx));
21700 return;
21701
21702 case V32QImode:
21703 t1 = gen_reg_rtx (V32QImode);
21704 t2 = gen_reg_rtx (V32QImode);
21705 t3 = gen_reg_rtx (V32QImode);
21706 vt2 = GEN_INT (-128);
21707 for (i = 0; i < 32; i++)
21708 vec[i] = vt2;
21709 vt = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
21710 vt = force_reg (V32QImode, vt);
21711 for (i = 0; i < 32; i++)
21712 vec[i] = i < 16 ? vt2 : const0_rtx;
21713 vt2 = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
21714 vt2 = force_reg (V32QImode, vt2);
21715 /* From mask create two adjusted masks, which contain the same
21716 bits as mask in the low 7 bits of each vector element.
21717 The first mask will have the most significant bit clear
21718 if it requests an element from the same 128-bit lane
21719 and the MSB set if it requests an element from the other 128-bit lane.
21720 The second mask will have the opposite values of the MSB,
21721 and additionally will have its 128-bit lanes swapped.
21722 E.g. a { 07 12 1e 09 ... | 17 19 05 1f ... } mask vector will have
21723 t1 { 07 92 9e 09 ... | 17 19 85 1f ... } and
21724 t3 { 97 99 05 9f ... | 87 12 1e 89 ... } where each ...
21725 stands for the other 12 bytes. */
21726 /* The bit that tells whether an element is from the same lane or the
21727 other lane is bit 4, so shift it up by 3 to the MSB position. */
21728 t5 = gen_reg_rtx (V4DImode);
21729 emit_insn (gen_ashlv4di3 (t5, gen_lowpart (V4DImode, mask),
21730 GEN_INT (3)));
21731 /* Clear MSB bits from the mask just in case it had them set. */
21732 emit_insn (gen_avx2_andnotv32qi3 (t2, vt, mask));
21733 /* After this t1 will have MSB set for elements from other lane. */
21734 emit_insn (gen_xorv32qi3 (t1, gen_lowpart (V32QImode, t5), vt2));
21735 /* Clear bits other than MSB. */
21736 emit_insn (gen_andv32qi3 (t1, t1, vt));
21737 /* Or in the lower bits from mask into t3. */
21738 emit_insn (gen_iorv32qi3 (t3, t1, t2));
21739 /* And invert MSB bits in t1, so MSB is set for elements from the same
21740 lane. */
21741 emit_insn (gen_xorv32qi3 (t1, t1, vt));
21742 /* Swap 128-bit lanes in t3. */
21743 t6 = gen_reg_rtx (V4DImode);
21744 emit_insn (gen_avx2_permv4di_1 (t6, gen_lowpart (V4DImode, t3),
21745 const2_rtx, GEN_INT (3),
21746 const0_rtx, const1_rtx));
21747 /* And or in the lower bits from mask into t1. */
21748 emit_insn (gen_iorv32qi3 (t1, t1, t2));
21749 if (one_operand_shuffle)
21750 {
21751 /* Each of these shuffles will put 0s in places where an
21752 element from the other 128-bit lane is needed; otherwise
21753 it will shuffle in the requested value. */
21754 emit_insn (gen_avx2_pshufbv32qi3 (t3, op0,
21755 gen_lowpart (V32QImode, t6)));
21756 emit_insn (gen_avx2_pshufbv32qi3 (t1, op0, t1));
21757 /* For t3 the 128-bit lanes are swapped again. */
21758 t7 = gen_reg_rtx (V4DImode);
21759 emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t3),
21760 const2_rtx, GEN_INT (3),
21761 const0_rtx, const1_rtx));
21762 /* ORing both together yields the result. */
21763 emit_insn (gen_iorv32qi3 (target, t1,
21764 gen_lowpart (V32QImode, t7)));
21765 if (target != operands[0])
21766 emit_move_insn (operands[0],
21767 gen_lowpart (GET_MODE (operands[0]), target));
21768 return;
21769 }
21770
21771 t4 = gen_reg_rtx (V32QImode);
21772 /* Similar to the one_operand_shuffle code above, just
21773 repeated twice, once for each operand. The merge_two:
21774 code will merge the two results together. */
21775 emit_insn (gen_avx2_pshufbv32qi3 (t4, op0,
21776 gen_lowpart (V32QImode, t6)));
21777 emit_insn (gen_avx2_pshufbv32qi3 (t3, op1,
21778 gen_lowpart (V32QImode, t6)));
21779 emit_insn (gen_avx2_pshufbv32qi3 (t2, op0, t1));
21780 emit_insn (gen_avx2_pshufbv32qi3 (t1, op1, t1));
21781 t7 = gen_reg_rtx (V4DImode);
21782 emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t4),
21783 const2_rtx, GEN_INT (3),
21784 const0_rtx, const1_rtx));
21785 t8 = gen_reg_rtx (V4DImode);
21786 emit_insn (gen_avx2_permv4di_1 (t8, gen_lowpart (V4DImode, t3),
21787 const2_rtx, GEN_INT (3),
21788 const0_rtx, const1_rtx));
21789 emit_insn (gen_iorv32qi3 (t4, t2, gen_lowpart (V32QImode, t7)));
21790 emit_insn (gen_iorv32qi3 (t3, t1, gen_lowpart (V32QImode, t8)));
21791 t1 = t4;
21792 t2 = t3;
21793 goto merge_two;
21794
21795 default:
21796 gcc_assert (GET_MODE_SIZE (mode) <= 16);
21797 break;
21798 }
21799 }
21800
21801 if (TARGET_XOP)
21802 {
21803 /* The XOP VPPERM insn supports three inputs. By ignoring the
21804 one_operand_shuffle special case, we avoid creating another
21805 set of constant vectors in memory. */
21806 one_operand_shuffle = false;
21807
21808 /* mask = mask & {2*w-1, ...} */
21809 vt = GEN_INT (2*w - 1);
21810 }
21811 else
21812 {
21813 /* mask = mask & {w-1, ...} */
21814 vt = GEN_INT (w - 1);
21815 }
21816
21817 for (i = 0; i < w; i++)
21818 vec[i] = vt;
21819 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
21820 mask = expand_simple_binop (maskmode, AND, mask, vt,
21821 NULL_RTX, 0, OPTAB_DIRECT);
21822
21823 /* For non-QImode operations, convert the word permutation control
21824 into a byte permutation control. */
21825 if (mode != V16QImode)
21826 {
21827 mask = expand_simple_binop (maskmode, ASHIFT, mask,
21828 GEN_INT (exact_log2 (e)),
21829 NULL_RTX, 0, OPTAB_DIRECT);
21830
21831 /* Convert mask to vector of chars. */
21832 mask = force_reg (V16QImode, gen_lowpart (V16QImode, mask));
21833
21834 /* Replicate each of the input bytes into byte positions:
21835 (v2di) --> {0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8}
21836 (v4si) --> {0,0,0,0, 4,4,4,4, 8,8,8,8, 12,12,12,12}
21837 (v8hi) --> {0,0, 2,2, 4,4, 6,6, ...}. */
21838 for (i = 0; i < 16; ++i)
21839 vec[i] = GEN_INT (i/e * e);
21840 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
21841 vt = validize_mem (force_const_mem (V16QImode, vt));
21842 if (TARGET_XOP)
21843 emit_insn (gen_xop_pperm (mask, mask, mask, vt));
21844 else
21845 emit_insn (gen_ssse3_pshufbv16qi3 (mask, mask, vt));
21846
21847 /* Convert it into the byte positions by doing
21848 mask = mask + {0,1,..,16/w, 0,1,..,16/w, ...} */
21849 for (i = 0; i < 16; ++i)
21850 vec[i] = GEN_INT (i % e);
21851 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
21852 vt = validize_mem (force_const_mem (V16QImode, vt));
21853 emit_insn (gen_addv16qi3 (mask, mask, vt));
21854 }
21855
21856 /* The actual shuffle operations all operate on V16QImode. */
21857 op0 = gen_lowpart (V16QImode, op0);
21858 op1 = gen_lowpart (V16QImode, op1);
21859
21860 if (TARGET_XOP)
21861 {
21862 if (GET_MODE (target) != V16QImode)
21863 target = gen_reg_rtx (V16QImode);
21864 emit_insn (gen_xop_pperm (target, op0, op1, mask));
21865 if (target != operands[0])
21866 emit_move_insn (operands[0],
21867 gen_lowpart (GET_MODE (operands[0]), target));
21868 }
21869 else if (one_operand_shuffle)
21870 {
21871 if (GET_MODE (target) != V16QImode)
21872 target = gen_reg_rtx (V16QImode);
21873 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, mask));
21874 if (target != operands[0])
21875 emit_move_insn (operands[0],
21876 gen_lowpart (GET_MODE (operands[0]), target));
21877 }
21878 else
21879 {
21880 rtx xops[6];
21881 bool ok;
21882
21883 /* Shuffle the two input vectors independently. */
21884 t1 = gen_reg_rtx (V16QImode);
21885 t2 = gen_reg_rtx (V16QImode);
21886 emit_insn (gen_ssse3_pshufbv16qi3 (t1, op0, mask));
21887 emit_insn (gen_ssse3_pshufbv16qi3 (t2, op1, mask));
21888
21889 merge_two:
21890 /* Then merge them together. The key is whether any given control
21891 element contained a bit set that indicates the second word. */
21892 mask = operands[3];
21893 vt = GEN_INT (w);
21894 if (maskmode == V2DImode && !TARGET_SSE4_1)
21895 {
21896 /* Without SSE4.1, we don't have V2DImode EQ. Perform one
21897 more shuffle to convert the V2DI input mask into a V4SI
21898 input mask. At that point the masking that expand_int_vcond
21899 performs will work as desired. */
21900 rtx t3 = gen_reg_rtx (V4SImode);
21901 emit_insn (gen_sse2_pshufd_1 (t3, gen_lowpart (V4SImode, mask),
21902 const0_rtx, const0_rtx,
21903 const2_rtx, const2_rtx));
21904 mask = t3;
21905 maskmode = V4SImode;
21906 e = w = 4;
21907 }
21908
21909 for (i = 0; i < w; i++)
21910 vec[i] = vt;
21911 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
21912 vt = force_reg (maskmode, vt);
21913 mask = expand_simple_binop (maskmode, AND, mask, vt,
21914 NULL_RTX, 0, OPTAB_DIRECT);
21915
21916 if (GET_MODE (target) != mode)
21917 target = gen_reg_rtx (mode);
21918 xops[0] = target;
21919 xops[1] = gen_lowpart (mode, t2);
21920 xops[2] = gen_lowpart (mode, t1);
21921 xops[3] = gen_rtx_EQ (maskmode, mask, vt);
21922 xops[4] = mask;
21923 xops[5] = vt;
21924 ok = ix86_expand_int_vcond (xops);
21925 gcc_assert (ok);
21926 if (target != operands[0])
21927 emit_move_insn (operands[0],
21928 gen_lowpart (GET_MODE (operands[0]), target));
21929 }
21930 }
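
/* Illustrative sketch (an assumption about the caller, not a definitive
   interface): the variable vec_perm expander in sse.md is expected to call
   the routine above with a four-element operand array, roughly

     rtx ops[4];
     ops[0] = target;   // destination vector
     ops[1] = op0;      // first input vector
     ops[2] = op1;      // second input vector
     ops[3] = sel;      // variable selector
     ix86_expand_vec_perm (ops);

   The exact expander pattern and operand predicates live in sse.md, not
   here.  */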
21931
21932 /* Unpack SRC into the next wider integer vector type, storing it in DEST.
21933 UNSIGNED_P is true if we should do zero extension, else sign extension.
21934 HIGH_P is true if we want the N/2 high elements, else the low elements. */
21935
21936 void
21937 ix86_expand_sse_unpack (rtx dest, rtx src, bool unsigned_p, bool high_p)
21938 {
21939 enum machine_mode imode = GET_MODE (src);
21940 rtx tmp;
21941
21942 if (TARGET_SSE4_1)
21943 {
21944 rtx (*unpack)(rtx, rtx);
21945 rtx (*extract)(rtx, rtx) = NULL;
21946 enum machine_mode halfmode = BLKmode;
21947
21948 switch (imode)
21949 {
21950 case V64QImode:
21951 if (unsigned_p)
21952 unpack = gen_avx512bw_zero_extendv32qiv32hi2;
21953 else
21954 unpack = gen_avx512bw_sign_extendv32qiv32hi2;
21955 halfmode = V32QImode;
21956 extract
21957 = high_p ? gen_vec_extract_hi_v64qi : gen_vec_extract_lo_v64qi;
21958 break;
21959 case V32QImode:
21960 if (unsigned_p)
21961 unpack = gen_avx2_zero_extendv16qiv16hi2;
21962 else
21963 unpack = gen_avx2_sign_extendv16qiv16hi2;
21964 halfmode = V16QImode;
21965 extract
21966 = high_p ? gen_vec_extract_hi_v32qi : gen_vec_extract_lo_v32qi;
21967 break;
21968 case V32HImode:
21969 if (unsigned_p)
21970 unpack = gen_avx512f_zero_extendv16hiv16si2;
21971 else
21972 unpack = gen_avx512f_sign_extendv16hiv16si2;
21973 halfmode = V16HImode;
21974 extract
21975 = high_p ? gen_vec_extract_hi_v32hi : gen_vec_extract_lo_v32hi;
21976 break;
21977 case V16HImode:
21978 if (unsigned_p)
21979 unpack = gen_avx2_zero_extendv8hiv8si2;
21980 else
21981 unpack = gen_avx2_sign_extendv8hiv8si2;
21982 halfmode = V8HImode;
21983 extract
21984 = high_p ? gen_vec_extract_hi_v16hi : gen_vec_extract_lo_v16hi;
21985 break;
21986 case V16SImode:
21987 if (unsigned_p)
21988 unpack = gen_avx512f_zero_extendv8siv8di2;
21989 else
21990 unpack = gen_avx512f_sign_extendv8siv8di2;
21991 halfmode = V8SImode;
21992 extract
21993 = high_p ? gen_vec_extract_hi_v16si : gen_vec_extract_lo_v16si;
21994 break;
21995 case V8SImode:
21996 if (unsigned_p)
21997 unpack = gen_avx2_zero_extendv4siv4di2;
21998 else
21999 unpack = gen_avx2_sign_extendv4siv4di2;
22000 halfmode = V4SImode;
22001 extract
22002 = high_p ? gen_vec_extract_hi_v8si : gen_vec_extract_lo_v8si;
22003 break;
22004 case V16QImode:
22005 if (unsigned_p)
22006 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
22007 else
22008 unpack = gen_sse4_1_sign_extendv8qiv8hi2;
22009 break;
22010 case V8HImode:
22011 if (unsigned_p)
22012 unpack = gen_sse4_1_zero_extendv4hiv4si2;
22013 else
22014 unpack = gen_sse4_1_sign_extendv4hiv4si2;
22015 break;
22016 case V4SImode:
22017 if (unsigned_p)
22018 unpack = gen_sse4_1_zero_extendv2siv2di2;
22019 else
22020 unpack = gen_sse4_1_sign_extendv2siv2di2;
22021 break;
22022 default:
22023 gcc_unreachable ();
22024 }
22025
22026 if (GET_MODE_SIZE (imode) >= 32)
22027 {
22028 tmp = gen_reg_rtx (halfmode);
22029 emit_insn (extract (tmp, src));
22030 }
22031 else if (high_p)
22032 {
22033 /* Shift higher 8 bytes to lower 8 bytes. */
22034 tmp = gen_reg_rtx (V1TImode);
22035 emit_insn (gen_sse2_lshrv1ti3 (tmp, gen_lowpart (V1TImode, src),
22036 GEN_INT (64)));
22037 tmp = gen_lowpart (imode, tmp);
22038 }
22039 else
22040 tmp = src;
22041
22042 emit_insn (unpack (dest, tmp));
22043 }
22044 else
22045 {
22046 rtx (*unpack)(rtx, rtx, rtx);
22047
22048 switch (imode)
22049 {
22050 case V16QImode:
22051 if (high_p)
22052 unpack = gen_vec_interleave_highv16qi;
22053 else
22054 unpack = gen_vec_interleave_lowv16qi;
22055 break;
22056 case V8HImode:
22057 if (high_p)
22058 unpack = gen_vec_interleave_highv8hi;
22059 else
22060 unpack = gen_vec_interleave_lowv8hi;
22061 break;
22062 case V4SImode:
22063 if (high_p)
22064 unpack = gen_vec_interleave_highv4si;
22065 else
22066 unpack = gen_vec_interleave_lowv4si;
22067 break;
22068 default:
22069 gcc_unreachable ();
22070 }
22071
22072 if (unsigned_p)
22073 tmp = force_reg (imode, CONST0_RTX (imode));
22074 else
22075 tmp = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
22076 src, pc_rtx, pc_rtx);
22077
22078 rtx tmp2 = gen_reg_rtx (imode);
22079 emit_insn (unpack (tmp2, src, tmp));
22080 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), tmp2));
22081 }
22082 }
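
/* Illustrative note: for a signed V4SImode low-part unpack without SSE4.1,
   the code above first builds the sign mask with the compare 0 > SRC
   (pcmpgtd) and then interleaves SRC with that mask (punpckldq), so each
   32-bit element is widened to a sign-extended 64-bit element.  With SSE4.1
   the same operation is a single pmovsxdq.  */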
22083
22084 /* Expand conditional increment or decrement using adc/sbb instructions.
22085 The default case using setcc followed by a conditional move can be
22086 done by generic code. */
22087 bool
22088 ix86_expand_int_addcc (rtx operands[])
22089 {
22090 enum rtx_code code = GET_CODE (operands[1]);
22091 rtx flags;
22092 rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
22093 rtx compare_op;
22094 rtx val = const0_rtx;
22095 bool fpcmp = false;
22096 enum machine_mode mode;
22097 rtx op0 = XEXP (operands[1], 0);
22098 rtx op1 = XEXP (operands[1], 1);
22099
22100 if (operands[3] != const1_rtx
22101 && operands[3] != constm1_rtx)
22102 return false;
22103 if (!ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
22104 return false;
22105 code = GET_CODE (compare_op);
22106
22107 flags = XEXP (compare_op, 0);
22108
22109 if (GET_MODE (flags) == CCFPmode
22110 || GET_MODE (flags) == CCFPUmode)
22111 {
22112 fpcmp = true;
22113 code = ix86_fp_compare_code_to_integer (code);
22114 }
22115
22116 if (code != LTU)
22117 {
22118 val = constm1_rtx;
22119 if (fpcmp)
22120 PUT_CODE (compare_op,
22121 reverse_condition_maybe_unordered
22122 (GET_CODE (compare_op)));
22123 else
22124 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
22125 }
22126
22127 mode = GET_MODE (operands[0]);
22128
22129 /* Construct either adc or sbb insn. */
22130 if ((code == LTU) == (operands[3] == constm1_rtx))
22131 {
22132 switch (mode)
22133 {
22134 case QImode:
22135 insn = gen_subqi3_carry;
22136 break;
22137 case HImode:
22138 insn = gen_subhi3_carry;
22139 break;
22140 case SImode:
22141 insn = gen_subsi3_carry;
22142 break;
22143 case DImode:
22144 insn = gen_subdi3_carry;
22145 break;
22146 default:
22147 gcc_unreachable ();
22148 }
22149 }
22150 else
22151 {
22152 switch (mode)
22153 {
22154 case QImode:
22155 insn = gen_addqi3_carry;
22156 break;
22157 case HImode:
22158 insn = gen_addhi3_carry;
22159 break;
22160 case SImode:
22161 insn = gen_addsi3_carry;
22162 break;
22163 case DImode:
22164 insn = gen_adddi3_carry;
22165 break;
22166 default:
22167 gcc_unreachable ();
22168 }
22169 }
22170 emit_insn (insn (operands[0], operands[2], val, flags, compare_op));
22171
22172 return true;
22173 }
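
/* Illustrative example (hypothetical source): for unsigned operands, a
   conditional increment such as

     r = r + (a < b);

   can be expanded by the routine above into a compare that leaves the
   condition in the carry flag followed by an add-with-carry, roughly
   (Intel syntax)

     cmp  a, b        ; CF = (a < b)
     adc  r, 0        ; r += CF

   and the decrement form r = r - (a < b) uses sbb instead of adc.  */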
22174
22175
22176 /* Split OPERAND into half-mode parts. Similar to split_double_mode,
22177 but works for floating point parameters and non-offsettable memories.
22178 For pushes, it returns just stack offsets; the values will be saved
22179 in the right order. At most four parts are generated. */
22180
22181 static int
22182 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
22183 {
22184 int size;
22185
22186 if (!TARGET_64BIT)
22187 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
22188 else
22189 size = (GET_MODE_SIZE (mode) + 4) / 8;
22190
22191 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
22192 gcc_assert (size >= 2 && size <= 4);
22193
22194 /* Optimize constant pool references to immediates. This is used by fp
22195 moves, which force all constants to memory to allow combining. */
22196 if (MEM_P (operand) && MEM_READONLY_P (operand))
22197 {
22198 rtx tmp = maybe_get_pool_constant (operand);
22199 if (tmp)
22200 operand = tmp;
22201 }
22202
22203 if (MEM_P (operand) && !offsettable_memref_p (operand))
22204 {
22205 /* The only non-offsettable memories we handle are pushes. */
22206 int ok = push_operand (operand, VOIDmode);
22207
22208 gcc_assert (ok);
22209
22210 operand = copy_rtx (operand);
22211 PUT_MODE (operand, word_mode);
22212 parts[0] = parts[1] = parts[2] = parts[3] = operand;
22213 return size;
22214 }
22215
22216 if (GET_CODE (operand) == CONST_VECTOR)
22217 {
22218 enum machine_mode imode = int_mode_for_mode (mode);
22219 /* Caution: if we looked through a constant pool memory above,
22220 the operand may actually have a different mode now. That's
22221 ok, since we want to pun this all the way back to an integer. */
22222 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
22223 gcc_assert (operand != NULL);
22224 mode = imode;
22225 }
22226
22227 if (!TARGET_64BIT)
22228 {
22229 if (mode == DImode)
22230 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
22231 else
22232 {
22233 int i;
22234
22235 if (REG_P (operand))
22236 {
22237 gcc_assert (reload_completed);
22238 for (i = 0; i < size; i++)
22239 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
22240 }
22241 else if (offsettable_memref_p (operand))
22242 {
22243 operand = adjust_address (operand, SImode, 0);
22244 parts[0] = operand;
22245 for (i = 1; i < size; i++)
22246 parts[i] = adjust_address (operand, SImode, 4 * i);
22247 }
22248 else if (GET_CODE (operand) == CONST_DOUBLE)
22249 {
22250 REAL_VALUE_TYPE r;
22251 long l[4];
22252
22253 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
22254 switch (mode)
22255 {
22256 case TFmode:
22257 real_to_target (l, &r, mode);
22258 parts[3] = gen_int_mode (l[3], SImode);
22259 parts[2] = gen_int_mode (l[2], SImode);
22260 break;
22261 case XFmode:
22262 /* We can't use REAL_VALUE_TO_TARGET_LONG_DOUBLE since
22263 long double may not be 80-bit. */
22264 real_to_target (l, &r, mode);
22265 parts[2] = gen_int_mode (l[2], SImode);
22266 break;
22267 case DFmode:
22268 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
22269 break;
22270 default:
22271 gcc_unreachable ();
22272 }
22273 parts[1] = gen_int_mode (l[1], SImode);
22274 parts[0] = gen_int_mode (l[0], SImode);
22275 }
22276 else
22277 gcc_unreachable ();
22278 }
22279 }
22280 else
22281 {
22282 if (mode == TImode)
22283 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
22284 if (mode == XFmode || mode == TFmode)
22285 {
22286 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
22287 if (REG_P (operand))
22288 {
22289 gcc_assert (reload_completed);
22290 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
22291 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
22292 }
22293 else if (offsettable_memref_p (operand))
22294 {
22295 operand = adjust_address (operand, DImode, 0);
22296 parts[0] = operand;
22297 parts[1] = adjust_address (operand, upper_mode, 8);
22298 }
22299 else if (GET_CODE (operand) == CONST_DOUBLE)
22300 {
22301 REAL_VALUE_TYPE r;
22302 long l[4];
22303
22304 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
22305 real_to_target (l, &r, mode);
22306
22307 /* Do not use shift by 32 to avoid warning on 32bit systems. */
22308 if (HOST_BITS_PER_WIDE_INT >= 64)
22309 parts[0]
22310 = gen_int_mode
22311 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
22312 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
22313 DImode);
22314 else
22315 parts[0] = immed_double_const (l[0], l[1], DImode);
22316
22317 if (upper_mode == SImode)
22318 parts[1] = gen_int_mode (l[2], SImode);
22319 else if (HOST_BITS_PER_WIDE_INT >= 64)
22320 parts[1]
22321 = gen_int_mode
22322 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
22323 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
22324 DImode);
22325 else
22326 parts[1] = immed_double_const (l[2], l[3], DImode);
22327 }
22328 else
22329 gcc_unreachable ();
22330 }
22331 }
22332
22333 return size;
22334 }
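
/* Illustrative example: on a 32-bit target, a DFmode constant such as 1.0
   (bit pattern 0x3ff0000000000000) splits into two SImode immediates,
   parts[0] = 0x00000000 and parts[1] = 0x3ff00000, while an XFmode value
   yields three SImode parts.  */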
22335
22336 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
22337 The operands are split into word-sized parts first: operands 2-5 are
22338 filled with the destination parts in the correct order and operands
22339 6-9 with the corresponding source parts. */
22340
22341 void
22342 ix86_split_long_move (rtx operands[])
22343 {
22344 rtx part[2][4];
22345 int nparts, i, j;
22346 int push = 0;
22347 int collisions = 0;
22348 enum machine_mode mode = GET_MODE (operands[0]);
22349 bool collisionparts[4];
22350
22351 /* The DFmode expanders may ask us to move a double.
22352 For a 64-bit target this is a single move. By hiding that fact
22353 here we simplify the i386.md splitters. */
22354 if (TARGET_64BIT && GET_MODE_SIZE (GET_MODE (operands[0])) == 8)
22355 {
22356 /* Optimize constant pool references to immediates. This is used by
22357 fp moves, which force all constants to memory to allow combining. */
22358
22359 if (MEM_P (operands[1])
22360 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
22361 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
22362 operands[1] = get_pool_constant (XEXP (operands[1], 0));
22363 if (push_operand (operands[0], VOIDmode))
22364 {
22365 operands[0] = copy_rtx (operands[0]);
22366 PUT_MODE (operands[0], word_mode);
22367 }
22368 else
22369 operands[0] = gen_lowpart (DImode, operands[0]);
22370 operands[1] = gen_lowpart (DImode, operands[1]);
22371 emit_move_insn (operands[0], operands[1]);
22372 return;
22373 }
22374
22375 /* The only non-offsettable memory we handle is push. */
22376 if (push_operand (operands[0], VOIDmode))
22377 push = 1;
22378 else
22379 gcc_assert (!MEM_P (operands[0])
22380 || offsettable_memref_p (operands[0]));
22381
22382 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
22383 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
22384
22385 /* When emitting a push, take care of source operands on the stack. */
22386 if (push && MEM_P (operands[1])
22387 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
22388 {
22389 rtx src_base = XEXP (part[1][nparts - 1], 0);
22390
22391 /* Compensate for the stack decrement by 4. */
22392 if (!TARGET_64BIT && nparts == 3
22393 && mode == XFmode && TARGET_128BIT_LONG_DOUBLE)
22394 src_base = plus_constant (Pmode, src_base, 4);
22395
22396 /* src_base refers to the stack pointer and is
22397 automatically decreased by each emitted push. */
22398 for (i = 0; i < nparts; i++)
22399 part[1][i] = change_address (part[1][i],
22400 GET_MODE (part[1][i]), src_base);
22401 }
22402
22403 /* We need to do the copy in the right order in case an address register
22404 of the source overlaps the destination. */
22405 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
22406 {
22407 rtx tmp;
22408
22409 for (i = 0; i < nparts; i++)
22410 {
22411 collisionparts[i]
22412 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
22413 if (collisionparts[i])
22414 collisions++;
22415 }
22416
22417 /* Collision in the middle part can be handled by reordering. */
22418 if (collisions == 1 && nparts == 3 && collisionparts [1])
22419 {
22420 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
22421 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
22422 }
22423 else if (collisions == 1
22424 && nparts == 4
22425 && (collisionparts [1] || collisionparts [2]))
22426 {
22427 if (collisionparts [1])
22428 {
22429 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
22430 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
22431 }
22432 else
22433 {
22434 tmp = part[0][2]; part[0][2] = part[0][3]; part[0][3] = tmp;
22435 tmp = part[1][2]; part[1][2] = part[1][3]; part[1][3] = tmp;
22436 }
22437 }
22438
22439 /* If there are more collisions, we can't handle them by reordering.
22440 Do an lea into the last part and use only one colliding move. */
22441 else if (collisions > 1)
22442 {
22443 rtx base;
22444
22445 collisions = 1;
22446
22447 base = part[0][nparts - 1];
22448
22449 /* Handle the case when the last part isn't valid for lea.
22450 This happens in 64-bit mode when storing the 12-byte XFmode. */
22451 if (GET_MODE (base) != Pmode)
22452 base = gen_rtx_REG (Pmode, REGNO (base));
22453
22454 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
22455 part[1][0] = replace_equiv_address (part[1][0], base);
22456 for (i = 1; i < nparts; i++)
22457 {
22458 tmp = plus_constant (Pmode, base, UNITS_PER_WORD * i);
22459 part[1][i] = replace_equiv_address (part[1][i], tmp);
22460 }
22461 }
22462 }
22463
22464 if (push)
22465 {
22466 if (!TARGET_64BIT)
22467 {
22468 if (nparts == 3)
22469 {
22470 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
22471 emit_insn (ix86_gen_add3 (stack_pointer_rtx,
22472 stack_pointer_rtx, GEN_INT (-4)));
22473 emit_move_insn (part[0][2], part[1][2]);
22474 }
22475 else if (nparts == 4)
22476 {
22477 emit_move_insn (part[0][3], part[1][3]);
22478 emit_move_insn (part[0][2], part[1][2]);
22479 }
22480 }
22481 else
22482 {
22483 /* In 64-bit mode we don't have a 32-bit push available. If this is
22484 a register, that is OK - we will just use the larger counterpart.
22485 We also retype memory - this comes from an attempt to avoid a REX
22486 prefix when moving the second half of a TFmode value. */
22487 if (GET_MODE (part[1][1]) == SImode)
22488 {
22489 switch (GET_CODE (part[1][1]))
22490 {
22491 case MEM:
22492 part[1][1] = adjust_address (part[1][1], DImode, 0);
22493 break;
22494
22495 case REG:
22496 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
22497 break;
22498
22499 default:
22500 gcc_unreachable ();
22501 }
22502
22503 if (GET_MODE (part[1][0]) == SImode)
22504 part[1][0] = part[1][1];
22505 }
22506 }
22507 emit_move_insn (part[0][1], part[1][1]);
22508 emit_move_insn (part[0][0], part[1][0]);
22509 return;
22510 }
22511
22512 /* Choose the correct order so we do not overwrite the source before it is copied. */
22513 if ((REG_P (part[0][0])
22514 && REG_P (part[1][1])
22515 && (REGNO (part[0][0]) == REGNO (part[1][1])
22516 || (nparts == 3
22517 && REGNO (part[0][0]) == REGNO (part[1][2]))
22518 || (nparts == 4
22519 && REGNO (part[0][0]) == REGNO (part[1][3]))))
22520 || (collisions > 0
22521 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
22522 {
22523 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
22524 {
22525 operands[2 + i] = part[0][j];
22526 operands[6 + i] = part[1][j];
22527 }
22528 }
22529 else
22530 {
22531 for (i = 0; i < nparts; i++)
22532 {
22533 operands[2 + i] = part[0][i];
22534 operands[6 + i] = part[1][i];
22535 }
22536 }
22537
22538 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
22539 if (optimize_insn_for_size_p ())
22540 {
22541 for (j = 0; j < nparts - 1; j++)
22542 if (CONST_INT_P (operands[6 + j])
22543 && operands[6 + j] != const0_rtx
22544 && REG_P (operands[2 + j]))
22545 for (i = j; i < nparts - 1; i++)
22546 if (CONST_INT_P (operands[7 + i])
22547 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
22548 operands[7 + i] = operands[2 + j];
22549 }
22550
22551 for (i = 0; i < nparts; i++)
22552 emit_move_insn (operands[2 + i], operands[6 + i]);
22553
22554 return;
22555 }
22556
22557 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
22558 left shift by a constant, either using a single shift or
22559 a sequence of add instructions. */
22560
22561 static void
22562 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
22563 {
22564 rtx (*insn)(rtx, rtx, rtx);
22565
22566 if (count == 1
22567 || (count * ix86_cost->add <= ix86_cost->shift_const
22568 && !optimize_insn_for_size_p ()))
22569 {
22570 insn = mode == DImode ? gen_addsi3 : gen_adddi3;
22571 while (count-- > 0)
22572 emit_insn (insn (operand, operand, operand));
22573 }
22574 else
22575 {
22576 insn = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
22577 emit_insn (insn (operand, operand, GEN_INT (count)));
22578 }
22579 }
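
/* Illustrative note: for small counts on targets where adds are cheaper
   than a shift by a constant, the helper above emits repeated doubling,
   e.g. a shift left by 2 becomes

     add  r, r
     add  r, r

   while otherwise a single sal/shl by the constant is emitted.  */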
22580
22581 void
22582 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
22583 {
22584 rtx (*gen_ashl3)(rtx, rtx, rtx);
22585 rtx (*gen_shld)(rtx, rtx, rtx);
22586 int half_width = GET_MODE_BITSIZE (mode) >> 1;
22587
22588 rtx low[2], high[2];
22589 int count;
22590
22591 if (CONST_INT_P (operands[2]))
22592 {
22593 split_double_mode (mode, operands, 2, low, high);
22594 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
22595
22596 if (count >= half_width)
22597 {
22598 emit_move_insn (high[0], low[1]);
22599 emit_move_insn (low[0], const0_rtx);
22600
22601 if (count > half_width)
22602 ix86_expand_ashl_const (high[0], count - half_width, mode);
22603 }
22604 else
22605 {
22606 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
22607
22608 if (!rtx_equal_p (operands[0], operands[1]))
22609 emit_move_insn (operands[0], operands[1]);
22610
22611 emit_insn (gen_shld (high[0], low[0], GEN_INT (count)));
22612 ix86_expand_ashl_const (low[0], count, mode);
22613 }
22614 return;
22615 }
22616
22617 split_double_mode (mode, operands, 1, low, high);
22618
22619 gen_ashl3 = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
22620
22621 if (operands[1] == const1_rtx)
22622 {
22623 /* Assuming we've chosen QImode-capable registers, 1 << N
22624 can be done with two 32/64-bit shifts, no branches, no cmoves. */
22625 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
22626 {
22627 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
22628
22629 ix86_expand_clear (low[0]);
22630 ix86_expand_clear (high[0]);
22631 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (half_width)));
22632
22633 d = gen_lowpart (QImode, low[0]);
22634 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
22635 s = gen_rtx_EQ (QImode, flags, const0_rtx);
22636 emit_insn (gen_rtx_SET (VOIDmode, d, s));
22637
22638 d = gen_lowpart (QImode, high[0]);
22639 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
22640 s = gen_rtx_NE (QImode, flags, const0_rtx);
22641 emit_insn (gen_rtx_SET (VOIDmode, d, s));
22642 }
22643
22644 /* Otherwise, we can get the same results by manually performing
22645 a bit extract operation on bit 5/6, and then performing the two
22646 shifts. The two methods of getting 0/1 into low/high are exactly
22647 the same size. Avoiding the shift in the bit extract case helps
22648 pentium4 a bit; no one else seems to care much either way. */
22649 else
22650 {
22651 enum machine_mode half_mode;
22652 rtx (*gen_lshr3)(rtx, rtx, rtx);
22653 rtx (*gen_and3)(rtx, rtx, rtx);
22654 rtx (*gen_xor3)(rtx, rtx, rtx);
22655 HOST_WIDE_INT bits;
22656 rtx x;
22657
22658 if (mode == DImode)
22659 {
22660 half_mode = SImode;
22661 gen_lshr3 = gen_lshrsi3;
22662 gen_and3 = gen_andsi3;
22663 gen_xor3 = gen_xorsi3;
22664 bits = 5;
22665 }
22666 else
22667 {
22668 half_mode = DImode;
22669 gen_lshr3 = gen_lshrdi3;
22670 gen_and3 = gen_anddi3;
22671 gen_xor3 = gen_xordi3;
22672 bits = 6;
22673 }
22674
22675 if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
22676 x = gen_rtx_ZERO_EXTEND (half_mode, operands[2]);
22677 else
22678 x = gen_lowpart (half_mode, operands[2]);
22679 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
22680
22681 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (bits)));
22682 emit_insn (gen_and3 (high[0], high[0], const1_rtx));
22683 emit_move_insn (low[0], high[0]);
22684 emit_insn (gen_xor3 (low[0], low[0], const1_rtx));
22685 }
22686
22687 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
22688 emit_insn (gen_ashl3 (high[0], high[0], operands[2]));
22689 return;
22690 }
22691
22692 if (operands[1] == constm1_rtx)
22693 {
22694 /* For -1 << N, we can avoid the shld instruction, because we
22695 know that we're shifting 0...31/63 ones into a -1. */
22696 emit_move_insn (low[0], constm1_rtx);
22697 if (optimize_insn_for_size_p ())
22698 emit_move_insn (high[0], low[0]);
22699 else
22700 emit_move_insn (high[0], constm1_rtx);
22701 }
22702 else
22703 {
22704 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
22705
22706 if (!rtx_equal_p (operands[0], operands[1]))
22707 emit_move_insn (operands[0], operands[1]);
22708
22709 split_double_mode (mode, operands, 1, low, high);
22710 emit_insn (gen_shld (high[0], low[0], operands[2]));
22711 }
22712
22713 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
22714
22715 if (TARGET_CMOVE && scratch)
22716 {
22717 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
22718 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
22719
22720 ix86_expand_clear (scratch);
22721 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
22722 }
22723 else
22724 {
22725 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
22726 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
22727
22728 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
22729 }
22730 }
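
/* Worked example (illustrative): splitting a DImode shift x << 40 on a
   32-bit target with a constant count >= 32 reduces to

     high = low;
     low  = 0;
     high <<= 8;        -- i.e. count - 32, via ix86_expand_ashl_const

   For a variable count, the shld path plus the shift-adjustment patterns
   above handle counts of 32 and above at run time.  */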
22731
22732 void
22733 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
22734 {
22735 rtx (*gen_ashr3)(rtx, rtx, rtx)
22736 = mode == DImode ? gen_ashrsi3 : gen_ashrdi3;
22737 rtx (*gen_shrd)(rtx, rtx, rtx);
22738 int half_width = GET_MODE_BITSIZE (mode) >> 1;
22739
22740 rtx low[2], high[2];
22741 int count;
22742
22743 if (CONST_INT_P (operands[2]))
22744 {
22745 split_double_mode (mode, operands, 2, low, high);
22746 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
22747
22748 if (count == GET_MODE_BITSIZE (mode) - 1)
22749 {
22750 emit_move_insn (high[0], high[1]);
22751 emit_insn (gen_ashr3 (high[0], high[0],
22752 GEN_INT (half_width - 1)));
22753 emit_move_insn (low[0], high[0]);
22754
22755 }
22756 else if (count >= half_width)
22757 {
22758 emit_move_insn (low[0], high[1]);
22759 emit_move_insn (high[0], low[0]);
22760 emit_insn (gen_ashr3 (high[0], high[0],
22761 GEN_INT (half_width - 1)));
22762
22763 if (count > half_width)
22764 emit_insn (gen_ashr3 (low[0], low[0],
22765 GEN_INT (count - half_width)));
22766 }
22767 else
22768 {
22769 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
22770
22771 if (!rtx_equal_p (operands[0], operands[1]))
22772 emit_move_insn (operands[0], operands[1]);
22773
22774 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
22775 emit_insn (gen_ashr3 (high[0], high[0], GEN_INT (count)));
22776 }
22777 }
22778 else
22779 {
22780 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
22781
22782 if (!rtx_equal_p (operands[0], operands[1]))
22783 emit_move_insn (operands[0], operands[1]);
22784
22785 split_double_mode (mode, operands, 1, low, high);
22786
22787 emit_insn (gen_shrd (low[0], high[0], operands[2]));
22788 emit_insn (gen_ashr3 (high[0], high[0], operands[2]));
22789
22790 if (TARGET_CMOVE && scratch)
22791 {
22792 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
22793 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
22794
22795 emit_move_insn (scratch, high[0]);
22796 emit_insn (gen_ashr3 (scratch, scratch,
22797 GEN_INT (half_width - 1)));
22798 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
22799 scratch));
22800 }
22801 else
22802 {
22803 rtx (*gen_x86_shift_adj_3)(rtx, rtx, rtx)
22804 = mode == DImode ? gen_x86_shiftsi_adj_3 : gen_x86_shiftdi_adj_3;
22805
22806 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
22807 }
22808 }
22809 }
22810
22811 void
22812 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
22813 {
22814 rtx (*gen_lshr3)(rtx, rtx, rtx)
22815 = mode == DImode ? gen_lshrsi3 : gen_lshrdi3;
22816 rtx (*gen_shrd)(rtx, rtx, rtx);
22817 int half_width = GET_MODE_BITSIZE (mode) >> 1;
22818
22819 rtx low[2], high[2];
22820 int count;
22821
22822 if (CONST_INT_P (operands[2]))
22823 {
22824 split_double_mode (mode, operands, 2, low, high);
22825 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
22826
22827 if (count >= half_width)
22828 {
22829 emit_move_insn (low[0], high[1]);
22830 ix86_expand_clear (high[0]);
22831
22832 if (count > half_width)
22833 emit_insn (gen_lshr3 (low[0], low[0],
22834 GEN_INT (count - half_width)));
22835 }
22836 else
22837 {
22838 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
22839
22840 if (!rtx_equal_p (operands[0], operands[1]))
22841 emit_move_insn (operands[0], operands[1]);
22842
22843 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
22844 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (count)));
22845 }
22846 }
22847 else
22848 {
22849 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
22850
22851 if (!rtx_equal_p (operands[0], operands[1]))
22852 emit_move_insn (operands[0], operands[1]);
22853
22854 split_double_mode (mode, operands, 1, low, high);
22855
22856 emit_insn (gen_shrd (low[0], high[0], operands[2]));
22857 emit_insn (gen_lshr3 (high[0], high[0], operands[2]));
22858
22859 if (TARGET_CMOVE && scratch)
22860 {
22861 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
22862 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
22863
22864 ix86_expand_clear (scratch);
22865 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
22866 scratch));
22867 }
22868 else
22869 {
22870 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
22871 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
22872
22873 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
22874 }
22875 }
22876 }
22877
22878 /* Predict the just-emitted jump instruction to be taken with probability PROB. */
22879 static void
22880 predict_jump (int prob)
22881 {
22882 rtx insn = get_last_insn ();
22883 gcc_assert (JUMP_P (insn));
22884 add_int_reg_note (insn, REG_BR_PROB, prob);
22885 }
22886
22887 /* Helper function for the string operations below. Test whether VARIABLE
22888 is aligned to VALUE bytes. If so, jump to the label. */
22889 static rtx_code_label *
22890 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
22891 {
22892 rtx_code_label *label = gen_label_rtx ();
22893 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
22894 if (GET_MODE (variable) == DImode)
22895 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
22896 else
22897 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
22898 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
22899 1, label);
22900 if (epilogue)
22901 predict_jump (REG_BR_PROB_BASE * 50 / 100);
22902 else
22903 predict_jump (REG_BR_PROB_BASE * 90 / 100);
22904 return label;
22905 }
22906
22907 /* Decrease COUNTREG by VALUE. */
22908 static void
22909 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
22910 {
22911 rtx (*gen_add)(rtx, rtx, rtx)
22912 = GET_MODE (countreg) == DImode ? gen_adddi3 : gen_addsi3;
22913
22914 emit_insn (gen_add (countreg, countreg, GEN_INT (-value)));
22915 }
22916
22917 /* Zero extend EXP, which is possibly SImode, to a Pmode register. */
22918 rtx
22919 ix86_zero_extend_to_Pmode (rtx exp)
22920 {
22921 return force_reg (Pmode, convert_to_mode (Pmode, exp, 1));
22922 }
22923
22924 /* Divide COUNTREG by SCALE. */
22925 static rtx
22926 scale_counter (rtx countreg, int scale)
22927 {
22928 rtx sc;
22929
22930 if (scale == 1)
22931 return countreg;
22932 if (CONST_INT_P (countreg))
22933 return GEN_INT (INTVAL (countreg) / scale);
22934 gcc_assert (REG_P (countreg));
22935
22936 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
22937 GEN_INT (exact_log2 (scale)),
22938 NULL, 1, OPTAB_DIRECT);
22939 return sc;
22940 }
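
/* Illustrative example: when copying with SImode chunks (SCALE == 4), a
   constant byte count of 37 becomes GEN_INT (9), while a register count is
   simply shifted right by 2; the remaining tail bytes are handled by the
   epilogue code.  */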
22941
22942 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
22943 DImode for constant loop counts. */
22944
22945 static enum machine_mode
22946 counter_mode (rtx count_exp)
22947 {
22948 if (GET_MODE (count_exp) != VOIDmode)
22949 return GET_MODE (count_exp);
22950 if (!CONST_INT_P (count_exp))
22951 return Pmode;
22952 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
22953 return DImode;
22954 return SImode;
22955 }
22956
22957 /* Copy the address to a Pmode register. This is used for x32 to
22958 truncate a DImode TLS address to an SImode register. */
22959
22960 static rtx
22961 ix86_copy_addr_to_reg (rtx addr)
22962 {
22963 if (GET_MODE (addr) == Pmode || GET_MODE (addr) == VOIDmode)
22964 return copy_addr_to_reg (addr);
22965 else
22966 {
22967 gcc_assert (GET_MODE (addr) == DImode && Pmode == SImode);
22968 return gen_rtx_SUBREG (SImode, copy_to_mode_reg (DImode, addr), 0);
22969 }
22970 }
22971
22972 /* When ISSETMEM is FALSE, output a simple loop to copy the memory pointed to
22973 by SRCPTR to DESTPTR in chunks of MODE, unrolled UNROLL times; the overall
22974 size is COUNT bytes. When ISSETMEM is TRUE, output the equivalent loop to
22975 set the memory to VALUE (assumed to be in MODE).
22976
22977 The size is rounded down to a whole number of chunks moved at once.
22978 SRCMEM and DESTMEM provide MEM rtxes to supply proper aliasing info. */
22979
22980
22981 static void
22982 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
22983 rtx destptr, rtx srcptr, rtx value,
22984 rtx count, enum machine_mode mode, int unroll,
22985 int expected_size, bool issetmem)
22986 {
22987 rtx_code_label *out_label, *top_label;
22988 rtx iter, tmp;
22989 enum machine_mode iter_mode = counter_mode (count);
22990 int piece_size_n = GET_MODE_SIZE (mode) * unroll;
22991 rtx piece_size = GEN_INT (piece_size_n);
22992 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
22993 rtx size;
22994 int i;
22995
22996 top_label = gen_label_rtx ();
22997 out_label = gen_label_rtx ();
22998 iter = gen_reg_rtx (iter_mode);
22999
23000 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
23001 NULL, 1, OPTAB_DIRECT);
23002 /* Those two should combine. */
23003 if (piece_size == const1_rtx)
23004 {
23005 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
23006 true, out_label);
23007 predict_jump (REG_BR_PROB_BASE * 10 / 100);
23008 }
23009 emit_move_insn (iter, const0_rtx);
23010
23011 emit_label (top_label);
23012
23013 tmp = convert_modes (Pmode, iter_mode, iter, true);
23014
23015 /* This assert could be relaxed - in that case we'd need to compute
23016 the smallest power of two containing PIECE_SIZE_N and pass it to
23017 offset_address. */
23018 gcc_assert ((piece_size_n & (piece_size_n - 1)) == 0);
23019 destmem = offset_address (destmem, tmp, piece_size_n);
23020 destmem = adjust_address (destmem, mode, 0);
23021
23022 if (!issetmem)
23023 {
23024 srcmem = offset_address (srcmem, copy_rtx (tmp), piece_size_n);
23025 srcmem = adjust_address (srcmem, mode, 0);
23026
23027 /* When unrolling for chips that reorder memory reads and writes,
23028 we can save registers by using a single temporary.
23029 Using 4 temporaries is also overkill in 32-bit mode. */
23030 if (!TARGET_64BIT && 0)
23031 {
23032 for (i = 0; i < unroll; i++)
23033 {
23034 if (i)
23035 {
23036 destmem =
23037 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
23038 srcmem =
23039 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
23040 }
23041 emit_move_insn (destmem, srcmem);
23042 }
23043 }
23044 else
23045 {
23046 rtx tmpreg[4];
23047 gcc_assert (unroll <= 4);
23048 for (i = 0; i < unroll; i++)
23049 {
23050 tmpreg[i] = gen_reg_rtx (mode);
23051 if (i)
23052 {
23053 srcmem =
23054 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
23055 }
23056 emit_move_insn (tmpreg[i], srcmem);
23057 }
23058 for (i = 0; i < unroll; i++)
23059 {
23060 if (i)
23061 {
23062 destmem =
23063 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
23064 }
23065 emit_move_insn (destmem, tmpreg[i]);
23066 }
23067 }
23068 }
23069 else
23070 for (i = 0; i < unroll; i++)
23071 {
23072 if (i)
23073 destmem =
23074 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
23075 emit_move_insn (destmem, value);
23076 }
23077
23078 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
23079 true, OPTAB_LIB_WIDEN);
23080 if (tmp != iter)
23081 emit_move_insn (iter, tmp);
23082
23083 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
23084 true, top_label);
23085 if (expected_size != -1)
23086 {
23087 expected_size /= GET_MODE_SIZE (mode) * unroll;
23088 if (expected_size == 0)
23089 predict_jump (0);
23090 else if (expected_size > REG_BR_PROB_BASE)
23091 predict_jump (REG_BR_PROB_BASE - 1);
23092 else
23093 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
23094 }
23095 else
23096 predict_jump (REG_BR_PROB_BASE * 80 / 100);
23097 iter = ix86_zero_extend_to_Pmode (iter);
23098 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
23099 true, OPTAB_LIB_WIDEN);
23100 if (tmp != destptr)
23101 emit_move_insn (destptr, tmp);
23102 if (!issetmem)
23103 {
23104 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
23105 true, OPTAB_LIB_WIDEN);
23106 if (tmp != srcptr)
23107 emit_move_insn (srcptr, tmp);
23108 }
23109 emit_label (out_label);
23110 }
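
/* Rough shape of the loop emitted above (illustrative pseudo-code only):

     size = count & -piece_size;
     iter = 0;
     do
       {
         copy (or store VALUE to) one unrolled group of MODE chunks
           at DESTPTR + iter [from SRCPTR + iter];
         iter += piece_size;
       }
     while (iter < size);
     DESTPTR += iter;  SRCPTR += iter;   -- pointers updated for the caller
  */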
23111
23112 /* Output "rep; mov" or "rep; stos" instruction depending on ISSETMEM argument.
23113 When ISSETMEM is true, arguments SRCMEM and SRCPTR are ignored.
23114 When ISSETMEM is false, arguments VALUE and ORIG_VALUE are ignored.
23115 For setmem case, VALUE is a promoted to a wider size ORIG_VALUE.
23116 ORIG_VALUE is the original value passed to memset to fill the memory with.
23117 Other arguments have same meaning as for previous function. */
23118
23119 static void
23120 expand_set_or_movmem_via_rep (rtx destmem, rtx srcmem,
23121 rtx destptr, rtx srcptr, rtx value, rtx orig_value,
23122 rtx count,
23123 enum machine_mode mode, bool issetmem)
23124 {
23125 rtx destexp;
23126 rtx srcexp;
23127 rtx countreg;
23128 HOST_WIDE_INT rounded_count;
23129
23130 /* If possible, it is shorter to use rep movs.
23131 TODO: Maybe it is better to move this logic to decide_alg. */
23132 if (mode == QImode && CONST_INT_P (count) && !(INTVAL (count) & 3)
23133 && (!issetmem || orig_value == const0_rtx))
23134 mode = SImode;
23135
23136 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
23137 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
23138
23139 countreg = ix86_zero_extend_to_Pmode (scale_counter (count,
23140 GET_MODE_SIZE (mode)));
23141 if (mode != QImode)
23142 {
23143 destexp = gen_rtx_ASHIFT (Pmode, countreg,
23144 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
23145 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
23146 }
23147 else
23148 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
23149 if ((!issetmem || orig_value == const0_rtx) && CONST_INT_P (count))
23150 {
23151 rounded_count = (INTVAL (count)
23152 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
23153 destmem = shallow_copy_rtx (destmem);
23154 set_mem_size (destmem, rounded_count);
23155 }
23156 else if (MEM_SIZE_KNOWN_P (destmem))
23157 clear_mem_size (destmem);
23158
23159 if (issetmem)
23160 {
23161 value = force_reg (mode, gen_lowpart (mode, value));
23162 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
23163 }
23164 else
23165 {
23166 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
23167 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
23168 if (mode != QImode)
23169 {
23170 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
23171 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
23172 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
23173 }
23174 else
23175 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
23176 if (CONST_INT_P (count))
23177 {
23178 rounded_count = (INTVAL (count)
23179 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
23180 srcmem = shallow_copy_rtx (srcmem);
23181 set_mem_size (srcmem, rounded_count);
23182 }
23183 else
23184 {
23185 if (MEM_SIZE_KNOWN_P (srcmem))
23186 clear_mem_size (srcmem);
23187 }
23188 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
23189 destexp, srcexp));
23190 }
23191 }
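
/* Illustrative example: a memcpy of a constant 128 bytes expanded with
   MODE == SImode loads the scaled count 32 into the count register and
   emits essentially

     rep movsd

   with the usual esi/edi (or rsi/rdi) source and destination pointers;
   the memset path emits rep stos with the promoted value instead.  */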
23192
23193 /* This function emits moves to copy SIZE_TO_MOVE bytes from SRCMEM to
23194 DESTMEM.
23195 SRCMEM is passed by pointer so it can be updated on return.
23196 The return value is the updated DESTMEM. */
23197 static rtx
23198 emit_memmov (rtx destmem, rtx *srcmem, rtx destptr, rtx srcptr,
23199 HOST_WIDE_INT size_to_move)
23200 {
23201 rtx dst = destmem, src = *srcmem, adjust, tempreg;
23202 enum insn_code code;
23203 enum machine_mode move_mode;
23204 int piece_size, i;
23205
23206 /* Find the widest mode in which we could perform moves.
23207 Start with the biggest power of 2 not greater than SIZE_TO_MOVE and
23208 halve it until a move of that size is supported. */
23209 piece_size = 1 << floor_log2 (size_to_move);
23210 move_mode = mode_for_size (piece_size * BITS_PER_UNIT, MODE_INT, 0);
23211 code = optab_handler (mov_optab, move_mode);
23212 while (code == CODE_FOR_nothing && piece_size > 1)
23213 {
23214 piece_size >>= 1;
23215 move_mode = mode_for_size (piece_size * BITS_PER_UNIT, MODE_INT, 0);
23216 code = optab_handler (mov_optab, move_mode);
23217 }
23218
23219 /* Find the corresponding vector mode with the same size as MOVE_MODE.
23220 MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
23221 if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
23222 {
23223 int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
23224 move_mode = mode_for_vector (word_mode, nunits);
23225 code = optab_handler (mov_optab, move_mode);
23226 if (code == CODE_FOR_nothing)
23227 {
23228 move_mode = word_mode;
23229 piece_size = GET_MODE_SIZE (move_mode);
23230 code = optab_handler (mov_optab, move_mode);
23231 }
23232 }
23233 gcc_assert (code != CODE_FOR_nothing);
23234
23235 dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0);
23236 src = adjust_automodify_address_nv (src, move_mode, srcptr, 0);
23237
23238 /* Emit moves. We'll need SIZE_TO_MOVE/PIECE_SIZE moves. */
23239 gcc_assert (size_to_move % piece_size == 0);
23240 adjust = GEN_INT (piece_size);
23241 for (i = 0; i < size_to_move; i += piece_size)
23242 {
23243 /* We move from memory to memory, so we'll need to do it via
23244 a temporary register. */
23245 tempreg = gen_reg_rtx (move_mode);
23246 emit_insn (GEN_FCN (code) (tempreg, src));
23247 emit_insn (GEN_FCN (code) (dst, tempreg));
23248
23249 emit_move_insn (destptr,
23250 gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust));
23251 emit_move_insn (srcptr,
23252 gen_rtx_PLUS (Pmode, copy_rtx (srcptr), adjust));
23253
23254 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23255 piece_size);
23256 src = adjust_automodify_address_nv (src, move_mode, srcptr,
23257 piece_size);
23258 }
23259
23260 /* Update DST and SRC rtx. */
23261 *srcmem = src;
23262 return dst;
23263 }
23264
23265 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
23266 static void
23267 expand_movmem_epilogue (rtx destmem, rtx srcmem,
23268 rtx destptr, rtx srcptr, rtx count, int max_size)
23269 {
23270 rtx src, dest;
23271 if (CONST_INT_P (count))
23272 {
23273 HOST_WIDE_INT countval = INTVAL (count);
23274 HOST_WIDE_INT epilogue_size = countval % max_size;
23275 int i;
23276
23277 /* For now MAX_SIZE should be a power of 2. This assert could be
23278 relaxed, but it would require somewhat more complicated epilogue
23279 expansion. */
23280 gcc_assert ((max_size & (max_size - 1)) == 0);
23281 for (i = max_size; i >= 1; i >>= 1)
23282 {
23283 if (epilogue_size & i)
23284 destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
23285 }
23286 return;
23287 }
23288 if (max_size > 8)
23289 {
23290 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
23291 count, 1, OPTAB_DIRECT);
23292 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
23293 count, QImode, 1, 4, false);
23294 return;
23295 }
23296
23297 /* When single-instruction stringops are available, we can cheaply advance
23298 the dest and src pointers. Otherwise we save code size by maintaining an
23299 offset (zero is readily available from the preceding rep operation) and
23300 using x86 addressing modes. */
23301 if (TARGET_SINGLE_STRINGOP)
23302 {
23303 if (max_size > 4)
23304 {
23305 rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
23306 src = change_address (srcmem, SImode, srcptr);
23307 dest = change_address (destmem, SImode, destptr);
23308 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23309 emit_label (label);
23310 LABEL_NUSES (label) = 1;
23311 }
23312 if (max_size > 2)
23313 {
23314 rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
23315 src = change_address (srcmem, HImode, srcptr);
23316 dest = change_address (destmem, HImode, destptr);
23317 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23318 emit_label (label);
23319 LABEL_NUSES (label) = 1;
23320 }
23321 if (max_size > 1)
23322 {
23323 rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
23324 src = change_address (srcmem, QImode, srcptr);
23325 dest = change_address (destmem, QImode, destptr);
23326 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23327 emit_label (label);
23328 LABEL_NUSES (label) = 1;
23329 }
23330 }
23331 else
23332 {
23333 rtx offset = force_reg (Pmode, const0_rtx);
23334 rtx tmp;
23335
23336 if (max_size > 4)
23337 {
23338 rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
23339 src = change_address (srcmem, SImode, srcptr);
23340 dest = change_address (destmem, SImode, destptr);
23341 emit_move_insn (dest, src);
23342 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
23343 true, OPTAB_LIB_WIDEN);
23344 if (tmp != offset)
23345 emit_move_insn (offset, tmp);
23346 emit_label (label);
23347 LABEL_NUSES (label) = 1;
23348 }
23349 if (max_size > 2)
23350 {
23351 rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
23352 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
23353 src = change_address (srcmem, HImode, tmp);
23354 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
23355 dest = change_address (destmem, HImode, tmp);
23356 emit_move_insn (dest, src);
23357 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
23358 true, OPTAB_LIB_WIDEN);
23359 if (tmp != offset)
23360 emit_move_insn (offset, tmp);
23361 emit_label (label);
23362 LABEL_NUSES (label) = 1;
23363 }
23364 if (max_size > 1)
23365 {
23366 rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
23367 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
23368 src = change_address (srcmem, QImode, tmp);
23369 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
23370 dest = change_address (destmem, QImode, tmp);
23371 emit_move_insn (dest, src);
23372 emit_label (label);
23373 LABEL_NUSES (label) = 1;
23374 }
23375 }
23376 }
23377
23378 /* This function emits moves to fill SIZE_TO_MOVE bytes starting from DESTMEM
23379 with value PROMOTED_VAL.
23380 Return value is the updated DST.  */
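/* Note that when a piece fits in a word, the store is emitted via
   gen_strset, which also advances DESTPTR; for wider (vector) pieces a
   plain move is emitted and DESTPTR is incremented explicitly.  */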
23382 static rtx
23383 emit_memset (rtx destmem, rtx destptr, rtx promoted_val,
23384 HOST_WIDE_INT size_to_move)
23385 {
23386 rtx dst = destmem, adjust;
23387 enum insn_code code;
23388 enum machine_mode move_mode;
23389 int piece_size, i;
23390
23391 /* Find the widest mode in which we could perform moves.
23392 Start with the mode of PROMOTED_VAL and narrow it when SIZE_TO_MOVE
23393 is smaller than that mode's size.  */
23394 move_mode = GET_MODE (promoted_val);
23395 if (move_mode == VOIDmode)
23396 move_mode = QImode;
23397 if (size_to_move < GET_MODE_SIZE (move_mode))
23398 {
23399 move_mode = mode_for_size (size_to_move * BITS_PER_UNIT, MODE_INT, 0);
23400 promoted_val = gen_lowpart (move_mode, promoted_val);
23401 }
23402 piece_size = GET_MODE_SIZE (move_mode);
23403 code = optab_handler (mov_optab, move_mode);
23404 gcc_assert (code != CODE_FOR_nothing && promoted_val != NULL_RTX);
23405
23406 dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0);
23407
23408 /* Emit moves.  We'll need SIZE_TO_MOVE/PIECE_SIZE moves.  */
23409 gcc_assert (size_to_move % piece_size == 0);
23410 adjust = GEN_INT (piece_size);
23411 for (i = 0; i < size_to_move; i += piece_size)
23412 {
23413 if (piece_size <= GET_MODE_SIZE (word_mode))
23414 {
23415 emit_insn (gen_strset (destptr, dst, promoted_val));
23416 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23417 piece_size);
23418 continue;
23419 }
23420
23421 emit_insn (GEN_FCN (code) (dst, promoted_val));
23422
23423 emit_move_insn (destptr,
23424 gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust));
23425
23426 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23427 piece_size);
23428 }
23429
23430 /* Update DST rtx. */
23431 return dst;
23432 }
23433 /* Output code to set at most count & (max_size - 1) bytes starting at DEST.  */
23434 static void
23435 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
23436 rtx count, int max_size)
23437 {
23438 count =
23439 expand_simple_binop (counter_mode (count), AND, count,
23440 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
23441 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
23442 gen_lowpart (QImode, value), count, QImode,
23443 1, max_size / 2, true);
23444 }
23445
23446 /* Output code to set at most count & (max_size - 1) bytes starting at DEST.  */
23447 static void
23448 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx vec_value,
23449 rtx count, int max_size)
23450 {
23451 rtx dest;
23452
23453 if (CONST_INT_P (count))
23454 {
23455 HOST_WIDE_INT countval = INTVAL (count);
23456 HOST_WIDE_INT epilogue_size = countval % max_size;
23457 int i;
23458
23459 /* For now MAX_SIZE should be a power of 2. This assert could be
23460 relaxed, but it'll require a bit more complicated epilogue
23461 expanding. */
23462 gcc_assert ((max_size & (max_size - 1)) == 0);
23463 for (i = max_size; i >= 1; i >>= 1)
23464 {
23465 if (epilogue_size & i)
23466 {
23467 if (vec_value && i > GET_MODE_SIZE (GET_MODE (value)))
23468 destmem = emit_memset (destmem, destptr, vec_value, i);
23469 else
23470 destmem = emit_memset (destmem, destptr, value, i);
23471 }
23472 }
23473 return;
23474 }
23475 if (max_size > 32)
23476 {
23477 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
23478 return;
23479 }
23480 if (max_size > 16)
23481 {
23482 rtx_code_label *label = ix86_expand_aligntest (count, 16, true);
23483 if (TARGET_64BIT)
23484 {
23485 dest = change_address (destmem, DImode, destptr);
23486 emit_insn (gen_strset (destptr, dest, value));
23487 dest = adjust_automodify_address_nv (dest, DImode, destptr, 8);
23488 emit_insn (gen_strset (destptr, dest, value));
23489 }
23490 else
23491 {
23492 dest = change_address (destmem, SImode, destptr);
23493 emit_insn (gen_strset (destptr, dest, value));
23494 dest = adjust_automodify_address_nv (dest, SImode, destptr, 4);
23495 emit_insn (gen_strset (destptr, dest, value));
23496 dest = adjust_automodify_address_nv (dest, SImode, destptr, 8);
23497 emit_insn (gen_strset (destptr, dest, value));
23498 dest = adjust_automodify_address_nv (dest, SImode, destptr, 12);
23499 emit_insn (gen_strset (destptr, dest, value));
23500 }
23501 emit_label (label);
23502 LABEL_NUSES (label) = 1;
23503 }
23504 if (max_size > 8)
23505 {
23506 rtx_code_label *label = ix86_expand_aligntest (count, 8, true);
23507 if (TARGET_64BIT)
23508 {
23509 dest = change_address (destmem, DImode, destptr);
23510 emit_insn (gen_strset (destptr, dest, value));
23511 }
23512 else
23513 {
23514 dest = change_address (destmem, SImode, destptr);
23515 emit_insn (gen_strset (destptr, dest, value));
23516 dest = adjust_automodify_address_nv (dest, SImode, destptr, 4);
23517 emit_insn (gen_strset (destptr, dest, value));
23518 }
23519 emit_label (label);
23520 LABEL_NUSES (label) = 1;
23521 }
23522 if (max_size > 4)
23523 {
23524 rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
23525 dest = change_address (destmem, SImode, destptr);
23526 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
23527 emit_label (label);
23528 LABEL_NUSES (label) = 1;
23529 }
23530 if (max_size > 2)
23531 {
23532 rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
23533 dest = change_address (destmem, HImode, destptr);
23534 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
23535 emit_label (label);
23536 LABEL_NUSES (label) = 1;
23537 }
23538 if (max_size > 1)
23539 {
23540 rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
23541 dest = change_address (destmem, QImode, destptr);
23542 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
23543 emit_label (label);
23544 LABEL_NUSES (label) = 1;
23545 }
23546 }
23547
23548 /* Depending on ISSETMEM, copy enough from SRCMEM to DESTMEM or set enough to
23549 DESTMEM to align it to DESIRED_ALIGNMENT. Original alignment is ALIGN.
23550 Depending on ISSETMEM, either arguments SRCMEM/SRCPTR or VALUE/VEC_VALUE are
23551 ignored.
23552 Return value is the updated DESTMEM.  */
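/* For example, with ALIGN == 1 and DESIRED_ALIGNMENT == 8 this emits
   three conditional steps: copy/set 1 byte if DESTPTR is odd, then
   2 bytes if it is 2-byte misaligned, then 4 bytes if it is 4-byte
   misaligned, adjusting COUNT after each step.  */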
23553 static rtx
23554 expand_set_or_movmem_prologue (rtx destmem, rtx srcmem,
23555 rtx destptr, rtx srcptr, rtx value,
23556 rtx vec_value, rtx count, int align,
23557 int desired_alignment, bool issetmem)
23558 {
23559 int i;
23560 for (i = 1; i < desired_alignment; i <<= 1)
23561 {
23562 if (align <= i)
23563 {
23564 rtx_code_label *label = ix86_expand_aligntest (destptr, i, false);
23565 if (issetmem)
23566 {
23567 if (vec_value && i > GET_MODE_SIZE (GET_MODE (value)))
23568 destmem = emit_memset (destmem, destptr, vec_value, i);
23569 else
23570 destmem = emit_memset (destmem, destptr, value, i);
23571 }
23572 else
23573 destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
23574 ix86_adjust_counter (count, i);
23575 emit_label (label);
23576 LABEL_NUSES (label) = 1;
23577 set_mem_align (destmem, i * 2 * BITS_PER_UNIT);
23578 }
23579 }
23580 return destmem;
23581 }
23582
23583 /* Test if COUNT&SIZE is nonzero and if so, expand movmem
23584 or setmem sequence that is valid for SIZE..2*SIZE-1 bytes
23585 and jump to DONE_LABEL. */
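/* The trick used below is to emit SIZE bytes of moves from the start of
   the block and another SIZE bytes ending exactly at DESTPTR + COUNT;
   since SIZE <= COUNT < 2*SIZE the two possibly overlapping ranges cover
   the whole block without needing the exact length.  */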
23586 static void
23587 expand_small_movmem_or_setmem (rtx destmem, rtx srcmem,
23588 rtx destptr, rtx srcptr,
23589 rtx value, rtx vec_value,
23590 rtx count, int size,
23591 rtx done_label, bool issetmem)
23592 {
23593 rtx_code_label *label = ix86_expand_aligntest (count, size, false);
23594 enum machine_mode mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 1);
23595 rtx modesize;
23596 int n;
23597
23598 /* If we do not have vector value to copy, we must reduce size. */
23599 if (issetmem)
23600 {
23601 if (!vec_value)
23602 {
23603 if (GET_MODE (value) == VOIDmode && size > 8)
23604 mode = Pmode;
23605 else if (GET_MODE_SIZE (mode) > GET_MODE_SIZE (GET_MODE (value)))
23606 mode = GET_MODE (value);
23607 }
23608 else
23609 mode = GET_MODE (vec_value), value = vec_value;
23610 }
23611 else
23612 {
23613 /* Choose appropriate vector mode. */
23614 if (size >= 32)
23615 mode = TARGET_AVX ? V32QImode : TARGET_SSE ? V16QImode : DImode;
23616 else if (size >= 16)
23617 mode = TARGET_SSE ? V16QImode : DImode;
23618 srcmem = change_address (srcmem, mode, srcptr);
23619 }
23620 destmem = change_address (destmem, mode, destptr);
23621 modesize = GEN_INT (GET_MODE_SIZE (mode));
23622 gcc_assert (GET_MODE_SIZE (mode) <= size);
23623 for (n = 0; n * GET_MODE_SIZE (mode) < size; n++)
23624 {
23625 if (issetmem)
23626 emit_move_insn (destmem, gen_lowpart (mode, value));
23627 else
23628 {
23629 emit_move_insn (destmem, srcmem);
23630 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
23631 }
23632 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
23633 }
23634
23635 destmem = offset_address (destmem, count, 1);
23636 destmem = offset_address (destmem, GEN_INT (-2 * size),
23637 GET_MODE_SIZE (mode));
23638 if (!issetmem)
23639 {
23640 srcmem = offset_address (srcmem, count, 1);
23641 srcmem = offset_address (srcmem, GEN_INT (-2 * size),
23642 GET_MODE_SIZE (mode));
23643 }
23644 for (n = 0; n * GET_MODE_SIZE (mode) < size; n++)
23645 {
23646 if (issetmem)
23647 emit_move_insn (destmem, gen_lowpart (mode, value));
23648 else
23649 {
23650 emit_move_insn (destmem, srcmem);
23651 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
23652 }
23653 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
23654 }
23655 emit_jump_insn (gen_jump (done_label));
23656 emit_barrier ();
23657
23658 emit_label (label);
23659 LABEL_NUSES (label) = 1;
23660 }
23661
23662 /* Handle small memcpy (up to SIZE, which is supposed to be a small power of 2)
23663 and get ready for the main memcpy loop by copying initial DESIRED_ALIGN-ALIGN
23664 bytes and last SIZE bytes, adjusting DESTPTR/SRCPTR/COUNT in a way we can
23665 proceed with a loop copying SIZE bytes at once.  Do moves in MODE.
23666 DONE_LABEL is a label after the whole copying sequence. The label is created
23667 on demand if *DONE_LABEL is NULL.
23668 MIN_SIZE is minimal size of block copied. This value gets adjusted for new
23669 bounds after the initial copies.
23670
23671 DESTMEM/SRCMEM are memory expressions pointing to the copied block,
23672 DESTPTR/SRCPTR are pointers to the block. DYNAMIC_CHECK indicate whether
23673 we will dispatch to a library call for large blocks.
23674
23675 In pseudocode we do:
23676
23677 if (COUNT < SIZE)
23678 {
23679 Assume that SIZE is 4. Bigger sizes are handled analogously
23680 if (COUNT & 4)
23681 {
23682 copy 4 bytes from SRCPTR to DESTPTR
23683 copy 4 bytes from SRCPTR + COUNT - 4 to DESTPTR + COUNT - 4
23684 goto done_label
23685 }
23686 if (!COUNT)
23687 goto done_label;
23688 copy 1 byte from SRCPTR to DESTPTR
23689 if (COUNT & 2)
23690 {
23691 copy 2 bytes from SRCPTR to DESTPTR
23692 copy 2 bytes from SRCPTR + COUNT - 2 to DESTPTR + COUNT - 2
23693 }
23694 }
23695 else
23696 {
23697 copy at least DESIRED_ALIGN-ALIGN bytes from SRCPTR to DESTPTR
23698 copy SIZE bytes from SRCPTR + COUNT - SIZE to DESTPTR + COUNT -SIZE
23699
23700 OLD_DESTPTR = DESTPTR;
23701 Align DESTPTR up to DESIRED_ALIGN
23702 SRCPTR += DESTPTR - OLD_DESTPTR
23703 COUNT -= DESTPTR - OLD_DESTPTR
23704 if (DYNAMIC_CHECK)
23705 Round COUNT down to multiple of SIZE
23706 << optional caller supplied zero size guard is here >>
23707 << optional caller supplied dynamic check is here >>
23708 << caller supplied main copy loop is here >>
23709 }
23710 done_label:
23711 */
23712 static void
23713 expand_set_or_movmem_prologue_epilogue_by_misaligned_moves (rtx destmem, rtx srcmem,
23714 rtx *destptr, rtx *srcptr,
23715 enum machine_mode mode,
23716 rtx value, rtx vec_value,
23717 rtx *count,
23718 rtx_code_label **done_label,
23719 int size,
23720 int desired_align,
23721 int align,
23722 unsigned HOST_WIDE_INT *min_size,
23723 bool dynamic_check,
23724 bool issetmem)
23725 {
23726 rtx_code_label *loop_label = NULL, *label;
23727 int n;
23728 rtx modesize;
23729 int prolog_size = 0;
23730 rtx mode_value;
23731
23732 /* Choose the proper value to copy.  */
23733 if (issetmem && VECTOR_MODE_P (mode))
23734 mode_value = vec_value;
23735 else
23736 mode_value = value;
23737 gcc_assert (GET_MODE_SIZE (mode) <= size);
23738
23739 /* See if block is big or small, handle small blocks. */
23740 if (!CONST_INT_P (*count) && *min_size < (unsigned HOST_WIDE_INT)size)
23741 {
23742 int size2 = size;
23743 loop_label = gen_label_rtx ();
23744
23745 if (!*done_label)
23746 *done_label = gen_label_rtx ();
23747
23748 emit_cmp_and_jump_insns (*count, GEN_INT (size2), GE, 0, GET_MODE (*count),
23749 1, loop_label);
23750 size2 >>= 1;
23751
23752 /* Handle sizes > 3. */
23753 for (;size2 > 2; size2 >>= 1)
23754 expand_small_movmem_or_setmem (destmem, srcmem,
23755 *destptr, *srcptr,
23756 value, vec_value,
23757 *count,
23758 size2, *done_label, issetmem);
23759 /* Nothing to copy? Jump to DONE_LABEL if so */
23760 emit_cmp_and_jump_insns (*count, const0_rtx, EQ, 0, GET_MODE (*count),
23761 1, *done_label);
23762
23763 /* Do a byte copy. */
23764 destmem = change_address (destmem, QImode, *destptr);
23765 if (issetmem)
23766 emit_move_insn (destmem, gen_lowpart (QImode, value));
23767 else
23768 {
23769 srcmem = change_address (srcmem, QImode, *srcptr);
23770 emit_move_insn (destmem, srcmem);
23771 }
23772
23773 /* Handle sizes 2 and 3. */
23774 label = ix86_expand_aligntest (*count, 2, false);
23775 destmem = change_address (destmem, HImode, *destptr);
23776 destmem = offset_address (destmem, *count, 1);
23777 destmem = offset_address (destmem, GEN_INT (-2), 2);
23778 if (issetmem)
23779 emit_move_insn (destmem, gen_lowpart (HImode, value));
23780 else
23781 {
23782 srcmem = change_address (srcmem, HImode, *srcptr);
23783 srcmem = offset_address (srcmem, *count, 1);
23784 srcmem = offset_address (srcmem, GEN_INT (-2), 2);
23785 emit_move_insn (destmem, srcmem);
23786 }
23787
23788 emit_label (label);
23789 LABEL_NUSES (label) = 1;
23790 emit_jump_insn (gen_jump (*done_label));
23791 emit_barrier ();
23792 }
23793 else
23794 gcc_assert (*min_size >= (unsigned HOST_WIDE_INT)size
23795 || UINTVAL (*count) >= (unsigned HOST_WIDE_INT)size);
23796
23797 /* Start memcpy for COUNT >= SIZE. */
23798 if (loop_label)
23799 {
23800 emit_label (loop_label);
23801 LABEL_NUSES (loop_label) = 1;
23802 }
23803
23804 /* Copy first desired_align bytes. */
23805 if (!issetmem)
23806 srcmem = change_address (srcmem, mode, *srcptr);
23807 destmem = change_address (destmem, mode, *destptr);
23808 modesize = GEN_INT (GET_MODE_SIZE (mode));
23809 for (n = 0; prolog_size < desired_align - align; n++)
23810 {
23811 if (issetmem)
23812 emit_move_insn (destmem, mode_value);
23813 else
23814 {
23815 emit_move_insn (destmem, srcmem);
23816 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
23817 }
23818 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
23819 prolog_size += GET_MODE_SIZE (mode);
23820 }
23821
23822
23823 /* Copy last SIZE bytes. */
23824 destmem = offset_address (destmem, *count, 1);
23825 destmem = offset_address (destmem,
23826 GEN_INT (-size - prolog_size),
23827 1);
23828 if (issetmem)
23829 emit_move_insn (destmem, mode_value);
23830 else
23831 {
23832 srcmem = offset_address (srcmem, *count, 1);
23833 srcmem = offset_address (srcmem,
23834 GEN_INT (-size - prolog_size),
23835 1);
23836 emit_move_insn (destmem, srcmem);
23837 }
23838 for (n = 1; n * GET_MODE_SIZE (mode) < size; n++)
23839 {
23840 destmem = offset_address (destmem, modesize, 1);
23841 if (issetmem)
23842 emit_move_insn (destmem, mode_value);
23843 else
23844 {
23845 srcmem = offset_address (srcmem, modesize, 1);
23846 emit_move_insn (destmem, srcmem);
23847 }
23848 }
23849
23850 /* Align destination. */
23851 if (desired_align > 1 && desired_align > align)
23852 {
23853 rtx saveddest = *destptr;
23854
23855 gcc_assert (desired_align <= size);
23856 /* Align destptr up, place it in a new register.  */
23857 *destptr = expand_simple_binop (GET_MODE (*destptr), PLUS, *destptr,
23858 GEN_INT (prolog_size),
23859 NULL_RTX, 1, OPTAB_DIRECT);
23860 *destptr = expand_simple_binop (GET_MODE (*destptr), AND, *destptr,
23861 GEN_INT (-desired_align),
23862 *destptr, 1, OPTAB_DIRECT);
23863 /* See how many bytes we skipped. */
23864 saveddest = expand_simple_binop (GET_MODE (*destptr), MINUS, saveddest,
23865 *destptr,
23866 saveddest, 1, OPTAB_DIRECT);
23867 /* Adjust srcptr and count. */
23868 if (!issetmem)
23869 *srcptr = expand_simple_binop (GET_MODE (*srcptr), MINUS, *srcptr, saveddest,
23870 *srcptr, 1, OPTAB_DIRECT);
23871 *count = expand_simple_binop (GET_MODE (*count), PLUS, *count,
23872 saveddest, *count, 1, OPTAB_DIRECT);
23873 /* We copied at most size + prolog_size. */
23874 if (*min_size > (unsigned HOST_WIDE_INT)(size + prolog_size))
23875 *min_size = (*min_size - size) & ~(unsigned HOST_WIDE_INT)(size - 1);
23876 else
23877 *min_size = 0;
23878
23879 /* Our loops always round down the block size, but for dispatch to the library
23880 we need the precise value.  */
23881 if (dynamic_check)
23882 *count = expand_simple_binop (GET_MODE (*count), AND, *count,
23883 GEN_INT (-size), *count, 1, OPTAB_DIRECT);
23884 }
23885 else
23886 {
23887 gcc_assert (prolog_size == 0);
23888 /* Decrease count, so we won't end up copying last word twice. */
23889 if (!CONST_INT_P (*count))
23890 *count = expand_simple_binop (GET_MODE (*count), PLUS, *count,
23891 constm1_rtx, *count, 1, OPTAB_DIRECT);
23892 else
23893 *count = GEN_INT ((UINTVAL (*count) - 1) & ~(unsigned HOST_WIDE_INT)(size - 1));
23894 if (*min_size)
23895 *min_size = (*min_size - 1) & ~(unsigned HOST_WIDE_INT)(size - 1);
23896 }
23897 }
23898
23899
23900 /* This function is like the previous one, except here we know how many bytes
23901 need to be copied. That allows us to update alignment not only of DST, which
23902 is returned, but also of SRC, which is passed as a pointer for that
23903 reason. */
23904 static rtx
23905 expand_set_or_movmem_constant_prologue (rtx dst, rtx *srcp, rtx destreg,
23906 rtx srcreg, rtx value, rtx vec_value,
23907 int desired_align, int align_bytes,
23908 bool issetmem)
23909 {
23910 rtx src = NULL;
23911 rtx orig_dst = dst;
23912 rtx orig_src = NULL;
23913 int piece_size = 1;
23914 int copied_bytes = 0;
23915
23916 if (!issetmem)
23917 {
23918 gcc_assert (srcp != NULL);
23919 src = *srcp;
23920 orig_src = src;
23921 }
23922
23923 for (piece_size = 1;
23924 piece_size <= desired_align && copied_bytes < align_bytes;
23925 piece_size <<= 1)
23926 {
23927 if (align_bytes & piece_size)
23928 {
23929 if (issetmem)
23930 {
23931 if (vec_value && piece_size > GET_MODE_SIZE (GET_MODE (value)))
23932 dst = emit_memset (dst, destreg, vec_value, piece_size);
23933 else
23934 dst = emit_memset (dst, destreg, value, piece_size);
23935 }
23936 else
23937 dst = emit_memmov (dst, &src, destreg, srcreg, piece_size);
23938 copied_bytes += piece_size;
23939 }
23940 }
23941 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
23942 set_mem_align (dst, desired_align * BITS_PER_UNIT);
23943 if (MEM_SIZE_KNOWN_P (orig_dst))
23944 set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes);
23945
23946 if (!issetmem)
23947 {
23948 int src_align_bytes = get_mem_align_offset (src, desired_align
23949 * BITS_PER_UNIT);
23950 if (src_align_bytes >= 0)
23951 src_align_bytes = desired_align - src_align_bytes;
23952 if (src_align_bytes >= 0)
23953 {
23954 unsigned int src_align;
23955 for (src_align = desired_align; src_align >= 2; src_align >>= 1)
23956 {
23957 if ((src_align_bytes & (src_align - 1))
23958 == (align_bytes & (src_align - 1)))
23959 break;
23960 }
23961 if (src_align > (unsigned int) desired_align)
23962 src_align = desired_align;
23963 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
23964 set_mem_align (src, src_align * BITS_PER_UNIT);
23965 }
23966 if (MEM_SIZE_KNOWN_P (orig_src))
23967 set_mem_size (src, MEM_SIZE (orig_src) - align_bytes);
23968 *srcp = src;
23969 }
23970
23971 return dst;
23972 }
23973
23974 /* Return true if ALG can be used in current context.
23975 Assume we expand memset if MEMSET is true. */
23976 static bool
23977 alg_usable_p (enum stringop_alg alg, bool memset)
23978 {
23979 if (alg == no_stringop)
23980 return false;
23981 if (alg == vector_loop)
23982 return TARGET_SSE || TARGET_AVX;
23983 /* Algorithms using the rep prefix want at least edi and ecx;
23984 additionally, memset wants eax and memcpy wants esi. Don't
23985 consider such algorithms if the user has appropriated those
23986 registers for their own purposes. */
23987 if (alg == rep_prefix_1_byte
23988 || alg == rep_prefix_4_byte
23989 || alg == rep_prefix_8_byte)
23990 return !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
23991 || (memset ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
23992 return true;
23993 }
23994
23995 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
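/* The decision order below is: a user-forced -mstringop-strategy wins,
   then size-optimized code picks a rep-prefixed variant (or a plain loop
   when the rep registers are unavailable), very small expected sizes use
   the byte loop, known expected sizes are looked up in the cost tables,
   and when inlining is forced with unknown size we recurse to pick an
   algorithm for medium blocks (optionally guarded by a runtime size
   check); otherwise the table's unknown_size entry or a library call is
   used.  */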
23996 static enum stringop_alg
23997 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size,
23998 unsigned HOST_WIDE_INT min_size, unsigned HOST_WIDE_INT max_size,
23999 bool memset, bool zero_memset, int *dynamic_check, bool *noalign)
24000 {
24001 const struct stringop_algs * algs;
24002 bool optimize_for_speed;
24003 int max = 0;
24004 const struct processor_costs *cost;
24005 int i;
24006 bool any_alg_usable_p = false;
24007
24008 *noalign = false;
24009 *dynamic_check = -1;
24010
24011 /* Even if the string operation call is cold, we still might spend a lot
24012 of time processing large blocks. */
24013 if (optimize_function_for_size_p (cfun)
24014 || (optimize_insn_for_size_p ()
24015 && (max_size < 256
24016 || (expected_size != -1 && expected_size < 256))))
24017 optimize_for_speed = false;
24018 else
24019 optimize_for_speed = true;
24020
24021 cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
24022 if (memset)
24023 algs = &cost->memset[TARGET_64BIT != 0];
24024 else
24025 algs = &cost->memcpy[TARGET_64BIT != 0];
24026
24027 /* See maximal size for user defined algorithm. */
24028 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
24029 {
24030 enum stringop_alg candidate = algs->size[i].alg;
24031 bool usable = alg_usable_p (candidate, memset);
24032 any_alg_usable_p |= usable;
24033
24034 if (candidate != libcall && candidate && usable)
24035 max = algs->size[i].max;
24036 }
24037
24038 /* If the expected size is not known but the max size is small enough
24039 so that the inline version is a win, set the expected size into
24040 the range.  */
24041 if (((max > 1 && (unsigned HOST_WIDE_INT) max >= max_size) || max == -1)
24042 && expected_size == -1)
24043 expected_size = min_size / 2 + max_size / 2;
24044
24045 /* If the user specified the algorithm, honor it if possible.  */
24046 if (ix86_stringop_alg != no_stringop
24047 && alg_usable_p (ix86_stringop_alg, memset))
24048 return ix86_stringop_alg;
24049 /* rep; movq or rep; movl is the smallest variant. */
24050 else if (!optimize_for_speed)
24051 {
24052 *noalign = true;
24053 if (!count || (count & 3) || (memset && !zero_memset))
24054 return alg_usable_p (rep_prefix_1_byte, memset)
24055 ? rep_prefix_1_byte : loop_1_byte;
24056 else
24057 return alg_usable_p (rep_prefix_4_byte, memset)
24058 ? rep_prefix_4_byte : loop;
24059 }
24060 /* Very tiny blocks are best handled via the loop; REP is expensive to
24061 set up.  */
24062 else if (expected_size != -1 && expected_size < 4)
24063 return loop_1_byte;
24064 else if (expected_size != -1)
24065 {
24066 enum stringop_alg alg = libcall;
24067 bool alg_noalign = false;
24068 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
24069 {
24070 /* We get here if the algorithms that were not libcall-based
24071 were rep-prefix based and we are unable to use rep prefixes
24072 based on global register usage. Break out of the loop and
24073 use the heuristic below. */
24074 if (algs->size[i].max == 0)
24075 break;
24076 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
24077 {
24078 enum stringop_alg candidate = algs->size[i].alg;
24079
24080 if (candidate != libcall && alg_usable_p (candidate, memset))
24081 {
24082 alg = candidate;
24083 alg_noalign = algs->size[i].noalign;
24084 }
24085 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
24086 last non-libcall inline algorithm. */
24087 if (TARGET_INLINE_ALL_STRINGOPS)
24088 {
24089 /* When the current size is best to be copied by a libcall,
24090 but we are still forced to inline, run the heuristic below
24091 that will pick code for medium sized blocks. */
24092 if (alg != libcall)
24093 {
24094 *noalign = alg_noalign;
24095 return alg;
24096 }
24097 break;
24098 }
24099 else if (alg_usable_p (candidate, memset))
24100 {
24101 *noalign = algs->size[i].noalign;
24102 return candidate;
24103 }
24104 }
24105 }
24106 }
24107 /* When asked to inline the call anyway, try to pick a meaningful choice.
24108 We look for the maximal size of block that is faster to copy by hand and
24109 take blocks of at most that size, guessing that the average size will
24110 be roughly half of the block.
24111 
24112 If this turns out to be bad, we might simply specify the preferred
24113 choice in ix86_costs. */
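/* Illustrative example (assuming a cost table whose largest inline entry
   is 1024 bytes): with an unknown size and forced inlining we recurse
   with an assumed expected size of 512 to pick code suited for medium
   blocks, and with -minline-stringops-dynamically we additionally set
   *DYNAMIC_CHECK to 1024 so a runtime test dispatches larger blocks to
   the library call.  */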
24114 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
24115 && (algs->unknown_size == libcall
24116 || !alg_usable_p (algs->unknown_size, memset)))
24117 {
24118 enum stringop_alg alg;
24119
24120 /* If there aren't any usable algorithms, then recursing on
24121 smaller sizes isn't going to find anything. Just return the
24122 simple byte-at-a-time copy loop. */
24123 if (!any_alg_usable_p)
24124 {
24125 /* Pick something reasonable. */
24126 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
24127 *dynamic_check = 128;
24128 return loop_1_byte;
24129 }
24130 if (max <= 0)
24131 max = 4096;
24132 alg = decide_alg (count, max / 2, min_size, max_size, memset,
24133 zero_memset, dynamic_check, noalign);
24134 gcc_assert (*dynamic_check == -1);
24135 gcc_assert (alg != libcall);
24136 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
24137 *dynamic_check = max;
24138 return alg;
24139 }
24140 return (alg_usable_p (algs->unknown_size, memset)
24141 ? algs->unknown_size : libcall);
24142 }
24143
24144 /* Decide on alignment. We know that the operand is already aligned to ALIGN
24145 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
24146 static int
24147 decide_alignment (int align,
24148 enum stringop_alg alg,
24149 int expected_size,
24150 enum machine_mode move_mode)
24151 {
24152 int desired_align = 0;
24153
24154 gcc_assert (alg != no_stringop);
24155
24156 if (alg == libcall)
24157 return 0;
24158 if (move_mode == VOIDmode)
24159 return 0;
24160
24161 desired_align = GET_MODE_SIZE (move_mode);
24162 /* PentiumPro has special logic triggering for 8 byte aligned blocks,
24163 copying a whole cacheline at once.  */
24164 if (TARGET_PENTIUMPRO
24165 && (alg == rep_prefix_4_byte || alg == rep_prefix_1_byte))
24166 desired_align = 8;
24167
24168 if (optimize_size)
24169 desired_align = 1;
24170 if (desired_align < align)
24171 desired_align = align;
24172 if (expected_size != -1 && expected_size < 4)
24173 desired_align = align;
24174
24175 return desired_align;
24176 }
24177
24178
24179 /* Helper function for memset.  For QImode value 0xXY produce
24180 0xXYXYXYXY of the width specified by MODE.  This is essentially
24181 a * 0x01010101, but we can do slightly better than
24182 synth_mult by unwinding the sequence by hand on CPUs with
24183 slow multiply.  */
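/* For instance, promoting the constant 0xAB to SImode yields 0xABABABAB
   directly.  A non-constant QImode value is either multiplied by
   0x01010101 (0x0101010101010101 for DImode) or widened step by step
   with shifts and ORs (1 -> 2 -> 4 -> 8 bytes), whichever the cost
   tables say is cheaper.  */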
24184 static rtx
24185 promote_duplicated_reg (enum machine_mode mode, rtx val)
24186 {
24187 enum machine_mode valmode = GET_MODE (val);
24188 rtx tmp;
24189 int nops = mode == DImode ? 3 : 2;
24190
24191 gcc_assert (mode == SImode || mode == DImode || val == const0_rtx);
24192 if (val == const0_rtx)
24193 return copy_to_mode_reg (mode, CONST0_RTX (mode));
24194 if (CONST_INT_P (val))
24195 {
24196 HOST_WIDE_INT v = INTVAL (val) & 255;
24197
24198 v |= v << 8;
24199 v |= v << 16;
24200 if (mode == DImode)
24201 v |= (v << 16) << 16;
24202 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
24203 }
24204
24205 if (valmode == VOIDmode)
24206 valmode = QImode;
24207 if (valmode != QImode)
24208 val = gen_lowpart (QImode, val);
24209 if (mode == QImode)
24210 return val;
24211 if (!TARGET_PARTIAL_REG_STALL)
24212 nops--;
24213 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
24214 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
24215 <= (ix86_cost->shift_const + ix86_cost->add) * nops
24216 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
24217 {
24218 rtx reg = convert_modes (mode, QImode, val, true);
24219 tmp = promote_duplicated_reg (mode, const1_rtx);
24220 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
24221 OPTAB_DIRECT);
24222 }
24223 else
24224 {
24225 rtx reg = convert_modes (mode, QImode, val, true);
24226
24227 if (!TARGET_PARTIAL_REG_STALL)
24228 if (mode == SImode)
24229 emit_insn (gen_movsi_insv_1 (reg, reg));
24230 else
24231 emit_insn (gen_movdi_insv_1 (reg, reg));
24232 else
24233 {
24234 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
24235 NULL, 1, OPTAB_DIRECT);
24236 reg =
24237 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24238 }
24239 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
24240 NULL, 1, OPTAB_DIRECT);
24241 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24242 if (mode == SImode)
24243 return reg;
24244 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
24245 NULL, 1, OPTAB_DIRECT);
24246 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24247 return reg;
24248 }
24249 }
24250
24251 /* Duplicate value VAL using promote_duplicated_reg into maximal size that will
24252 be needed by main loop copying SIZE_NEEDED chunks and prologue getting
24253 alignment from ALIGN to DESIRED_ALIGN. */
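/* E.g. a memset whose main loop stores DImode chunks on 64-bit needs the
   fill byte broadcast to all 8 bytes of a DImode register, while a loop
   that only ever stores HImode pieces needs just a 2-byte broadcast.  */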
24254 static rtx
24255 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align,
24256 int align)
24257 {
24258 rtx promoted_val;
24259
24260 if (TARGET_64BIT
24261 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
24262 promoted_val = promote_duplicated_reg (DImode, val);
24263 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
24264 promoted_val = promote_duplicated_reg (SImode, val);
24265 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
24266 promoted_val = promote_duplicated_reg (HImode, val);
24267 else
24268 promoted_val = val;
24269
24270 return promoted_val;
24271 }
24272
24273 /* Expand string move (memcpy) or store (memset) operation.  Use i386 string
24274 operations when profitable. The code depends upon architecture, block size
24275 and alignment, but always has one of the following overall structures:
24276
24277 Aligned move sequence:
24278
24279 1) Prologue guard: Conditional that jumps up to epilogues for small
24280 blocks that can be handled by epilogue alone. This is faster
24281 but also needed for correctness, since the prologue assumes the block
24282 is larger than the desired alignment.
24283
24284 Optional dynamic check for size and libcall for large
24285 blocks is emitted here too, with -minline-stringops-dynamically.
24286
24287 2) Prologue: copy first few bytes in order to get destination
24288 aligned to DESIRED_ALIGN. It is emitted only when ALIGN is less
24289 than DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be
24290 copied. We emit either a jump tree on power of two sized
24291 blocks, or a byte loop.
24292
24293 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
24294 with specified algorithm.
24295
24296 4) Epilogue: code copying tail of the block that is too small to be
24297 handled by main body (or up to size guarded by prologue guard).
24298
24299 Misaligned move sequence
24300
24301 1) misaligned move prologue/epilogue containing:
24302 a) Prologue handling small memory blocks and jumping to done_label
24303 (skipped if blocks are known to be large enough)
24304 b) Single move copying first DESIRED_ALIGN-ALIGN bytes if alignment is
24305 needed by single possibly misaligned move
24306 (skipped if alignment is not needed)
24307 c) Copy of last SIZE_NEEDED bytes by possibly misaligned moves
24308
24309 2) Zero size guard dispatching to done_label, if needed
24310
24311 3) Dispatch to library call, if needed.
24312 
24313 4) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
24314 with the specified algorithm.  */
24315 bool
24316 ix86_expand_set_or_movmem (rtx dst, rtx src, rtx count_exp, rtx val_exp,
24317 rtx align_exp, rtx expected_align_exp,
24318 rtx expected_size_exp, rtx min_size_exp,
24319 rtx max_size_exp, rtx probable_max_size_exp,
24320 bool issetmem)
24321 {
24322 rtx destreg;
24323 rtx srcreg = NULL;
24324 rtx_code_label *label = NULL;
24325 rtx tmp;
24326 rtx_code_label *jump_around_label = NULL;
24327 HOST_WIDE_INT align = 1;
24328 unsigned HOST_WIDE_INT count = 0;
24329 HOST_WIDE_INT expected_size = -1;
24330 int size_needed = 0, epilogue_size_needed;
24331 int desired_align = 0, align_bytes = 0;
24332 enum stringop_alg alg;
24333 rtx promoted_val = NULL;
24334 rtx vec_promoted_val = NULL;
24335 bool force_loopy_epilogue = false;
24336 int dynamic_check;
24337 bool need_zero_guard = false;
24338 bool noalign;
24339 enum machine_mode move_mode = VOIDmode;
24340 int unroll_factor = 1;
24341 /* TODO: Once value ranges are available, fill in proper data. */
24342 unsigned HOST_WIDE_INT min_size = 0;
24343 unsigned HOST_WIDE_INT max_size = -1;
24344 unsigned HOST_WIDE_INT probable_max_size = -1;
24345 bool misaligned_prologue_used = false;
24346
24347 if (CONST_INT_P (align_exp))
24348 align = INTVAL (align_exp);
24349 /* i386 can do misaligned access at a reasonably increased cost.  */
24350 if (CONST_INT_P (expected_align_exp)
24351 && INTVAL (expected_align_exp) > align)
24352 align = INTVAL (expected_align_exp);
24353 /* ALIGN is the minimum of destination and source alignment, but we care here
24354 just about destination alignment. */
24355 else if (!issetmem
24356 && MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
24357 align = MEM_ALIGN (dst) / BITS_PER_UNIT;
24358
24359 if (CONST_INT_P (count_exp))
24360 {
24361 min_size = max_size = probable_max_size = count = expected_size
24362 = INTVAL (count_exp);
24363 /* When COUNT is 0, there is nothing to do. */
24364 if (!count)
24365 return true;
24366 }
24367 else
24368 {
24369 if (min_size_exp)
24370 min_size = INTVAL (min_size_exp);
24371 if (max_size_exp)
24372 max_size = INTVAL (max_size_exp);
24373 if (probable_max_size_exp)
24374 probable_max_size = INTVAL (probable_max_size_exp);
24375 if (CONST_INT_P (expected_size_exp))
24376 expected_size = INTVAL (expected_size_exp);
24377 }
24378
24379 /* Make sure we don't need to care about overflow later on. */
24380 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
24381 return false;
24382
24383 /* Step 0: Decide on preferred algorithm, desired alignment and
24384 size of chunks to be copied by main loop. */
24385 alg = decide_alg (count, expected_size, min_size, probable_max_size,
24386 issetmem,
24387 issetmem && val_exp == const0_rtx,
24388 &dynamic_check, &noalign);
24389 if (alg == libcall)
24390 return false;
24391 gcc_assert (alg != no_stringop);
24392
24393 /* For now the vector version of memset is generated only for memory zeroing,
24394 as creating the promoted vector value is very cheap in this case.  */
24395 if (issetmem && alg == vector_loop && val_exp != const0_rtx)
24396 alg = unrolled_loop;
24397
24398 if (!count)
24399 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
24400 destreg = ix86_copy_addr_to_reg (XEXP (dst, 0));
24401 if (!issetmem)
24402 srcreg = ix86_copy_addr_to_reg (XEXP (src, 0));
24403
24404 unroll_factor = 1;
24405 move_mode = word_mode;
24406 switch (alg)
24407 {
24408 case libcall:
24409 case no_stringop:
24410 case last_alg:
24411 gcc_unreachable ();
24412 case loop_1_byte:
24413 need_zero_guard = true;
24414 move_mode = QImode;
24415 break;
24416 case loop:
24417 need_zero_guard = true;
24418 break;
24419 case unrolled_loop:
24420 need_zero_guard = true;
24421 unroll_factor = (TARGET_64BIT ? 4 : 2);
24422 break;
24423 case vector_loop:
24424 need_zero_guard = true;
24425 unroll_factor = 4;
24426 /* Find the widest supported mode. */
24427 move_mode = word_mode;
24428 while (optab_handler (mov_optab, GET_MODE_WIDER_MODE (move_mode))
24429 != CODE_FOR_nothing)
24430 move_mode = GET_MODE_WIDER_MODE (move_mode);
24431
24432 /* Find the corresponding vector mode with the same size as MOVE_MODE.
24433 MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
24434 if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
24435 {
24436 int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
24437 move_mode = mode_for_vector (word_mode, nunits);
24438 if (optab_handler (mov_optab, move_mode) == CODE_FOR_nothing)
24439 move_mode = word_mode;
24440 }
24441 gcc_assert (optab_handler (mov_optab, move_mode) != CODE_FOR_nothing);
24442 break;
24443 case rep_prefix_8_byte:
24444 move_mode = DImode;
24445 break;
24446 case rep_prefix_4_byte:
24447 move_mode = SImode;
24448 break;
24449 case rep_prefix_1_byte:
24450 move_mode = QImode;
24451 break;
24452 }
24453 size_needed = GET_MODE_SIZE (move_mode) * unroll_factor;
24454 epilogue_size_needed = size_needed;
24455
24456 desired_align = decide_alignment (align, alg, expected_size, move_mode);
24457 if (!TARGET_ALIGN_STRINGOPS || noalign)
24458 align = desired_align;
24459
24460 /* Step 1: Prologue guard. */
24461
24462 /* Alignment code needs count to be in register. */
24463 if (CONST_INT_P (count_exp) && desired_align > align)
24464 {
24465 if (INTVAL (count_exp) > desired_align
24466 && INTVAL (count_exp) > size_needed)
24467 {
24468 align_bytes
24469 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
24470 if (align_bytes <= 0)
24471 align_bytes = 0;
24472 else
24473 align_bytes = desired_align - align_bytes;
24474 }
24475 if (align_bytes == 0)
24476 count_exp = force_reg (counter_mode (count_exp), count_exp);
24477 }
24478 gcc_assert (desired_align >= 1 && align >= 1);
24479
24480 /* Misaligned move sequences handle both prologue and epilogue at once.
24481 Default code generation results in smaller code for large alignments
24482 and also avoids redundant work when sizes are known precisely.  */
24483 misaligned_prologue_used
24484 = (TARGET_MISALIGNED_MOVE_STRING_PRO_EPILOGUES
24485 && MAX (desired_align, epilogue_size_needed) <= 32
24486 && desired_align <= epilogue_size_needed
24487 && ((desired_align > align && !align_bytes)
24488 || (!count && epilogue_size_needed > 1)));
24489
24490 /* Do the cheap promotion to allow better CSE across the
24491 main loop and epilogue (i.e. one load of the big constant in
24492 front of all the code).
24493 For now the misaligned move sequences do not have a fast path
24494 without broadcasting.  */
24495 if (issetmem && ((CONST_INT_P (val_exp) || misaligned_prologue_used)))
24496 {
24497 if (alg == vector_loop)
24498 {
24499 gcc_assert (val_exp == const0_rtx);
24500 vec_promoted_val = promote_duplicated_reg (move_mode, val_exp);
24501 promoted_val = promote_duplicated_reg_to_size (val_exp,
24502 GET_MODE_SIZE (word_mode),
24503 desired_align, align);
24504 }
24505 else
24506 {
24507 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
24508 desired_align, align);
24509 }
24510 }
24511 /* Misaligned move sequences handle both prologues and epilogues at once.
24512 Default code generation results in smaller code for large alignments and
24513 also avoids redundant work when sizes are known precisely. */
24514 if (misaligned_prologue_used)
24515 {
24516 /* The misaligned move prologue handles small blocks by itself.  */
24517 expand_set_or_movmem_prologue_epilogue_by_misaligned_moves
24518 (dst, src, &destreg, &srcreg,
24519 move_mode, promoted_val, vec_promoted_val,
24520 &count_exp,
24521 &jump_around_label,
24522 desired_align < align
24523 ? MAX (desired_align, epilogue_size_needed) : epilogue_size_needed,
24524 desired_align, align, &min_size, dynamic_check, issetmem);
24525 if (!issetmem)
24526 src = change_address (src, BLKmode, srcreg);
24527 dst = change_address (dst, BLKmode, destreg);
24528 set_mem_align (dst, desired_align * BITS_PER_UNIT);
24529 epilogue_size_needed = 0;
24530 if (need_zero_guard && !min_size)
24531 {
24532 /* It is possible that we copied enough so the main loop will not
24533 execute. */
24534 gcc_assert (size_needed > 1);
24535 if (jump_around_label == NULL_RTX)
24536 jump_around_label = gen_label_rtx ();
24537 emit_cmp_and_jump_insns (count_exp,
24538 GEN_INT (size_needed),
24539 LTU, 0, counter_mode (count_exp), 1, jump_around_label);
24540 if (expected_size == -1
24541 || expected_size < (desired_align - align) / 2 + size_needed)
24542 predict_jump (REG_BR_PROB_BASE * 20 / 100);
24543 else
24544 predict_jump (REG_BR_PROB_BASE * 60 / 100);
24545 }
24546 }
24547 /* Ensure that alignment prologue won't copy past end of block. */
24548 else if (size_needed > 1 || (desired_align > 1 && desired_align > align))
24549 {
24550 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
24551 /* The epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
24552 Make sure EPILOGUE_SIZE_NEEDED is a power of 2.  */
24553 epilogue_size_needed = 1 << (floor_log2 (epilogue_size_needed) + 1);
24554
24555 /* To improve performance of small blocks, we jump around the VAL
24556 promoting code.  This means that if the promoted VAL is not constant,
24557 we might not use it in the epilogue and have to use the byte
24558 loop variant.  */
24559 if (issetmem && epilogue_size_needed > 2 && !promoted_val)
24560 force_loopy_epilogue = true;
24561 if ((count && count < (unsigned HOST_WIDE_INT) epilogue_size_needed)
24562 || max_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
24563 {
24564 /* If main algorithm works on QImode, no epilogue is needed.
24565 For small sizes just don't align anything. */
24566 if (size_needed == 1)
24567 desired_align = align;
24568 else
24569 goto epilogue;
24570 }
24571 else if (!count
24572 && min_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
24573 {
24574 label = gen_label_rtx ();
24575 emit_cmp_and_jump_insns (count_exp,
24576 GEN_INT (epilogue_size_needed),
24577 LTU, 0, counter_mode (count_exp), 1, label);
24578 if (expected_size == -1 || expected_size < epilogue_size_needed)
24579 predict_jump (REG_BR_PROB_BASE * 60 / 100);
24580 else
24581 predict_jump (REG_BR_PROB_BASE * 20 / 100);
24582 }
24583 }
24584
24585 /* Emit code to decide on runtime whether library call or inline should be
24586 used. */
24587 if (dynamic_check != -1)
24588 {
24589 if (!issetmem && CONST_INT_P (count_exp))
24590 {
24591 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
24592 {
24593 emit_block_move_via_libcall (dst, src, count_exp, false);
24594 count_exp = const0_rtx;
24595 goto epilogue;
24596 }
24597 }
24598 else
24599 {
24600 rtx_code_label *hot_label = gen_label_rtx ();
24601 if (jump_around_label == NULL_RTX)
24602 jump_around_label = gen_label_rtx ();
24603 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
24604 LEU, 0, counter_mode (count_exp),
24605 1, hot_label);
24606 predict_jump (REG_BR_PROB_BASE * 90 / 100);
24607 if (issetmem)
24608 set_storage_via_libcall (dst, count_exp, val_exp, false);
24609 else
24610 emit_block_move_via_libcall (dst, src, count_exp, false);
24611 emit_jump (jump_around_label);
24612 emit_label (hot_label);
24613 }
24614 }
24615
24616 /* Step 2: Alignment prologue. */
24617 /* Do the expensive promotion once we branched off the small blocks. */
24618 if (issetmem && !promoted_val)
24619 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
24620 desired_align, align);
24621
24622 if (desired_align > align && !misaligned_prologue_used)
24623 {
24624 if (align_bytes == 0)
24625 {
24626 /* Except for the first move in the prologue, we no longer know
24627 the constant offset in aliasing info.  It doesn't seem worth
24628 the pain to maintain it for the first move, so throw away
24629 the info early.  */
24630 dst = change_address (dst, BLKmode, destreg);
24631 if (!issetmem)
24632 src = change_address (src, BLKmode, srcreg);
24633 dst = expand_set_or_movmem_prologue (dst, src, destreg, srcreg,
24634 promoted_val, vec_promoted_val,
24635 count_exp, align, desired_align,
24636 issetmem);
24637 /* At most desired_align - align bytes are copied. */
24638 if (min_size < (unsigned)(desired_align - align))
24639 min_size = 0;
24640 else
24641 min_size -= desired_align - align;
24642 }
24643 else
24644 {
24645 /* If we know how many bytes need to be stored before dst is
24646 sufficiently aligned, maintain aliasing info accurately. */
24647 dst = expand_set_or_movmem_constant_prologue (dst, &src, destreg,
24648 srcreg,
24649 promoted_val,
24650 vec_promoted_val,
24651 desired_align,
24652 align_bytes,
24653 issetmem);
24654
24655 count_exp = plus_constant (counter_mode (count_exp),
24656 count_exp, -align_bytes);
24657 count -= align_bytes;
24658 min_size -= align_bytes;
24659 max_size -= align_bytes;
24660 }
24661 if (need_zero_guard
24662 && !min_size
24663 && (count < (unsigned HOST_WIDE_INT) size_needed
24664 || (align_bytes == 0
24665 && count < ((unsigned HOST_WIDE_INT) size_needed
24666 + desired_align - align))))
24667 {
24668 /* It is possible that we copied enough so the main loop will not
24669 execute. */
24670 gcc_assert (size_needed > 1);
24671 if (label == NULL_RTX)
24672 label = gen_label_rtx ();
24673 emit_cmp_and_jump_insns (count_exp,
24674 GEN_INT (size_needed),
24675 LTU, 0, counter_mode (count_exp), 1, label);
24676 if (expected_size == -1
24677 || expected_size < (desired_align - align) / 2 + size_needed)
24678 predict_jump (REG_BR_PROB_BASE * 20 / 100);
24679 else
24680 predict_jump (REG_BR_PROB_BASE * 60 / 100);
24681 }
24682 }
24683 if (label && size_needed == 1)
24684 {
24685 emit_label (label);
24686 LABEL_NUSES (label) = 1;
24687 label = NULL;
24688 epilogue_size_needed = 1;
24689 if (issetmem)
24690 promoted_val = val_exp;
24691 }
24692 else if (label == NULL_RTX && !misaligned_prologue_used)
24693 epilogue_size_needed = size_needed;
24694
24695 /* Step 3: Main loop. */
24696
24697 switch (alg)
24698 {
24699 case libcall:
24700 case no_stringop:
24701 case last_alg:
24702 gcc_unreachable ();
24703 case loop_1_byte:
24704 case loop:
24705 case unrolled_loop:
24706 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, promoted_val,
24707 count_exp, move_mode, unroll_factor,
24708 expected_size, issetmem);
24709 break;
24710 case vector_loop:
24711 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg,
24712 vec_promoted_val, count_exp, move_mode,
24713 unroll_factor, expected_size, issetmem);
24714 break;
24715 case rep_prefix_8_byte:
24716 case rep_prefix_4_byte:
24717 case rep_prefix_1_byte:
24718 expand_set_or_movmem_via_rep (dst, src, destreg, srcreg, promoted_val,
24719 val_exp, count_exp, move_mode, issetmem);
24720 break;
24721 }
24722 /* Adjust properly the offset of src and dest memory for aliasing. */
24723 if (CONST_INT_P (count_exp))
24724 {
24725 if (!issetmem)
24726 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
24727 (count / size_needed) * size_needed);
24728 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
24729 (count / size_needed) * size_needed);
24730 }
24731 else
24732 {
24733 if (!issetmem)
24734 src = change_address (src, BLKmode, srcreg);
24735 dst = change_address (dst, BLKmode, destreg);
24736 }
24737
24738 /* Step 4: Epilogue to copy the remaining bytes. */
24739 epilogue:
24740 if (label)
24741 {
24742 /* When the main loop is done, COUNT_EXP might hold the original count,
24743 while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
24744 Epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
24745 bytes.  Compensate if needed.  */
24746
24747 if (size_needed < epilogue_size_needed)
24748 {
24749 tmp =
24750 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
24751 GEN_INT (size_needed - 1), count_exp, 1,
24752 OPTAB_DIRECT);
24753 if (tmp != count_exp)
24754 emit_move_insn (count_exp, tmp);
24755 }
24756 emit_label (label);
24757 LABEL_NUSES (label) = 1;
24758 }
24759
24760 if (count_exp != const0_rtx && epilogue_size_needed > 1)
24761 {
24762 if (force_loopy_epilogue)
24763 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
24764 epilogue_size_needed);
24765 else
24766 {
24767 if (issetmem)
24768 expand_setmem_epilogue (dst, destreg, promoted_val,
24769 vec_promoted_val, count_exp,
24770 epilogue_size_needed);
24771 else
24772 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
24773 epilogue_size_needed);
24774 }
24775 }
24776 if (jump_around_label)
24777 emit_label (jump_around_label);
24778 return true;
24779 }
24780
24781
24782 /* Expand the appropriate insns for doing strlen if not just doing
24783 repnz; scasb
24784
24785 out = result, initialized with the start address
24786 align_rtx = alignment of the address.
24787 scratch = scratch register, initialized with the start address when
24788 not aligned, otherwise undefined
24789
24790 This is just the body. It needs the initializations mentioned above and
24791 some address computing at the end. These things are done in i386.md. */
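/* The expansion below first advances OUT a byte at a time (checking for
   the terminating NUL) until it is 4-byte aligned, then scans the string
   one SImode word per iteration using the zero-byte test documented
   further down, and finally fixes up OUT to point at the NUL byte,
   using CMOV to avoid branches when available.  */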
24792
24793 static void
24794 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
24795 {
24796 int align;
24797 rtx tmp;
24798 rtx_code_label *align_2_label = NULL;
24799 rtx_code_label *align_3_label = NULL;
24800 rtx_code_label *align_4_label = gen_label_rtx ();
24801 rtx_code_label *end_0_label = gen_label_rtx ();
24802 rtx mem;
24803 rtx tmpreg = gen_reg_rtx (SImode);
24804 rtx scratch = gen_reg_rtx (SImode);
24805 rtx cmp;
24806
24807 align = 0;
24808 if (CONST_INT_P (align_rtx))
24809 align = INTVAL (align_rtx);
24810
24811 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
24812
24813 /* Is there a known alignment and is it less than 4? */
24814 if (align < 4)
24815 {
24816 rtx scratch1 = gen_reg_rtx (Pmode);
24817 emit_move_insn (scratch1, out);
24818 /* Is there a known alignment and is it not 2? */
24819 if (align != 2)
24820 {
24821 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
24822 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
24823
24824 /* Leave just the 3 lower bits. */
24825 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
24826 NULL_RTX, 0, OPTAB_WIDEN);
24827
24828 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
24829 Pmode, 1, align_4_label);
24830 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
24831 Pmode, 1, align_2_label);
24832 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
24833 Pmode, 1, align_3_label);
24834 }
24835 else
24836 {
24837 /* Since the alignment is 2, we have to check 2 or 0 bytes;
24838 check whether it is aligned to 4 bytes.  */
24839
24840 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
24841 NULL_RTX, 0, OPTAB_WIDEN);
24842
24843 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
24844 Pmode, 1, align_4_label);
24845 }
24846
24847 mem = change_address (src, QImode, out);
24848
24849 /* Now compare the bytes. */
24850
24851 /* Compare the first n unaligned bytes on a byte-by-byte basis.  */
24852 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
24853 QImode, 1, end_0_label);
24854
24855 /* Increment the address. */
24856 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
24857
24858 /* Not needed with an alignment of 2 */
24859 if (align != 2)
24860 {
24861 emit_label (align_2_label);
24862
24863 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
24864 end_0_label);
24865
24866 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
24867
24868 emit_label (align_3_label);
24869 }
24870
24871 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
24872 end_0_label);
24873
24874 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
24875 }
24876
24877 /* Generate a loop to check 4 bytes at a time.  It is not a good idea to
24878 align this loop: doing so only bloats the program, but does not help to
24879 speed it up.  */
24880 emit_label (align_4_label);
24881
24882 mem = change_address (src, SImode, out);
24883 emit_move_insn (scratch, mem);
24884 emit_insn (ix86_gen_add3 (out, out, GEN_INT (4)));
24885
24886 /* This formula yields a nonzero result iff one of the bytes is zero.
24887 This saves three branches inside the loop and many cycles.  */
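/* This is the well-known zero-byte test
   (x - 0x01010101) & ~x & 0x80808080 != 0:
   the top bit of a byte can survive both the subtraction and the ~x mask
   only when that byte (or, through a borrow, a lower one) is zero, so
   the result is nonzero exactly when some byte of x is zero.  */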
24888
24889 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
24890 emit_insn (gen_one_cmplsi2 (scratch, scratch));
24891 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
24892 emit_insn (gen_andsi3 (tmpreg, tmpreg,
24893 gen_int_mode (0x80808080, SImode)));
24894 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
24895 align_4_label);
24896
24897 if (TARGET_CMOVE)
24898 {
24899 rtx reg = gen_reg_rtx (SImode);
24900 rtx reg2 = gen_reg_rtx (Pmode);
24901 emit_move_insn (reg, tmpreg);
24902 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
24903
24904 /* If zero is not in the first two bytes, move two bytes forward. */
24905 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
24906 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
24907 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
24908 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
24909 gen_rtx_IF_THEN_ELSE (SImode, tmp,
24910 reg,
24911 tmpreg)));
24912 /* Emit lea manually to avoid clobbering of flags. */
24913 emit_insn (gen_rtx_SET (SImode, reg2,
24914 gen_rtx_PLUS (Pmode, out, const2_rtx)));
24915
24916 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
24917 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
24918 emit_insn (gen_rtx_SET (VOIDmode, out,
24919 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
24920 reg2,
24921 out)));
24922 }
24923 else
24924 {
24925 rtx_code_label *end_2_label = gen_label_rtx ();
24926 /* Is zero in the first two bytes? */
24927
24928 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
24929 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
24930 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
24931 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
24932 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
24933 pc_rtx);
24934 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
24935 JUMP_LABEL (tmp) = end_2_label;
24936
24937 /* Not in the first two. Move two bytes forward. */
24938 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
24939 emit_insn (ix86_gen_add3 (out, out, const2_rtx));
24940
24941 emit_label (end_2_label);
24942
24943 }
24944
24945 /* Avoid a branch when fixing up the final byte position. */
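/* At this point OUT is 4 bytes past the two candidate bytes and bit 7 of
   TMPREG's low byte is set iff the first of those bytes is the zero.
   Doubling the low byte copies that bit into the carry flag, so the
   carry-aware subtract below yields either OUT - 4 (first byte) or
   OUT - 3 (second byte), i.e. the address of the terminating zero.  */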
24946 tmpreg = gen_lowpart (QImode, tmpreg);
24947 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
24948 tmp = gen_rtx_REG (CCmode, FLAGS_REG);
24949 cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx);
24950 emit_insn (ix86_gen_sub3_carry (out, out, GEN_INT (3), tmp, cmp));
24951
24952 emit_label (end_0_label);
24953 }
24954
24955 /* Expand strlen. */
24956
24957 bool
24958 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
24959 {
24960 rtx addr, scratch1, scratch2, scratch3, scratch4;
24961
24962 /* The generic case of the strlen expander is long. Avoid expanding it
24963 unless TARGET_INLINE_ALL_STRINGOPS. */
24964
24965 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
24966 && !TARGET_INLINE_ALL_STRINGOPS
24967 && !optimize_insn_for_size_p ()
24968 && (!CONST_INT_P (align) || INTVAL (align) < 4))
24969 return false;
24970
24971 addr = force_reg (Pmode, XEXP (src, 0));
24972 scratch1 = gen_reg_rtx (Pmode);
24973
24974 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
24975 && !optimize_insn_for_size_p ())
24976 {
24977 /* It seems that some optimizer does not combine a call like
24978 foo(strlen(bar), strlen(bar));
24979 when the move and the subtraction are done here. It computes
24980 the length only once when these instructions are emitted inside
24981 output_strlen_unroll(). But since &bar[strlen(bar)] is often used,
24982 and this uses one fewer register for the lifetime of
24983 output_strlen_unroll(), it is better this way. */
24984
24985 emit_move_insn (out, addr);
24986
24987 ix86_expand_strlensi_unroll_1 (out, src, align);
24988
24989 /* strlensi_unroll_1 returns the address of the zero at the end of
24990 the string, like memchr(), so compute the length by subtracting
24991 the start address. */
24992 emit_insn (ix86_gen_sub3 (out, out, addr));
24993 }
24994 else
24995 {
24996 rtx unspec;
24997
24998 /* Can't use this if the user has appropriated eax, ecx, or edi. */
24999 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
25000 return false;
25001
25002 scratch2 = gen_reg_rtx (Pmode);
25003 scratch3 = gen_reg_rtx (Pmode);
25004 scratch4 = force_reg (Pmode, constm1_rtx);
25005
25006 emit_move_insn (scratch3, addr);
25007 eoschar = force_reg (QImode, eoschar);
25008
25009 src = replace_equiv_address_nv (src, scratch3);
25010
25011 /* If .md starts supporting :P, this can be done in .md. */
25012 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
25013 scratch4), UNSPEC_SCAS);
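/* The strlenqi_1 pattern below performs a scasb-style scan: the count
   register starts at -1 (scratch4) and is presumably decremented once per
   byte scanned, including the terminating byte, leaving -(length + 2) in
   scratch1.  Complementing gives length + 1, and adding -1 yields the
   final length in OUT.  */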
25014 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
25015 emit_insn (ix86_gen_one_cmpl2 (scratch2, scratch1));
25016 emit_insn (ix86_gen_add3 (out, scratch2, constm1_rtx));
25017 }
25018 return true;
25019 }
25020
25021 /* For a given symbol (function), construct code to compute the address of its
25022 PLT entry in the large x86-64 PIC model. */
25023 static rtx
25024 construct_plt_address (rtx symbol)
25025 {
25026 rtx tmp, unspec;
25027
25028 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
25029 gcc_assert (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF);
25030 gcc_assert (Pmode == DImode);
25031
25032 tmp = gen_reg_rtx (Pmode);
25033 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
25034
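/* In the large PIC model the PLT entry cannot be reached with a RIP-relative
   reference, so its address is formed explicitly as the GOT base
   (pic_offset_table_rtx) plus the symbol@PLTOFF offset.  */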
25035 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
25036 emit_insn (ix86_gen_add3 (tmp, tmp, pic_offset_table_rtx));
25037 return tmp;
25038 }
25039
25040 rtx
25041 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
25042 rtx callarg2,
25043 rtx pop, bool sibcall)
25044 {
25045 rtx vec[3];
25046 rtx use = NULL, call;
25047 unsigned int vec_len = 0;
25048
25049 if (pop == const0_rtx)
25050 pop = NULL;
25051 gcc_assert (!TARGET_64BIT || !pop);
25052
25053 if (TARGET_MACHO && !TARGET_64BIT)
25054 {
25055 #if TARGET_MACHO
25056 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
25057 fnaddr = machopic_indirect_call_target (fnaddr);
25058 #endif
25059 }
25060 else
25061 {
25062 /* Static functions and indirect calls don't need the pic register. */
25063 if (flag_pic
25064 && (!TARGET_64BIT
25065 || (ix86_cmodel == CM_LARGE_PIC
25066 && DEFAULT_ABI != MS_ABI))
25067 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
25068 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
25069 {
25070 use_reg (&use, gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM));
25071 if (ix86_use_pseudo_pic_reg ())
25072 emit_move_insn (gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM),
25073 pic_offset_table_rtx);
25074 }
25075 }
25076
25077 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
25078 {
25079 rtx al = gen_rtx_REG (QImode, AX_REG);
25080 emit_move_insn (al, callarg2);
25081 use_reg (&use, al);
25082 }
25083
25084 if (ix86_cmodel == CM_LARGE_PIC
25085 && !TARGET_PECOFF
25086 && MEM_P (fnaddr)
25087 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
25088 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
25089 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
25090 else if (sibcall
25091 ? !sibcall_insn_operand (XEXP (fnaddr, 0), word_mode)
25092 : !call_insn_operand (XEXP (fnaddr, 0), word_mode))
25093 {
25094 fnaddr = convert_to_mode (word_mode, XEXP (fnaddr, 0), 1);
25095 fnaddr = gen_rtx_MEM (QImode, copy_to_mode_reg (word_mode, fnaddr));
25096 }
25097
25098 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
25099 if (retval)
25100 call = gen_rtx_SET (VOIDmode, retval, call);
25101 vec[vec_len++] = call;
25102
25103 if (pop)
25104 {
25105 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
25106 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
25107 vec[vec_len++] = pop;
25108 }
25109
25110 if (TARGET_64BIT_MS_ABI
25111 && (!callarg2 || INTVAL (callarg2) != -2))
25112 {
25113 int const cregs_size
25114 = ARRAY_SIZE (x86_64_ms_sysv_extra_clobbered_registers);
25115 int i;
25116
25117 for (i = 0; i < cregs_size; i++)
25118 {
25119 int regno = x86_64_ms_sysv_extra_clobbered_registers[i];
25120 enum machine_mode mode = SSE_REGNO_P (regno) ? TImode : DImode;
25121
25122 clobber_reg (&use, gen_rtx_REG (mode, regno));
25123 }
25124 }
25125
25126 if (vec_len > 1)
25127 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (vec_len, vec));
25128 call = emit_call_insn (call);
25129 if (use)
25130 CALL_INSN_FUNCTION_USAGE (call) = use;
25131
25132 return call;
25133 }
25134
25135 /* Output the assembly for a call instruction. */
25136
25137 const char *
25138 ix86_output_call_insn (rtx_insn *insn, rtx call_op)
25139 {
25140 bool direct_p = constant_call_address_operand (call_op, VOIDmode);
25141 bool seh_nop_p = false;
25142 const char *xasm;
25143
25144 if (SIBLING_CALL_P (insn))
25145 {
25146 if (direct_p)
25147 xasm = "jmp\t%P0";
25148 /* SEH epilogue detection requires the indirect branch case
25149 to include REX.W. */
25150 else if (TARGET_SEH)
25151 xasm = "rex.W jmp %A0";
25152 else
25153 xasm = "jmp\t%A0";
25154
25155 output_asm_insn (xasm, &call_op);
25156 return "";
25157 }
25158
25159 /* SEH unwinding can require an extra nop to be emitted in several
25160 circumstances. Determine if we have one of those. */
25161 if (TARGET_SEH)
25162 {
25163 rtx_insn *i;
25164
25165 for (i = NEXT_INSN (insn); i ; i = NEXT_INSN (i))
25166 {
25167 /* If we get to another real insn, we don't need the nop. */
25168 if (INSN_P (i))
25169 break;
25170
25171 /* If we get to the epilogue note, prevent a catch region from
25172 being adjacent to the standard epilogue sequence. If non-
25173 call-exceptions, we'll have done this during epilogue emission. */
25174 if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG
25175 && !flag_non_call_exceptions
25176 && !can_throw_internal (insn))
25177 {
25178 seh_nop_p = true;
25179 break;
25180 }
25181 }
25182
25183 /* If we didn't find a real insn following the call, prevent the
25184 unwinder from looking into the next function. */
25185 if (i == NULL)
25186 seh_nop_p = true;
25187 }
25188
25189 if (direct_p)
25190 xasm = "call\t%P0";
25191 else
25192 xasm = "call\t%A0";
25193
25194 output_asm_insn (xasm, &call_op);
25195
25196 if (seh_nop_p)
25197 return "nop";
25198
25199 return "";
25200 }
25201 \f
25202 /* Clear stack slot assignments remembered from previous functions.
25203 This is called from INIT_EXPANDERS once before RTL is emitted for each
25204 function. */
25205
25206 static struct machine_function *
25207 ix86_init_machine_status (void)
25208 {
25209 struct machine_function *f;
25210
25211 f = ggc_cleared_alloc<machine_function> ();
25212 f->use_fast_prologue_epilogue_nregs = -1;
25213 f->call_abi = ix86_abi;
25214
25215 return f;
25216 }
25217
25218 /* Return a MEM corresponding to a stack slot with mode MODE.
25219 Allocate a new slot if necessary.
25220
25221 The RTL for a function can have several slots available: N is
25222 which slot to use. */
25223
25224 rtx
25225 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
25226 {
25227 struct stack_local_entry *s;
25228
25229 gcc_assert (n < MAX_386_STACK_LOCALS);
25230
25231 for (s = ix86_stack_locals; s; s = s->next)
25232 if (s->mode == mode && s->n == n)
25233 return validize_mem (copy_rtx (s->rtl));
25234
25235 s = ggc_alloc<stack_local_entry> ();
25236 s->n = n;
25237 s->mode = mode;
25238 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
25239
25240 s->next = ix86_stack_locals;
25241 ix86_stack_locals = s;
25242 return validize_mem (copy_rtx (s->rtl));
25243 }
25244
25245 static void
25246 ix86_instantiate_decls (void)
25247 {
25248 struct stack_local_entry *s;
25249
25250 for (s = ix86_stack_locals; s; s = s->next)
25251 if (s->rtl != NULL_RTX)
25252 instantiate_decl_rtl (s->rtl);
25253 }
25254 \f
25255 /* Check whether x86 address PARTS is a pc-relative address. */
25256
25257 static bool
25258 rip_relative_addr_p (struct ix86_address *parts)
25259 {
25260 rtx base, index, disp;
25261
25262 base = parts->base;
25263 index = parts->index;
25264 disp = parts->disp;
25265
25266 if (disp && !base && !index)
25267 {
25268 if (TARGET_64BIT)
25269 {
25270 rtx symbol = disp;
25271
25272 if (GET_CODE (disp) == CONST)
25273 symbol = XEXP (disp, 0);
25274 if (GET_CODE (symbol) == PLUS
25275 && CONST_INT_P (XEXP (symbol, 1)))
25276 symbol = XEXP (symbol, 0);
25277
25278 if (GET_CODE (symbol) == LABEL_REF
25279 || (GET_CODE (symbol) == SYMBOL_REF
25280 && SYMBOL_REF_TLS_MODEL (symbol) == 0)
25281 || (GET_CODE (symbol) == UNSPEC
25282 && (XINT (symbol, 1) == UNSPEC_GOTPCREL
25283 || XINT (symbol, 1) == UNSPEC_PCREL
25284 || XINT (symbol, 1) == UNSPEC_GOTNTPOFF)))
25285 return true;
25286 }
25287 }
25288 return false;
25289 }
25290
25291 /* Calculate the length of the memory address in the instruction encoding.
25292 Includes addr32 prefix, does not include the one-byte modrm, opcode,
25293 or other prefixes. We never generate addr32 prefix for LEA insn. */
25294
25295 int
25296 memory_address_length (rtx addr, bool lea)
25297 {
25298 struct ix86_address parts;
25299 rtx base, index, disp;
25300 int len;
25301 int ok;
25302
25303 if (GET_CODE (addr) == PRE_DEC
25304 || GET_CODE (addr) == POST_INC
25305 || GET_CODE (addr) == PRE_MODIFY
25306 || GET_CODE (addr) == POST_MODIFY)
25307 return 0;
25308
25309 ok = ix86_decompose_address (addr, &parts);
25310 gcc_assert (ok);
25311
25312 len = (parts.seg == SEG_DEFAULT) ? 0 : 1;
25313
25314 /* If this is not LEA instruction, add the length of addr32 prefix. */
25315 if (TARGET_64BIT && !lea
25316 && (SImode_address_operand (addr, VOIDmode)
25317 || (parts.base && GET_MODE (parts.base) == SImode)
25318 || (parts.index && GET_MODE (parts.index) == SImode)))
25319 len++;
25320
25321 base = parts.base;
25322 index = parts.index;
25323 disp = parts.disp;
25324
25325 if (base && GET_CODE (base) == SUBREG)
25326 base = SUBREG_REG (base);
25327 if (index && GET_CODE (index) == SUBREG)
25328 index = SUBREG_REG (index);
25329
25330 gcc_assert (base == NULL_RTX || REG_P (base));
25331 gcc_assert (index == NULL_RTX || REG_P (index));
25332
25333 /* Rule of thumb:
25334 - esp as the base always wants an index,
25335 - ebp as the base always wants a displacement,
25336 - r12 as the base always wants an index,
25337 - r13 as the base always wants a displacement. */
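/* For instance, counting only the bytes beyond the one-byte modrm and
   assuming 64-bit registers with the default segment:
     (%rax)       -> 0
     (%rsp)       -> 1  (SIB byte)
     (%rbp)       -> 1  (disp8 of zero)
     16(%rax)     -> 1  (disp8)
     4096(%rax)   -> 4  (disp32)
     (%rax,%rcx)  -> 1  (SIB byte)  */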
25338
25339 /* Register Indirect. */
25340 if (base && !index && !disp)
25341 {
25342 /* esp (for its index) and ebp (for its displacement) need
25343 the two-byte modrm form. Similarly for r12 and r13 in 64-bit
25344 code. */
25345 if (base == arg_pointer_rtx
25346 || base == frame_pointer_rtx
25347 || REGNO (base) == SP_REG
25348 || REGNO (base) == BP_REG
25349 || REGNO (base) == R12_REG
25350 || REGNO (base) == R13_REG)
25351 len++;
25352 }
25353
25354 /* Direct Addressing. In 64-bit mode mod 00 r/m 5
25355 is not disp32, but disp32(%rip), so for disp32
25356 SIB byte is needed, unless print_operand_address
25357 optimizes it into disp32(%rip) or (%rip) is implied
25358 by UNSPEC. */
25359 else if (disp && !base && !index)
25360 {
25361 len += 4;
25362 if (TARGET_64BIT && !rip_relative_addr_p (&parts))
25363 len++;
25364 }
25365 else
25366 {
25367 /* Find the length of the displacement constant. */
25368 if (disp)
25369 {
25370 if (base && satisfies_constraint_K (disp))
25371 len += 1;
25372 else
25373 len += 4;
25374 }
25375 /* ebp always wants a displacement. Similarly r13. */
25376 else if (base && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
25377 len++;
25378
25379 /* An index requires the two-byte modrm form.... */
25380 if (index
25381 /* ...like esp (or r12), which always wants an index. */
25382 || base == arg_pointer_rtx
25383 || base == frame_pointer_rtx
25384 || (base && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
25385 len++;
25386 }
25387
25388 return len;
25389 }
25390
25391 /* Compute default value for "length_immediate" attribute. When SHORTFORM
25392 is set, expect that the insn has an 8-bit immediate alternative. */
25393 int
25394 ix86_attr_length_immediate_default (rtx_insn *insn, bool shortform)
25395 {
25396 int len = 0;
25397 int i;
25398 extract_insn_cached (insn);
25399 for (i = recog_data.n_operands - 1; i >= 0; --i)
25400 if (CONSTANT_P (recog_data.operand[i]))
25401 {
25402 enum attr_mode mode = get_attr_mode (insn);
25403
25404 gcc_assert (!len);
25405 if (shortform && CONST_INT_P (recog_data.operand[i]))
25406 {
25407 HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
25408 switch (mode)
25409 {
25410 case MODE_QI:
25411 len = 1;
25412 continue;
25413 case MODE_HI:
25414 ival = trunc_int_for_mode (ival, HImode);
25415 break;
25416 case MODE_SI:
25417 ival = trunc_int_for_mode (ival, SImode);
25418 break;
25419 default:
25420 break;
25421 }
25422 if (IN_RANGE (ival, -128, 127))
25423 {
25424 len = 1;
25425 continue;
25426 }
25427 }
25428 switch (mode)
25429 {
25430 case MODE_QI:
25431 len = 1;
25432 break;
25433 case MODE_HI:
25434 len = 2;
25435 break;
25436 case MODE_SI:
25437 len = 4;
25438 break;
25439 /* Immediates for DImode instructions are encoded
25440 as 32bit sign extended values. */
25441 case MODE_DI:
25442 len = 4;
25443 break;
25444 default:
25445 fatal_insn ("unknown insn mode", insn);
25446 }
25447 }
25448 return len;
25449 }
25450
25451 /* Compute default value for "length_address" attribute. */
25452 int
25453 ix86_attr_length_address_default (rtx_insn *insn)
25454 {
25455 int i;
25456
25457 if (get_attr_type (insn) == TYPE_LEA)
25458 {
25459 rtx set = PATTERN (insn), addr;
25460
25461 if (GET_CODE (set) == PARALLEL)
25462 set = XVECEXP (set, 0, 0);
25463
25464 gcc_assert (GET_CODE (set) == SET);
25465
25466 addr = SET_SRC (set);
25467
25468 return memory_address_length (addr, true);
25469 }
25470
25471 extract_insn_cached (insn);
25472 for (i = recog_data.n_operands - 1; i >= 0; --i)
25473 if (MEM_P (recog_data.operand[i]))
25474 {
25475 constrain_operands_cached (insn, reload_completed);
25476 if (which_alternative != -1)
25477 {
25478 const char *constraints = recog_data.constraints[i];
25479 int alt = which_alternative;
25480
25481 while (*constraints == '=' || *constraints == '+')
25482 constraints++;
25483 while (alt-- > 0)
25484 while (*constraints++ != ',')
25485 ;
25486 /* Skip ignored operands. */
25487 if (*constraints == 'X')
25488 continue;
25489 }
25490 return memory_address_length (XEXP (recog_data.operand[i], 0), false);
25491 }
25492 return 0;
25493 }
25494
25495 /* Compute default value for "length_vex" attribute. It includes
25496 2 or 3 byte VEX prefix and 1 opcode byte. */
25497
25498 int
25499 ix86_attr_length_vex_default (rtx_insn *insn, bool has_0f_opcode,
25500 bool has_vex_w)
25501 {
25502 int i;
25503
25504 /* Only the 0f opcode map can use the 2-byte VEX prefix, and the VEX W bit
25505 requires the 3-byte VEX prefix. */
25506 if (!has_0f_opcode || has_vex_w)
25507 return 3 + 1;
25508
25509 /* We can always use the 2-byte VEX prefix in 32-bit mode. */
25510 if (!TARGET_64BIT)
25511 return 2 + 1;
25512
25513 extract_insn_cached (insn);
25514
25515 for (i = recog_data.n_operands - 1; i >= 0; --i)
25516 if (REG_P (recog_data.operand[i]))
25517 {
25518 /* REX.W bit uses 3 byte VEX prefix. */
25519 if (GET_MODE (recog_data.operand[i]) == DImode
25520 && GENERAL_REG_P (recog_data.operand[i]))
25521 return 3 + 1;
25522 }
25523 else
25524 {
25525 /* REX.X or REX.B bits use 3 byte VEX prefix. */
25526 if (MEM_P (recog_data.operand[i])
25527 && x86_extended_reg_mentioned_p (recog_data.operand[i]))
25528 return 3 + 1;
25529 }
25530
25531 return 2 + 1;
25532 }
25533 \f
25534 /* Return the maximum number of instructions a cpu can issue. */
25535
25536 static int
25537 ix86_issue_rate (void)
25538 {
25539 switch (ix86_tune)
25540 {
25541 case PROCESSOR_PENTIUM:
25542 case PROCESSOR_BONNELL:
25543 case PROCESSOR_SILVERMONT:
25544 case PROCESSOR_INTEL:
25545 case PROCESSOR_K6:
25546 case PROCESSOR_BTVER2:
25547 case PROCESSOR_PENTIUM4:
25548 case PROCESSOR_NOCONA:
25549 return 2;
25550
25551 case PROCESSOR_PENTIUMPRO:
25552 case PROCESSOR_ATHLON:
25553 case PROCESSOR_K8:
25554 case PROCESSOR_AMDFAM10:
25555 case PROCESSOR_GENERIC:
25556 case PROCESSOR_BTVER1:
25557 return 3;
25558
25559 case PROCESSOR_BDVER1:
25560 case PROCESSOR_BDVER2:
25561 case PROCESSOR_BDVER3:
25562 case PROCESSOR_BDVER4:
25563 case PROCESSOR_CORE2:
25564 case PROCESSOR_NEHALEM:
25565 case PROCESSOR_SANDYBRIDGE:
25566 case PROCESSOR_HASWELL:
25567 return 4;
25568
25569 default:
25570 return 1;
25571 }
25572 }
25573
25574 /* A subroutine of ix86_adjust_cost -- return TRUE iff INSN reads the flags set
25575 by DEP_INSN and nothing else set by DEP_INSN. */
25576
25577 static bool
25578 ix86_flags_dependent (rtx_insn *insn, rtx_insn *dep_insn, enum attr_type insn_type)
25579 {
25580 rtx set, set2;
25581
25582 /* Simplify the test for uninteresting insns. */
25583 if (insn_type != TYPE_SETCC
25584 && insn_type != TYPE_ICMOV
25585 && insn_type != TYPE_FCMOV
25586 && insn_type != TYPE_IBR)
25587 return false;
25588
25589 if ((set = single_set (dep_insn)) != 0)
25590 {
25591 set = SET_DEST (set);
25592 set2 = NULL_RTX;
25593 }
25594 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
25595 && XVECLEN (PATTERN (dep_insn), 0) == 2
25596 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
25597 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
25598 {
25599 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
25600 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
25601 }
25602 else
25603 return false;
25604
25605 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
25606 return false;
25607
25608 /* This test is true if the dependent insn reads the flags but
25609 not any other potentially set register. */
25610 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
25611 return false;
25612
25613 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
25614 return false;
25615
25616 return true;
25617 }
25618
25619 /* Return true iff USE_INSN has a memory address with operands set by
25620 SET_INSN. */
25621
25622 bool
25623 ix86_agi_dependent (rtx_insn *set_insn, rtx_insn *use_insn)
25624 {
25625 int i;
25626 extract_insn_cached (use_insn);
25627 for (i = recog_data.n_operands - 1; i >= 0; --i)
25628 if (MEM_P (recog_data.operand[i]))
25629 {
25630 rtx addr = XEXP (recog_data.operand[i], 0);
25631 return modified_in_p (addr, set_insn) != 0;
25632 }
25633 return false;
25634 }
25635
25636 /* Helper function for exact_store_load_dependency.
25637 Return true if addr is found in insn. */
25638 static bool
25639 exact_dependency_1 (rtx addr, rtx insn)
25640 {
25641 enum rtx_code code;
25642 const char *format_ptr;
25643 int i, j;
25644
25645 code = GET_CODE (insn);
25646 switch (code)
25647 {
25648 case MEM:
25649 if (rtx_equal_p (addr, insn))
25650 return true;
25651 break;
25652 case REG:
25653 CASE_CONST_ANY:
25654 case SYMBOL_REF:
25655 case CODE_LABEL:
25656 case PC:
25657 case CC0:
25658 case EXPR_LIST:
25659 return false;
25660 default:
25661 break;
25662 }
25663
25664 format_ptr = GET_RTX_FORMAT (code);
25665 for (i = 0; i < GET_RTX_LENGTH (code); i++)
25666 {
25667 switch (*format_ptr++)
25668 {
25669 case 'e':
25670 if (exact_dependency_1 (addr, XEXP (insn, i)))
25671 return true;
25672 break;
25673 case 'E':
25674 for (j = 0; j < XVECLEN (insn, i); j++)
25675 if (exact_dependency_1 (addr, XVECEXP (insn, i, j)))
25676 return true;
25677 break;
25678 }
25679 }
25680 return false;
25681 }
25682
25683 /* Return true if there is an exact dependency between the store and the load,
25684 i.e. the same memory address is used in both. */
25685 static bool
25686 exact_store_load_dependency (rtx_insn *store, rtx_insn *load)
25687 {
25688 rtx set1, set2;
25689
25690 set1 = single_set (store);
25691 if (!set1)
25692 return false;
25693 if (!MEM_P (SET_DEST (set1)))
25694 return false;
25695 set2 = single_set (load);
25696 if (!set2)
25697 return false;
25698 if (exact_dependency_1 (SET_DEST (set1), SET_SRC (set2)))
25699 return true;
25700 return false;
25701 }
25702
25703 static int
25704 ix86_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep_insn, int cost)
25705 {
25706 enum attr_type insn_type, dep_insn_type;
25707 enum attr_memory memory;
25708 rtx set, set2;
25709 int dep_insn_code_number;
25710
25711 /* Anti and output dependencies have zero cost on all CPUs. */
25712 if (REG_NOTE_KIND (link) != 0)
25713 return 0;
25714
25715 dep_insn_code_number = recog_memoized (dep_insn);
25716
25717 /* If we can't recognize the insns, we can't really do anything. */
25718 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
25719 return cost;
25720
25721 insn_type = get_attr_type (insn);
25722 dep_insn_type = get_attr_type (dep_insn);
25723
25724 switch (ix86_tune)
25725 {
25726 case PROCESSOR_PENTIUM:
25727 /* Address Generation Interlock adds a cycle of latency. */
25728 if (insn_type == TYPE_LEA)
25729 {
25730 rtx addr = PATTERN (insn);
25731
25732 if (GET_CODE (addr) == PARALLEL)
25733 addr = XVECEXP (addr, 0, 0);
25734
25735 gcc_assert (GET_CODE (addr) == SET);
25736
25737 addr = SET_SRC (addr);
25738 if (modified_in_p (addr, dep_insn))
25739 cost += 1;
25740 }
25741 else if (ix86_agi_dependent (dep_insn, insn))
25742 cost += 1;
25743
25744 /* ??? Compares pair with jump/setcc. */
25745 if (ix86_flags_dependent (insn, dep_insn, insn_type))
25746 cost = 0;
25747
25748 /* Floating point stores require value to be ready one cycle earlier. */
25749 if (insn_type == TYPE_FMOV
25750 && get_attr_memory (insn) == MEMORY_STORE
25751 && !ix86_agi_dependent (dep_insn, insn))
25752 cost += 1;
25753 break;
25754
25755 case PROCESSOR_PENTIUMPRO:
25756 /* INT->FP conversion is expensive. */
25757 if (get_attr_fp_int_src (dep_insn))
25758 cost += 5;
25759
25760 /* There is one cycle extra latency between an FP op and a store. */
25761 if (insn_type == TYPE_FMOV
25762 && (set = single_set (dep_insn)) != NULL_RTX
25763 && (set2 = single_set (insn)) != NULL_RTX
25764 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
25765 && MEM_P (SET_DEST (set2)))
25766 cost += 1;
25767
25768 memory = get_attr_memory (insn);
25769
25770 /* Show the ability of the reorder buffer to hide the latency of a load by
25771 executing it in parallel with the previous instruction when the previous
25772 instruction is not needed to compute the address. */
25773 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
25774 && !ix86_agi_dependent (dep_insn, insn))
25775 {
25776 /* Claim moves to take one cycle, as the core can issue one load
25777 at a time and the next load can start a cycle later. */
25778 if (dep_insn_type == TYPE_IMOV
25779 || dep_insn_type == TYPE_FMOV)
25780 cost = 1;
25781 else if (cost > 1)
25782 cost--;
25783 }
25784 break;
25785
25786 case PROCESSOR_K6:
25787 /* The esp dependency is resolved before
25788 the instruction is really finished. */
25789 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
25790 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
25791 return 1;
25792
25793 /* INT->FP conversion is expensive. */
25794 if (get_attr_fp_int_src (dep_insn))
25795 cost += 5;
25796
25797 memory = get_attr_memory (insn);
25798
25799 /* Show the ability of the reorder buffer to hide the latency of a load by
25800 executing it in parallel with the previous instruction when the previous
25801 instruction is not needed to compute the address. */
25802 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
25803 && !ix86_agi_dependent (dep_insn, insn))
25804 {
25805 /* Claim moves to take one cycle, as the core can issue one load
25806 at a time and the next load can start a cycle later. */
25807 if (dep_insn_type == TYPE_IMOV
25808 || dep_insn_type == TYPE_FMOV)
25809 cost = 1;
25810 else if (cost > 2)
25811 cost -= 2;
25812 else
25813 cost = 1;
25814 }
25815 break;
25816
25817 case PROCESSOR_AMDFAM10:
25818 case PROCESSOR_BDVER1:
25819 case PROCESSOR_BDVER2:
25820 case PROCESSOR_BDVER3:
25821 case PROCESSOR_BDVER4:
25822 case PROCESSOR_BTVER1:
25823 case PROCESSOR_BTVER2:
25824 case PROCESSOR_GENERIC:
25825 /* The stack engine allows push and pop instructions to execute in parallel. */
25826 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
25827 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
25828 return 0;
25829 /* FALLTHRU */
25830
25831 case PROCESSOR_ATHLON:
25832 case PROCESSOR_K8:
25833 memory = get_attr_memory (insn);
25834
25835 /* Show the ability of the reorder buffer to hide the latency of a load by
25836 executing it in parallel with the previous instruction when the previous
25837 instruction is not needed to compute the address. */
25838 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
25839 && !ix86_agi_dependent (dep_insn, insn))
25840 {
25841 enum attr_unit unit = get_attr_unit (insn);
25842 int loadcost = 3;
25843
25844 /* Because of the difference between the length of integer and
25845 floating unit pipeline preparation stages, the memory operands
25846 for floating point are cheaper.
25847
25848 ??? For Athlon the difference is most probably 2. */
25849 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
25850 loadcost = 3;
25851 else
25852 loadcost = TARGET_ATHLON ? 2 : 0;
25853
25854 if (cost >= loadcost)
25855 cost -= loadcost;
25856 else
25857 cost = 0;
25858 }
25859 break;
25860
25861 case PROCESSOR_CORE2:
25862 case PROCESSOR_NEHALEM:
25863 case PROCESSOR_SANDYBRIDGE:
25864 case PROCESSOR_HASWELL:
25865 /* The stack engine allows push and pop instructions to execute in parallel. */
25866 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
25867 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
25868 return 0;
25869
25870 memory = get_attr_memory (insn);
25871
25872 /* Show the ability of the reorder buffer to hide the latency of a load by
25873 executing it in parallel with the previous instruction when the previous
25874 instruction is not needed to compute the address. */
25875 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
25876 && !ix86_agi_dependent (dep_insn, insn))
25877 {
25878 if (cost >= 4)
25879 cost -= 4;
25880 else
25881 cost = 0;
25882 }
25883 break;
25884
25885 case PROCESSOR_SILVERMONT:
25886 case PROCESSOR_INTEL:
25887 if (!reload_completed)
25888 return cost;
25889
25890 /* Increase cost of integer loads. */
25891 memory = get_attr_memory (dep_insn);
25892 if (memory == MEMORY_LOAD || memory == MEMORY_BOTH)
25893 {
25894 enum attr_unit unit = get_attr_unit (dep_insn);
25895 if (unit == UNIT_INTEGER && cost == 1)
25896 {
25897 if (memory == MEMORY_LOAD)
25898 cost = 3;
25899 else
25900 {
25901 /* Increase cost of ld/st for short int types only
25902 because of store forwarding issue. */
25903 rtx set = single_set (dep_insn);
25904 if (set && (GET_MODE (SET_DEST (set)) == QImode
25905 || GET_MODE (SET_DEST (set)) == HImode))
25906 {
25907 /* Increase cost of store/load insn if exact
25908 dependence exists and it is load insn. */
25909 enum attr_memory insn_memory = get_attr_memory (insn);
25910 if (insn_memory == MEMORY_LOAD
25911 && exact_store_load_dependency (dep_insn, insn))
25912 cost = 3;
25913 }
25914 }
25915 }
25916 }
25917
25918 default:
25919 break;
25920 }
25921
25922 return cost;
25923 }
25924
25925 /* How many alternative schedules to try. This should be as wide as the
25926 scheduling freedom in the DFA, but no wider. Making this value too
25927 large results in extra work for the scheduler. */
25928
25929 static int
25930 ia32_multipass_dfa_lookahead (void)
25931 {
25932 switch (ix86_tune)
25933 {
25934 case PROCESSOR_PENTIUM:
25935 return 2;
25936
25937 case PROCESSOR_PENTIUMPRO:
25938 case PROCESSOR_K6:
25939 return 1;
25940
25941 case PROCESSOR_BDVER1:
25942 case PROCESSOR_BDVER2:
25943 case PROCESSOR_BDVER3:
25944 case PROCESSOR_BDVER4:
25945 /* We use lookahead value 4 for BD both before and after reload
25946 schedules. Plan is to have value 8 included for O3. */
25947 return 4;
25948
25949 case PROCESSOR_CORE2:
25950 case PROCESSOR_NEHALEM:
25951 case PROCESSOR_SANDYBRIDGE:
25952 case PROCESSOR_HASWELL:
25953 case PROCESSOR_BONNELL:
25954 case PROCESSOR_SILVERMONT:
25955 case PROCESSOR_INTEL:
25956 /* Generally, we want haifa-sched:max_issue() to look ahead as far as
25957 the number of instructions that can be executed in a cycle, i.e.,
25958 issue_rate. I wonder why tuning for many CPUs does not do this. */
25959 if (reload_completed)
25960 return ix86_issue_rate ();
25961 /* Don't use lookahead for pre-reload schedule to save compile time. */
25962 return 0;
25963
25964 default:
25965 return 0;
25966 }
25967 }
25968
25969 /* Return true if target platform supports macro-fusion. */
25970
25971 static bool
25972 ix86_macro_fusion_p ()
25973 {
25974 return TARGET_FUSE_CMP_AND_BRANCH;
25975 }
25976
25977 /* Check whether the current microarchitecture supports macro fusion
25978 for the insn pair "CONDGEN + CONDJMP". Refer to the
25979 "Intel Architectures Optimization Reference Manual". */
25980
25981 static bool
25982 ix86_macro_fusion_pair_p (rtx_insn *condgen, rtx_insn *condjmp)
25983 {
25984 rtx src, dest;
25985 enum rtx_code ccode;
25986 rtx compare_set = NULL_RTX, test_if, cond;
25987 rtx alu_set = NULL_RTX, addr = NULL_RTX;
25988
25989 if (!any_condjump_p (condjmp))
25990 return false;
25991
25992 if (get_attr_type (condgen) != TYPE_TEST
25993 && get_attr_type (condgen) != TYPE_ICMP
25994 && get_attr_type (condgen) != TYPE_INCDEC
25995 && get_attr_type (condgen) != TYPE_ALU)
25996 return false;
25997
25998 compare_set = single_set (condgen);
25999 if (compare_set == NULL_RTX
26000 && !TARGET_FUSE_ALU_AND_BRANCH)
26001 return false;
26002
26003 if (compare_set == NULL_RTX)
26004 {
26005 int i;
26006 rtx pat = PATTERN (condgen);
26007 for (i = 0; i < XVECLEN (pat, 0); i++)
26008 if (GET_CODE (XVECEXP (pat, 0, i)) == SET)
26009 {
26010 rtx set_src = SET_SRC (XVECEXP (pat, 0, i));
26011 if (GET_CODE (set_src) == COMPARE)
26012 compare_set = XVECEXP (pat, 0, i);
26013 else
26014 alu_set = XVECEXP (pat, 0, i);
26015 }
26016 }
26017 if (compare_set == NULL_RTX)
26018 return false;
26019 src = SET_SRC (compare_set);
26020 if (GET_CODE (src) != COMPARE)
26021 return false;
26022
26023 /* Macro-fusion for cmp/test MEM-IMM + conditional jmp is not
26024 supported. */
26025 if ((MEM_P (XEXP (src, 0))
26026 && CONST_INT_P (XEXP (src, 1)))
26027 || (MEM_P (XEXP (src, 1))
26028 && CONST_INT_P (XEXP (src, 0))))
26029 return false;
26030
26031 /* No fusion for RIP-relative address. */
26032 if (MEM_P (XEXP (src, 0)))
26033 addr = XEXP (XEXP (src, 0), 0);
26034 else if (MEM_P (XEXP (src, 1)))
26035 addr = XEXP (XEXP (src, 1), 0);
26036
26037 if (addr) {
26038 ix86_address parts;
26039 int ok = ix86_decompose_address (addr, &parts);
26040 gcc_assert (ok);
26041
26042 if (rip_relative_addr_p (&parts))
26043 return false;
26044 }
26045
26046 test_if = SET_SRC (pc_set (condjmp));
26047 cond = XEXP (test_if, 0);
26048 ccode = GET_CODE (cond);
26049 /* Check whether the conditional jump uses the Sign or Overflow flags. */
26050 if (!TARGET_FUSE_CMP_AND_BRANCH_SOFLAGS
26051 && (ccode == GE
26052 || ccode == GT
26053 || ccode == LE
26054 || ccode == LT))
26055 return false;
26056
26057 /* Return true for TYPE_TEST and TYPE_ICMP. */
26058 if (get_attr_type (condgen) == TYPE_TEST
26059 || get_attr_type (condgen) == TYPE_ICMP)
26060 return true;
26061
26062 /* What remains is the macro-fusion case for alu + jmp. */
26063 if (!TARGET_FUSE_ALU_AND_BRANCH || !alu_set)
26064 return false;
26065
26066 /* No fusion for alu op with memory destination operand. */
26067 dest = SET_DEST (alu_set);
26068 if (MEM_P (dest))
26069 return false;
26070
26071 /* Macro-fusion for inc/dec + unsigned conditional jump is not
26072 supported. */
26073 if (get_attr_type (condgen) == TYPE_INCDEC
26074 && (ccode == GEU
26075 || ccode == GTU
26076 || ccode == LEU
26077 || ccode == LTU))
26078 return false;
26079
26080 return true;
26081 }
26082
26083 /* Try to reorder the ready list to take advantage of Atom's pipelined IMUL
26084 execution. It is applied if
26085 (1) an IMUL instruction is at the top of the list;
26086 (2) the ready list contains exactly one producer of an independent IMUL
26087 instruction.
26088 Return the index of the IMUL producer if it was found, and -1 otherwise. */
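/* For example, if an SImode imul sits at the top of the ready list and some
   other ready insn is the sole remaining producer of an operand of a second,
   independent imul, moving that producer to the top lets its dependent imul
   become ready right after the first one, keeping Atom's pipelined IMUL unit
   busy.  */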
26089 static int
26090 do_reorder_for_imul (rtx_insn **ready, int n_ready)
26091 {
26092 rtx_insn *insn;
26093 rtx set, insn1, insn2;
26094 sd_iterator_def sd_it;
26095 dep_t dep;
26096 int index = -1;
26097 int i;
26098
26099 if (!TARGET_BONNELL)
26100 return index;
26101
26102 /* Check that IMUL instruction is on the top of ready list. */
26103 insn = ready[n_ready - 1];
26104 set = single_set (insn);
26105 if (!set)
26106 return index;
26107 if (!(GET_CODE (SET_SRC (set)) == MULT
26108 && GET_MODE (SET_SRC (set)) == SImode))
26109 return index;
26110
26111 /* Search for producer of independent IMUL instruction. */
26112 for (i = n_ready - 2; i >= 0; i--)
26113 {
26114 insn = ready[i];
26115 if (!NONDEBUG_INSN_P (insn))
26116 continue;
26117 /* Skip IMUL instruction. */
26118 insn2 = PATTERN (insn);
26119 if (GET_CODE (insn2) == PARALLEL)
26120 insn2 = XVECEXP (insn2, 0, 0);
26121 if (GET_CODE (insn2) == SET
26122 && GET_CODE (SET_SRC (insn2)) == MULT
26123 && GET_MODE (SET_SRC (insn2)) == SImode)
26124 continue;
26125
26126 FOR_EACH_DEP (insn, SD_LIST_FORW, sd_it, dep)
26127 {
26128 rtx con;
26129 con = DEP_CON (dep);
26130 if (!NONDEBUG_INSN_P (con))
26131 continue;
26132 insn1 = PATTERN (con);
26133 if (GET_CODE (insn1) == PARALLEL)
26134 insn1 = XVECEXP (insn1, 0, 0);
26135
26136 if (GET_CODE (insn1) == SET
26137 && GET_CODE (SET_SRC (insn1)) == MULT
26138 && GET_MODE (SET_SRC (insn1)) == SImode)
26139 {
26140 sd_iterator_def sd_it1;
26141 dep_t dep1;
26142 /* Check if there is no other dependee for IMUL. */
26143 index = i;
26144 FOR_EACH_DEP (con, SD_LIST_BACK, sd_it1, dep1)
26145 {
26146 rtx pro;
26147 pro = DEP_PRO (dep1);
26148 if (!NONDEBUG_INSN_P (pro))
26149 continue;
26150 if (pro != insn)
26151 index = -1;
26152 }
26153 if (index >= 0)
26154 break;
26155 }
26156 }
26157 if (index >= 0)
26158 break;
26159 }
26160 return index;
26161 }
26162
26163 /* Try to find the best candidate for the top of the ready list if two insns
26164 have the same priority - the best candidate is the one whose dependencies
26165 were scheduled earlier. Applied for Silvermont only.
26166 Return true if the top 2 insns must be interchanged. */
26167 static bool
26168 swap_top_of_ready_list (rtx_insn **ready, int n_ready)
26169 {
26170 rtx_insn *top = ready[n_ready - 1];
26171 rtx_insn *next = ready[n_ready - 2];
26172 rtx set;
26173 sd_iterator_def sd_it;
26174 dep_t dep;
26175 int clock1 = -1;
26176 int clock2 = -1;
26177 #define INSN_TICK(INSN) (HID (INSN)->tick)
26178
26179 if (!TARGET_SILVERMONT && !TARGET_INTEL)
26180 return false;
26181
26182 if (!NONDEBUG_INSN_P (top))
26183 return false;
26184 if (!NONJUMP_INSN_P (top))
26185 return false;
26186 if (!NONDEBUG_INSN_P (next))
26187 return false;
26188 if (!NONJUMP_INSN_P (next))
26189 return false;
26190 set = single_set (top);
26191 if (!set)
26192 return false;
26193 set = single_set (next);
26194 if (!set)
26195 return false;
26196
26197 if (INSN_PRIORITY_KNOWN (top) && INSN_PRIORITY_KNOWN (next))
26198 {
26199 if (INSN_PRIORITY (top) != INSN_PRIORITY (next))
26200 return false;
26201 /* Determine the winner more precisely. */
26202 FOR_EACH_DEP (top, SD_LIST_RES_BACK, sd_it, dep)
26203 {
26204 rtx pro;
26205 pro = DEP_PRO (dep);
26206 if (!NONDEBUG_INSN_P (pro))
26207 continue;
26208 if (INSN_TICK (pro) > clock1)
26209 clock1 = INSN_TICK (pro);
26210 }
26211 FOR_EACH_DEP (next, SD_LIST_RES_BACK, sd_it, dep)
26212 {
26213 rtx pro;
26214 pro = DEP_PRO (dep);
26215 if (!NONDEBUG_INSN_P (pro))
26216 continue;
26217 if (INSN_TICK (pro) > clock2)
26218 clock2 = INSN_TICK (pro);
26219 }
26220
26221 if (clock1 == clock2)
26222 {
26223 /* Determine winner - load must win. */
26224 enum attr_memory memory1, memory2;
26225 memory1 = get_attr_memory (top);
26226 memory2 = get_attr_memory (next);
26227 if (memory2 == MEMORY_LOAD && memory1 != MEMORY_LOAD)
26228 return true;
26229 }
26230 return (bool) (clock2 < clock1);
26231 }
26232 return false;
26233 #undef INSN_TICK
26234 }
26235
26236 /* Perform possible reordering of the ready list, for Atom/Silvermont only.
26237 Return the issue rate. */
26238 static int
26239 ix86_sched_reorder (FILE *dump, int sched_verbose, rtx_insn **ready,
26240 int *pn_ready, int clock_var)
26241 {
26242 int issue_rate = -1;
26243 int n_ready = *pn_ready;
26244 int i;
26245 rtx_insn *insn;
26246 int index = -1;
26247
26248 /* Set up issue rate. */
26249 issue_rate = ix86_issue_rate ();
26250
26251 /* Do reordering for BONNELL/SILVERMONT only. */
26252 if (!TARGET_BONNELL && !TARGET_SILVERMONT && !TARGET_INTEL)
26253 return issue_rate;
26254
26255 /* Nothing to do if ready list contains only 1 instruction. */
26256 if (n_ready <= 1)
26257 return issue_rate;
26258
26259 /* Do reordering for the post-reload scheduler only. */
26260 if (!reload_completed)
26261 return issue_rate;
26262
26263 if ((index = do_reorder_for_imul (ready, n_ready)) >= 0)
26264 {
26265 if (sched_verbose > 1)
26266 fprintf (dump, ";;\tatom sched_reorder: put %d insn on top\n",
26267 INSN_UID (ready[index]));
26268
26269 /* Put IMUL producer (ready[index]) at the top of ready list. */
26270 insn = ready[index];
26271 for (i = index; i < n_ready - 1; i++)
26272 ready[i] = ready[i + 1];
26273 ready[n_ready - 1] = insn;
26274 return issue_rate;
26275 }
26276 if (clock_var != 0 && swap_top_of_ready_list (ready, n_ready))
26277 {
26278 if (sched_verbose > 1)
26279 fprintf (dump, ";;\tslm sched_reorder: swap %d and %d insns\n",
26280 INSN_UID (ready[n_ready - 1]), INSN_UID (ready[n_ready - 2]));
26281 /* Swap 2 top elements of ready list. */
26282 insn = ready[n_ready - 1];
26283 ready[n_ready - 1] = ready[n_ready - 2];
26284 ready[n_ready - 2] = insn;
26285 }
26286 return issue_rate;
26287 }
26288
26289 static bool
26290 ix86_class_likely_spilled_p (reg_class_t);
26291
26292 /* Return true if the lhs of INSN is a HW function argument register; set
26293 *is_spilled to true if it is a likely-spilled HW register. */
26294 static bool
26295 insn_is_function_arg (rtx insn, bool* is_spilled)
26296 {
26297 rtx dst;
26298
26299 if (!NONDEBUG_INSN_P (insn))
26300 return false;
26301 /* Call instructions are not movable; ignore them. */
26302 if (CALL_P (insn))
26303 return false;
26304 insn = PATTERN (insn);
26305 if (GET_CODE (insn) == PARALLEL)
26306 insn = XVECEXP (insn, 0, 0);
26307 if (GET_CODE (insn) != SET)
26308 return false;
26309 dst = SET_DEST (insn);
26310 if (REG_P (dst) && HARD_REGISTER_P (dst)
26311 && ix86_function_arg_regno_p (REGNO (dst)))
26312 {
26313 /* Is it likely spilled HW register? */
26314 if (!TEST_HARD_REG_BIT (fixed_reg_set, REGNO (dst))
26315 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst))))
26316 *is_spilled = true;
26317 return true;
26318 }
26319 return false;
26320 }
26321
26322 /* Add output dependencies for a chain of adjacent function-argument moves,
26323 but only if the chain contains a move to a likely-spilled HW register.
26324 Return the first argument if at least one dependence was added, NULL otherwise. */
26325 static rtx_insn *
26326 add_parameter_dependencies (rtx_insn *call, rtx_insn *head)
26327 {
26328 rtx_insn *insn;
26329 rtx_insn *last = call;
26330 rtx_insn *first_arg = NULL;
26331 bool is_spilled = false;
26332
26333 head = PREV_INSN (head);
26334
26335 /* Find the argument-passing instruction nearest to the call. */
26336 while (true)
26337 {
26338 last = PREV_INSN (last);
26339 if (last == head)
26340 return NULL;
26341 if (!NONDEBUG_INSN_P (last))
26342 continue;
26343 if (insn_is_function_arg (last, &is_spilled))
26344 break;
26345 return NULL;
26346 }
26347
26348 first_arg = last;
26349 while (true)
26350 {
26351 insn = PREV_INSN (last);
26352 if (!INSN_P (insn))
26353 break;
26354 if (insn == head)
26355 break;
26356 if (!NONDEBUG_INSN_P (insn))
26357 {
26358 last = insn;
26359 continue;
26360 }
26361 if (insn_is_function_arg (insn, &is_spilled))
26362 {
26363 /* Add an output dependence between two function arguments if the chain
26364 of output arguments contains likely-spilled HW registers. */
26365 if (is_spilled)
26366 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
26367 first_arg = last = insn;
26368 }
26369 else
26370 break;
26371 }
26372 if (!is_spilled)
26373 return NULL;
26374 return first_arg;
26375 }
26376
26377 /* Add output or anti dependency from insn to first_arg to restrict its code
26378 motion. */
26379 static void
26380 avoid_func_arg_motion (rtx_insn *first_arg, rtx_insn *insn)
26381 {
26382 rtx set;
26383 rtx tmp;
26384
26385 set = single_set (insn);
26386 if (!set)
26387 return;
26388 tmp = SET_DEST (set);
26389 if (REG_P (tmp))
26390 {
26391 /* Add output dependency to the first function argument. */
26392 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
26393 return;
26394 }
26395 /* Add anti dependency. */
26396 add_dependence (first_arg, insn, REG_DEP_ANTI);
26397 }
26398
26399 /* Avoid cross-block motion of a function argument by adding a dependency
26400 from the first non-jump instruction in BB. */
26401 static void
26402 add_dependee_for_func_arg (rtx_insn *arg, basic_block bb)
26403 {
26404 rtx_insn *insn = BB_END (bb);
26405
26406 while (insn)
26407 {
26408 if (NONDEBUG_INSN_P (insn) && NONJUMP_INSN_P (insn))
26409 {
26410 rtx set = single_set (insn);
26411 if (set)
26412 {
26413 avoid_func_arg_motion (arg, insn);
26414 return;
26415 }
26416 }
26417 if (insn == BB_HEAD (bb))
26418 return;
26419 insn = PREV_INSN (insn);
26420 }
26421 }
26422
26423 /* Hook for pre-reload schedule - avoid motion of function arguments
26424 passed in likely spilled HW registers. */
26425 static void
26426 ix86_dependencies_evaluation_hook (rtx_insn *head, rtx_insn *tail)
26427 {
26428 rtx_insn *insn;
26429 rtx_insn *first_arg = NULL;
26430 if (reload_completed)
26431 return;
26432 while (head != tail && DEBUG_INSN_P (head))
26433 head = NEXT_INSN (head);
26434 for (insn = tail; insn != head; insn = PREV_INSN (insn))
26435 if (INSN_P (insn) && CALL_P (insn))
26436 {
26437 first_arg = add_parameter_dependencies (insn, head);
26438 if (first_arg)
26439 {
26440 /* Add a dependee for the first argument to predecessors, but only if
26441 the region contains more than one block. */
26442 basic_block bb = BLOCK_FOR_INSN (insn);
26443 int rgn = CONTAINING_RGN (bb->index);
26444 int nr_blks = RGN_NR_BLOCKS (rgn);
26445 /* Skip trivial regions and region head blocks that can have
26446 predecessors outside of region. */
26447 if (nr_blks > 1 && BLOCK_TO_BB (bb->index) != 0)
26448 {
26449 edge e;
26450 edge_iterator ei;
26451
26452 /* Regions are SCCs with the exception of selective
26453 scheduling with pipelining of outer blocks enabled.
26454 So also check that immediate predecessors of a non-head
26455 block are in the same region. */
26456 FOR_EACH_EDGE (e, ei, bb->preds)
26457 {
26458 /* Avoid creating loop-carried dependencies by using the
26459 topological ordering in the region. */
26460 if (rgn == CONTAINING_RGN (e->src->index)
26461 && BLOCK_TO_BB (bb->index) > BLOCK_TO_BB (e->src->index))
26462 add_dependee_for_func_arg (first_arg, e->src);
26463 }
26464 }
26465 insn = first_arg;
26466 if (insn == head)
26467 break;
26468 }
26469 }
26470 else if (first_arg)
26471 avoid_func_arg_motion (first_arg, insn);
26472 }
26473
26474 /* Hook for pre-reload schedule - set priority of moves from likely spilled
26475 HW registers to maximum, to schedule them as soon as possible. These are
26476 moves from function argument registers at the top of the function entry
26477 and moves from function return value registers after call. */
26478 static int
26479 ix86_adjust_priority (rtx_insn *insn, int priority)
26480 {
26481 rtx set;
26482
26483 if (reload_completed)
26484 return priority;
26485
26486 if (!NONDEBUG_INSN_P (insn))
26487 return priority;
26488
26489 set = single_set (insn);
26490 if (set)
26491 {
26492 rtx tmp = SET_SRC (set);
26493 if (REG_P (tmp)
26494 && HARD_REGISTER_P (tmp)
26495 && !TEST_HARD_REG_BIT (fixed_reg_set, REGNO (tmp))
26496 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (tmp))))
26497 return current_sched_info->sched_max_insns_priority;
26498 }
26499
26500 return priority;
26501 }
26502
26503 /* Model decoder of Core 2/i7.
26504 Below hooks for multipass scheduling (see haifa-sched.c:max_issue)
26505 track the instruction fetch block boundaries and make sure that long
26506 (9+ bytes) instructions are assigned to D0. */
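/* (Roughly: these cores fetch up to 16 bytes of instructions per cycle and
   have one complex decoder plus several simple ones; only the complex decoder
   handles long instructions, which is what the constants and the filtering
   below model.)  */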
26507
26508 /* Maximum length of an insn that can be handled by
26509 a secondary decoder unit. '8' for Core 2/i7. */
26510 static int core2i7_secondary_decoder_max_insn_size;
26511
26512 /* Ifetch block size, i.e., number of bytes decoder reads per cycle.
26513 '16' for Core 2/i7. */
26514 static int core2i7_ifetch_block_size;
26515
26516 /* Maximum number of instructions decoder can handle per cycle.
26517 '6' for Core 2/i7. */
26518 static int core2i7_ifetch_block_max_insns;
26519
26520 typedef struct ix86_first_cycle_multipass_data_ *
26521 ix86_first_cycle_multipass_data_t;
26522 typedef const struct ix86_first_cycle_multipass_data_ *
26523 const_ix86_first_cycle_multipass_data_t;
26524
26525 /* A variable to store target state across calls to max_issue within
26526 one cycle. */
26527 static struct ix86_first_cycle_multipass_data_ _ix86_first_cycle_multipass_data,
26528 *ix86_first_cycle_multipass_data = &_ix86_first_cycle_multipass_data;
26529
26530 /* Initialize DATA. */
26531 static void
26532 core2i7_first_cycle_multipass_init (void *_data)
26533 {
26534 ix86_first_cycle_multipass_data_t data
26535 = (ix86_first_cycle_multipass_data_t) _data;
26536
26537 data->ifetch_block_len = 0;
26538 data->ifetch_block_n_insns = 0;
26539 data->ready_try_change = NULL;
26540 data->ready_try_change_size = 0;
26541 }
26542
26543 /* Advancing the cycle; reset ifetch block counts. */
26544 static void
26545 core2i7_dfa_post_advance_cycle (void)
26546 {
26547 ix86_first_cycle_multipass_data_t data = ix86_first_cycle_multipass_data;
26548
26549 gcc_assert (data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
26550
26551 data->ifetch_block_len = 0;
26552 data->ifetch_block_n_insns = 0;
26553 }
26554
26555 static int min_insn_size (rtx_insn *);
26556
26557 /* Filter out insns from ready_try that the core will not be able to issue
26558 on current cycle due to decoder. */
26559 static void
26560 core2i7_first_cycle_multipass_filter_ready_try
26561 (const_ix86_first_cycle_multipass_data_t data,
26562 signed char *ready_try, int n_ready, bool first_cycle_insn_p)
26563 {
26564 while (n_ready--)
26565 {
26566 rtx_insn *insn;
26567 int insn_size;
26568
26569 if (ready_try[n_ready])
26570 continue;
26571
26572 insn = get_ready_element (n_ready);
26573 insn_size = min_insn_size (insn);
26574
26575 if (/* If this insn is too long for a secondary decoder ... */
26576 (!first_cycle_insn_p
26577 && insn_size > core2i7_secondary_decoder_max_insn_size)
26578 /* ... or it would not fit into the ifetch block ... */
26579 || data->ifetch_block_len + insn_size > core2i7_ifetch_block_size
26580 /* ... or the decoder is full already ... */
26581 || data->ifetch_block_n_insns + 1 > core2i7_ifetch_block_max_insns)
26582 /* ... mask the insn out. */
26583 {
26584 ready_try[n_ready] = 1;
26585
26586 if (data->ready_try_change)
26587 bitmap_set_bit (data->ready_try_change, n_ready);
26588 }
26589 }
26590 }
26591
26592 /* Prepare for a new round of multipass lookahead scheduling. */
26593 static void
26594 core2i7_first_cycle_multipass_begin (void *_data,
26595 signed char *ready_try, int n_ready,
26596 bool first_cycle_insn_p)
26597 {
26598 ix86_first_cycle_multipass_data_t data
26599 = (ix86_first_cycle_multipass_data_t) _data;
26600 const_ix86_first_cycle_multipass_data_t prev_data
26601 = ix86_first_cycle_multipass_data;
26602
26603 /* Restore the state from the end of the previous round. */
26604 data->ifetch_block_len = prev_data->ifetch_block_len;
26605 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns;
26606
26607 /* Filter instructions that cannot be issued on current cycle due to
26608 decoder restrictions. */
26609 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
26610 first_cycle_insn_p);
26611 }
26612
26613 /* INSN is being issued in current solution. Account for its impact on
26614 the decoder model. */
26615 static void
26616 core2i7_first_cycle_multipass_issue (void *_data,
26617 signed char *ready_try, int n_ready,
26618 rtx_insn *insn, const void *_prev_data)
26619 {
26620 ix86_first_cycle_multipass_data_t data
26621 = (ix86_first_cycle_multipass_data_t) _data;
26622 const_ix86_first_cycle_multipass_data_t prev_data
26623 = (const_ix86_first_cycle_multipass_data_t) _prev_data;
26624
26625 int insn_size = min_insn_size (insn);
26626
26627 data->ifetch_block_len = prev_data->ifetch_block_len + insn_size;
26628 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns + 1;
26629 gcc_assert (data->ifetch_block_len <= core2i7_ifetch_block_size
26630 && data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
26631
26632 /* Allocate or resize the bitmap for storing INSN's effect on ready_try. */
26633 if (!data->ready_try_change)
26634 {
26635 data->ready_try_change = sbitmap_alloc (n_ready);
26636 data->ready_try_change_size = n_ready;
26637 }
26638 else if (data->ready_try_change_size < n_ready)
26639 {
26640 data->ready_try_change = sbitmap_resize (data->ready_try_change,
26641 n_ready, 0);
26642 data->ready_try_change_size = n_ready;
26643 }
26644 bitmap_clear (data->ready_try_change);
26645
26646 /* Filter out insns from ready_try that the core will not be able to issue
26647 on current cycle due to decoder. */
26648 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
26649 false);
26650 }
26651
26652 /* Revert the effect on ready_try. */
26653 static void
26654 core2i7_first_cycle_multipass_backtrack (const void *_data,
26655 signed char *ready_try,
26656 int n_ready ATTRIBUTE_UNUSED)
26657 {
26658 const_ix86_first_cycle_multipass_data_t data
26659 = (const_ix86_first_cycle_multipass_data_t) _data;
26660 unsigned int i = 0;
26661 sbitmap_iterator sbi;
26662
26663 gcc_assert (bitmap_last_set_bit (data->ready_try_change) < n_ready);
26664 EXECUTE_IF_SET_IN_BITMAP (data->ready_try_change, 0, i, sbi)
26665 {
26666 ready_try[i] = 0;
26667 }
26668 }
26669
26670 /* Save the result of multipass lookahead scheduling for the next round. */
26671 static void
26672 core2i7_first_cycle_multipass_end (const void *_data)
26673 {
26674 const_ix86_first_cycle_multipass_data_t data
26675 = (const_ix86_first_cycle_multipass_data_t) _data;
26676 ix86_first_cycle_multipass_data_t next_data
26677 = ix86_first_cycle_multipass_data;
26678
26679 if (data != NULL)
26680 {
26681 next_data->ifetch_block_len = data->ifetch_block_len;
26682 next_data->ifetch_block_n_insns = data->ifetch_block_n_insns;
26683 }
26684 }
26685
26686 /* Deallocate target data. */
26687 static void
26688 core2i7_first_cycle_multipass_fini (void *_data)
26689 {
26690 ix86_first_cycle_multipass_data_t data
26691 = (ix86_first_cycle_multipass_data_t) _data;
26692
26693 if (data->ready_try_change)
26694 {
26695 sbitmap_free (data->ready_try_change);
26696 data->ready_try_change = NULL;
26697 data->ready_try_change_size = 0;
26698 }
26699 }
26700
26701 /* Prepare for scheduling pass. */
26702 static void
26703 ix86_sched_init_global (FILE *, int, int)
26704 {
26705 /* Install scheduling hooks for current CPU. Some of these hooks are used
26706 in time-critical parts of the scheduler, so we only set them up when
26707 they are actually used. */
26708 switch (ix86_tune)
26709 {
26710 case PROCESSOR_CORE2:
26711 case PROCESSOR_NEHALEM:
26712 case PROCESSOR_SANDYBRIDGE:
26713 case PROCESSOR_HASWELL:
26714 /* Do not perform multipass scheduling for pre-reload schedule
26715 to save compile time. */
26716 if (reload_completed)
26717 {
26718 targetm.sched.dfa_post_advance_cycle
26719 = core2i7_dfa_post_advance_cycle;
26720 targetm.sched.first_cycle_multipass_init
26721 = core2i7_first_cycle_multipass_init;
26722 targetm.sched.first_cycle_multipass_begin
26723 = core2i7_first_cycle_multipass_begin;
26724 targetm.sched.first_cycle_multipass_issue
26725 = core2i7_first_cycle_multipass_issue;
26726 targetm.sched.first_cycle_multipass_backtrack
26727 = core2i7_first_cycle_multipass_backtrack;
26728 targetm.sched.first_cycle_multipass_end
26729 = core2i7_first_cycle_multipass_end;
26730 targetm.sched.first_cycle_multipass_fini
26731 = core2i7_first_cycle_multipass_fini;
26732
26733 /* Set decoder parameters. */
26734 core2i7_secondary_decoder_max_insn_size = 8;
26735 core2i7_ifetch_block_size = 16;
26736 core2i7_ifetch_block_max_insns = 6;
26737 break;
26738 }
26739 /* ... Fall through ... */
26740 default:
26741 targetm.sched.dfa_post_advance_cycle = NULL;
26742 targetm.sched.first_cycle_multipass_init = NULL;
26743 targetm.sched.first_cycle_multipass_begin = NULL;
26744 targetm.sched.first_cycle_multipass_issue = NULL;
26745 targetm.sched.first_cycle_multipass_backtrack = NULL;
26746 targetm.sched.first_cycle_multipass_end = NULL;
26747 targetm.sched.first_cycle_multipass_fini = NULL;
26748 break;
26749 }
26750 }
26751
26752 \f
26753 /* Compute the alignment given to a constant that is being placed in memory.
26754 EXP is the constant and ALIGN is the alignment that the object would
26755 ordinarily have.
26756 The value of this function is used instead of that alignment to align
26757 the object. */
26758
26759 int
26760 ix86_constant_alignment (tree exp, int align)
26761 {
26762 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
26763 || TREE_CODE (exp) == INTEGER_CST)
26764 {
26765 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
26766 return 64;
26767 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
26768 return 128;
26769 }
26770 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
26771 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
26772 return BITS_PER_WORD;
26773
26774 return align;
26775 }
26776
26777 /* Compute the alignment for a static variable.
26778 TYPE is the data type, and ALIGN is the alignment that
26779 the object would ordinarily have. The value of this function is used
26780 instead of that alignment to align the object. */
26781
26782 int
26783 ix86_data_alignment (tree type, int align, bool opt)
26784 {
26785 /* GCC 4.8 and earlier used to incorrectly assume this alignment even
26786 for symbols from other compilation units or symbols that don't need
26787 to bind locally. In order to preserve some ABI compatibility with
26788 those compilers, ensure we don't decrease alignment from what we
26789 used to assume. */
26790
26791 int max_align_compat
26792 = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
26793
26794 /* A data structure equal to or greater than the size of a cache line
26795 (64 bytes in the Pentium 4 and other recent Intel processors, including
26796 processors based on the Intel Core microarchitecture) should be aligned
26797 so that its base address is a multiple of the cache line size. */
26798
26799 int max_align
26800 = MIN ((unsigned) ix86_tune_cost->prefetch_block * 8, MAX_OFILE_ALIGNMENT);
26801
26802 if (max_align < BITS_PER_WORD)
26803 max_align = BITS_PER_WORD;
26804
26805 if (opt
26806 && AGGREGATE_TYPE_P (type)
26807 && TYPE_SIZE (type)
26808 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
26809 {
26810 if (wi::geu_p (TYPE_SIZE (type), max_align_compat)
26811 && align < max_align_compat)
26812 align = max_align_compat;
26813 if (wi::geu_p (TYPE_SIZE (type), max_align)
26814 && align < max_align)
26815 align = max_align;
26816 }
26817
26818 /* The x86-64 ABI requires arrays of 16 bytes or more to be aligned
26819 to a 16-byte boundary. */
26820 if (TARGET_64BIT)
26821 {
26822 if ((opt ? AGGREGATE_TYPE_P (type) : TREE_CODE (type) == ARRAY_TYPE)
26823 && TYPE_SIZE (type)
26824 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
26825 && wi::geu_p (TYPE_SIZE (type), 128)
26826 && align < 128)
26827 return 128;
26828 }
26829
26830 if (!opt)
26831 return align;
26832
26833 if (TREE_CODE (type) == ARRAY_TYPE)
26834 {
26835 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
26836 return 64;
26837 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
26838 return 128;
26839 }
26840 else if (TREE_CODE (type) == COMPLEX_TYPE)
26841 {
26842
26843 if (TYPE_MODE (type) == DCmode && align < 64)
26844 return 64;
26845 if ((TYPE_MODE (type) == XCmode
26846 || TYPE_MODE (type) == TCmode) && align < 128)
26847 return 128;
26848 }
26849 else if ((TREE_CODE (type) == RECORD_TYPE
26850 || TREE_CODE (type) == UNION_TYPE
26851 || TREE_CODE (type) == QUAL_UNION_TYPE)
26852 && TYPE_FIELDS (type))
26853 {
26854 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
26855 return 64;
26856 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
26857 return 128;
26858 }
26859 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
26860 || TREE_CODE (type) == INTEGER_TYPE)
26861 {
26862 if (TYPE_MODE (type) == DFmode && align < 64)
26863 return 64;
26864 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
26865 return 128;
26866 }
26867
26868 return align;
26869 }
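
/* Illustrative sketch (not part of the original sources): rough effect of
   ix86_data_alignment on a few hypothetical file-scope objects when
   compiling for x86-64 with optimization.  The cache-line figure comes from
   ix86_tune_cost->prefetch_block, assumed to be 64 bytes here.  */
#if 0
static double small[2];   /* 16 bytes: the ABI rule above raises the
                             alignment to 16 bytes.  */
static char big[4096];    /* Large aggregate: raised to the assumed 64-byte
                             cache line (and never below the GCC 4.8
                             compatibility value).  */
static char tiny[4];      /* Below both thresholds: keeps its ordinary
                             alignment.  */
#endif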
26870
26871 /* Compute the alignment for a local variable or a stack slot. EXP is
26872 the data type or decl itself, MODE is the widest mode available and
26873 ALIGN is the alignment that the object would ordinarily have. The
26874 value of this macro is used instead of that alignment to align the
26875 object. */
26876
26877 unsigned int
26878 ix86_local_alignment (tree exp, enum machine_mode mode,
26879 unsigned int align)
26880 {
26881 tree type, decl;
26882
26883 if (exp && DECL_P (exp))
26884 {
26885 type = TREE_TYPE (exp);
26886 decl = exp;
26887 }
26888 else
26889 {
26890 type = exp;
26891 decl = NULL;
26892 }
26893
26894 /* Don't do dynamic stack realignment for long long objects with
26895 -mpreferred-stack-boundary=2. */
26896 if (!TARGET_64BIT
26897 && align == 64
26898 && ix86_preferred_stack_boundary < 64
26899 && (mode == DImode || (type && TYPE_MODE (type) == DImode))
26900 && (!type || !TYPE_USER_ALIGN (type))
26901 && (!decl || !DECL_USER_ALIGN (decl)))
26902 align = 32;
26903
26904 /* If TYPE is NULL, we are allocating a stack slot for a caller-save
26905 register in MODE. We will return the larger of the XF and DF
26906 alignments. */
26907 if (!type)
26908 {
26909 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
26910 align = GET_MODE_ALIGNMENT (DFmode);
26911 return align;
26912 }
26913
26914 /* The x86-64 ABI requires arrays of 16 bytes or more to be aligned
26915 to a 16-byte boundary. The exact wording is:
26916
26917 An array uses the same alignment as its elements, except that a local or
26918 global array variable of length at least 16 bytes or
26919 a C99 variable-length array variable always has alignment of at least 16 bytes.
26920
26921 This was added to allow the use of aligned SSE instructions on arrays.
26922 The rule is meant for static storage (where the compiler cannot do the
26923 analysis by itself). We follow it for automatic variables only when
26924 convenient: we fully control everything in the function being compiled,
26925 and functions from other units cannot rely on the alignment.
26926
26927 Exclude the va_list type. It is a common case of a local array where
26928 we cannot benefit from the alignment.
26929
26930 TODO: Probably one should optimize for size only when the variable is not escaping. */
26931 if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
26932 && TARGET_SSE)
26933 {
26934 if (AGGREGATE_TYPE_P (type)
26935 && (va_list_type_node == NULL_TREE
26936 || (TYPE_MAIN_VARIANT (type)
26937 != TYPE_MAIN_VARIANT (va_list_type_node)))
26938 && TYPE_SIZE (type)
26939 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
26940 && wi::geu_p (TYPE_SIZE (type), 16)
26941 && align < 128)
26942 return 128;
26943 }
26944 if (TREE_CODE (type) == ARRAY_TYPE)
26945 {
26946 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
26947 return 64;
26948 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
26949 return 128;
26950 }
26951 else if (TREE_CODE (type) == COMPLEX_TYPE)
26952 {
26953 if (TYPE_MODE (type) == DCmode && align < 64)
26954 return 64;
26955 if ((TYPE_MODE (type) == XCmode
26956 || TYPE_MODE (type) == TCmode) && align < 128)
26957 return 128;
26958 }
26959 else if ((TREE_CODE (type) == RECORD_TYPE
26960 || TREE_CODE (type) == UNION_TYPE
26961 || TREE_CODE (type) == QUAL_UNION_TYPE)
26962 && TYPE_FIELDS (type))
26963 {
26964 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
26965 return 64;
26966 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
26967 return 128;
26968 }
26969 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
26970 || TREE_CODE (type) == INTEGER_TYPE)
26971 {
26972
26973 if (TYPE_MODE (type) == DFmode && align < 64)
26974 return 64;
26975 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
26976 return 128;
26977 }
26978 return align;
26979 }
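
/* Illustrative sketch (not part of the original sources): a hypothetical
   function showing why local arrays are padded up to 16-byte alignment on
   x86-64 when optimizing for speed with SSE enabled.  */
#if 0
float
sum16 (void)
{
  float v[16];   /* 64 bytes >= 16: ix86_local_alignment returns 128 bits,
                    so the slot can be accessed with aligned SSE moves.  */
  float s = 0.0f;
  int i;
  for (i = 0; i < 16; i++)
    v[i] = (float) i;
  for (i = 0; i < 16; i++)
    s += v[i];
  return s;
}
#endif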
26980
26981 /* Compute the minimum required alignment for dynamic stack realignment
26982 purposes for a local variable, parameter or a stack slot. EXP is
26983 the data type or decl itself, MODE is its mode and ALIGN is the
26984 alignment that the object would ordinarily have. */
26985
26986 unsigned int
26987 ix86_minimum_alignment (tree exp, enum machine_mode mode,
26988 unsigned int align)
26989 {
26990 tree type, decl;
26991
26992 if (exp && DECL_P (exp))
26993 {
26994 type = TREE_TYPE (exp);
26995 decl = exp;
26996 }
26997 else
26998 {
26999 type = exp;
27000 decl = NULL;
27001 }
27002
27003 if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
27004 return align;
27005
27006 /* Don't do dynamic stack realignment for long long objects with
27007 -mpreferred-stack-boundary=2. */
27008 if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
27009 && (!type || !TYPE_USER_ALIGN (type))
27010 && (!decl || !DECL_USER_ALIGN (decl)))
27011 return 32;
27012
27013 return align;
27014 }
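
/* Illustrative sketch (not part of the original sources): the case the hook
   above is meant to catch.  With the hypothetical options
   "-m32 -mpreferred-stack-boundary=2", a long long local would otherwise
   request 8-byte alignment and force dynamic stack realignment; the hook
   reports 32 bits as sufficient instead, so no realignment is emitted.  */
#if 0
long long
accumulate (long long a, long long b)
{
  long long t = a + b;   /* DImode local: minimum alignment drops to 32.  */
  return t * 2;
}
#endif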
27015 \f
27016 /* Find a location for the static chain incoming to a nested function.
27017 This is a register, unless all free registers are used by arguments. */
27018
27019 static rtx
27020 ix86_static_chain (const_tree fndecl, bool incoming_p)
27021 {
27022 unsigned regno;
27023
27024 if (!DECL_STATIC_CHAIN (fndecl))
27025 return NULL;
27026
27027 if (TARGET_64BIT)
27028 {
27029 /* We always use R10 in 64-bit mode. */
27030 regno = R10_REG;
27031 }
27032 else
27033 {
27034 tree fntype;
27035 unsigned int ccvt;
27036
27037 /* By default in 32-bit mode we use ECX to pass the static chain. */
27038 regno = CX_REG;
27039
27040 fntype = TREE_TYPE (fndecl);
27041 ccvt = ix86_get_callcvt (fntype);
27042 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
27043 {
27044 /* Fastcall functions use ecx/edx for arguments, which leaves
27045 us with EAX for the static chain.
27046 Thiscall functions use ecx for arguments, which also
27047 leaves us with EAX for the static chain. */
27048 regno = AX_REG;
27049 }
27050 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
27051 {
27052 /* Thiscall functions use ecx for arguments, which leaves
27053 us with EAX and EDX for the static chain.
27054 We use EAX for ABI compatibility. */
27055 regno = AX_REG;
27056 }
27057 else if (ix86_function_regparm (fntype, fndecl) == 3)
27058 {
27059 /* For regparm 3, we have no free call-clobbered registers in
27060 which to store the static chain. In order to implement this,
27061 we have the trampoline push the static chain to the stack.
27062 However, we can't push a value below the return address when
27063 we call the nested function directly, so we have to use an
27064 alternate entry point. For this we use ESI, and have the
27065 alternate entry point push ESI, so that things appear the
27066 same once we're executing the nested function. */
27067 if (incoming_p)
27068 {
27069 if (fndecl == current_function_decl)
27070 ix86_static_chain_on_stack = true;
27071 return gen_frame_mem (SImode,
27072 plus_constant (Pmode,
27073 arg_pointer_rtx, -8));
27074 }
27075 regno = SI_REG;
27076 }
27077 }
27078
27079 return gen_rtx_REG (Pmode, regno);
27080 }
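
/* Illustrative sketch (not part of the original sources): a GNU C nested
   function that actually needs a static chain.  Per the logic above, the
   chain pointer is expected in R10 for 64-bit code and in ECX for plain
   32-bit code (EAX for fastcall/thiscall; for regparm 3 the trampoline
   pushes it on the stack instead).  */
#if 0
int
outer (int x)
{
  int inner (int y) { return x + y; }   /* references X, so it needs the
                                            static chain */
  return inner (42);
}
#endif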
27081
27082 /* Emit RTL insns to initialize the variable parts of a trampoline.
27083 FNDECL is the decl of the target address; M_TRAMP is a MEM for
27084 the trampoline, and CHAIN_VALUE is an RTX for the static chain
27085 to be passed to the target function. */
27086
27087 static void
27088 ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
27089 {
27090 rtx mem, fnaddr;
27091 int opcode;
27092 int offset = 0;
27093
27094 fnaddr = XEXP (DECL_RTL (fndecl), 0);
27095
27096 if (TARGET_64BIT)
27097 {
27098 int size;
27099
27100 /* Load the function address into r11. Try to load the address
27101 using the shorter movl instead of movabs. We may want to support
27102 movq for kernel mode, but the kernel does not use trampolines at
27103 the moment. FNADDR is a 32-bit address and may not be in
27104 DImode when ptr_mode == SImode. Always use movl in this
27105 case. */
27106 if (ptr_mode == SImode
27107 || x86_64_zext_immediate_operand (fnaddr, VOIDmode))
27108 {
27109 fnaddr = copy_addr_to_reg (fnaddr);
27110
27111 mem = adjust_address (m_tramp, HImode, offset);
27112 emit_move_insn (mem, gen_int_mode (0xbb41, HImode));
27113
27114 mem = adjust_address (m_tramp, SImode, offset + 2);
27115 emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
27116 offset += 6;
27117 }
27118 else
27119 {
27120 mem = adjust_address (m_tramp, HImode, offset);
27121 emit_move_insn (mem, gen_int_mode (0xbb49, HImode));
27122
27123 mem = adjust_address (m_tramp, DImode, offset + 2);
27124 emit_move_insn (mem, fnaddr);
27125 offset += 10;
27126 }
27127
27128 /* Load the static chain into r10 using movabs. Use the shorter movl
27129 instead of movabs when ptr_mode == SImode. */
27130 if (ptr_mode == SImode)
27131 {
27132 opcode = 0xba41;
27133 size = 6;
27134 }
27135 else
27136 {
27137 opcode = 0xba49;
27138 size = 10;
27139 }
27140
27141 mem = adjust_address (m_tramp, HImode, offset);
27142 emit_move_insn (mem, gen_int_mode (opcode, HImode));
27143
27144 mem = adjust_address (m_tramp, ptr_mode, offset + 2);
27145 emit_move_insn (mem, chain_value);
27146 offset += size;
27147
27148 /* Jump to r11; the last (unused) byte is a nop, only there to
27149 pad the write out to a single 32-bit store. */
27150 mem = adjust_address (m_tramp, SImode, offset);
27151 emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
27152 offset += 4;
27153 }
27154 else
27155 {
27156 rtx disp, chain;
27157
27158 /* Depending on the static chain location, either load a register
27159 with a constant, or push the constant to the stack. All of the
27160 instructions are the same size. */
27161 chain = ix86_static_chain (fndecl, true);
27162 if (REG_P (chain))
27163 {
27164 switch (REGNO (chain))
27165 {
27166 case AX_REG:
27167 opcode = 0xb8; break;
27168 case CX_REG:
27169 opcode = 0xb9; break;
27170 default:
27171 gcc_unreachable ();
27172 }
27173 }
27174 else
27175 opcode = 0x68;
27176
27177 mem = adjust_address (m_tramp, QImode, offset);
27178 emit_move_insn (mem, gen_int_mode (opcode, QImode));
27179
27180 mem = adjust_address (m_tramp, SImode, offset + 1);
27181 emit_move_insn (mem, chain_value);
27182 offset += 5;
27183
27184 mem = adjust_address (m_tramp, QImode, offset);
27185 emit_move_insn (mem, gen_int_mode (0xe9, QImode));
27186
27187 mem = adjust_address (m_tramp, SImode, offset + 1);
27188
27189 /* Compute the offset from the end of the jmp to the target function.
27190 When the trampoline stores the static chain on the stack, we need
27191 to skip the first insn, which pushes the (call-saved) static chain
27192 register; this push is 1 byte. */
27193 offset += 5;
27194 disp = expand_binop (SImode, sub_optab, fnaddr,
27195 plus_constant (Pmode, XEXP (m_tramp, 0),
27196 offset - (MEM_P (chain) ? 1 : 0)),
27197 NULL_RTX, 1, OPTAB_DIRECT);
27198 emit_move_insn (mem, disp);
27199 }
27200
27201 gcc_assert (offset <= TRAMPOLINE_SIZE);
27202
27203 #ifdef HAVE_ENABLE_EXECUTE_STACK
27204 #ifdef CHECK_EXECUTE_STACK_ENABLED
27205 if (CHECK_EXECUTE_STACK_ENABLED)
27206 #endif
27207 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
27208 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
27209 #endif
27210 }
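
/* Illustrative sketch (not part of the original sources): the byte image the
   64-bit path above writes into the trampoline when movabs is used for both
   the target address and the static chain (24 bytes total).  The array name
   is hypothetical and the immediates are left as zero placeholders.  */
#if 0
static const unsigned char tramp64_image[24] = {
  0x49, 0xbb, 0, 0, 0, 0, 0, 0, 0, 0,   /* movabs r11, <function address>  */
  0x49, 0xba, 0, 0, 0, 0, 0, 0, 0, 0,   /* movabs r10, <static chain>      */
  0x49, 0xff, 0xe3,                     /* jmp    *r11                     */
  0x90                                  /* nop: pads the final write to a
                                           single 32-bit store             */
};
#endif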
27211 \f
27212 /* The following file contains several enumerations and data structures
27213 built from the definitions in i386-builtin-types.def. */
27214
27215 #include "i386-builtin-types.inc"
27216
27217 /* Table for the ix86 builtin non-function types. */
27218 static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1];
27219
27220 /* Retrieve an element from the above table, building some of
27221 the types lazily. */
27222
27223 static tree
27224 ix86_get_builtin_type (enum ix86_builtin_type tcode)
27225 {
27226 unsigned int index;
27227 tree type, itype;
27228
27229 gcc_assert ((unsigned)tcode < ARRAY_SIZE(ix86_builtin_type_tab));
27230
27231 type = ix86_builtin_type_tab[(int) tcode];
27232 if (type != NULL)
27233 return type;
27234
27235 gcc_assert (tcode > IX86_BT_LAST_PRIM);
27236 if (tcode <= IX86_BT_LAST_VECT)
27237 {
27238 enum machine_mode mode;
27239
27240 index = tcode - IX86_BT_LAST_PRIM - 1;
27241 itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]);
27242 mode = ix86_builtin_type_vect_mode[index];
27243
27244 type = build_vector_type_for_mode (itype, mode);
27245 }
27246 else
27247 {
27248 int quals;
27249
27250 index = tcode - IX86_BT_LAST_VECT - 1;
27251 if (tcode <= IX86_BT_LAST_PTR)
27252 quals = TYPE_UNQUALIFIED;
27253 else
27254 quals = TYPE_QUAL_CONST;
27255
27256 itype = ix86_get_builtin_type (ix86_builtin_type_ptr_base[index]);
27257 if (quals != TYPE_UNQUALIFIED)
27258 itype = build_qualified_type (itype, quals);
27259
27260 type = build_pointer_type (itype);
27261 }
27262
27263 ix86_builtin_type_tab[(int) tcode] = type;
27264 return type;
27265 }
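
/* Illustrative sketch (not part of the original sources): the memoization
   pattern used above, reduced to plain C.  A table indexed by the type code
   starts out NULL and is filled in the first time a code is requested;
   later lookups return the cached object.  All names are hypothetical.  */
#if 0
#include <stddef.h>

#define N_CODES 8

static const char *cache[N_CODES];          /* NULL until first use.  */

static const char *build_name (int code);   /* expensive construction step */

static const char *
get_name (int code)
{
  if (cache[code] == NULL)
    cache[code] = build_name (code);        /* build lazily, exactly once */
  return cache[code];
}
#endif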
27266
27267 /* Table for the ix86 builtin function types. */
27268 static GTY(()) tree ix86_builtin_func_type_tab[(int) IX86_BT_LAST_ALIAS + 1];
27269
27270 /* Retrieve an element from the above table, building some of
27271 the types lazily. */
27272
27273 static tree
27274 ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode)
27275 {
27276 tree type;
27277
27278 gcc_assert ((unsigned)tcode < ARRAY_SIZE (ix86_builtin_func_type_tab));
27279
27280 type = ix86_builtin_func_type_tab[(int) tcode];
27281 if (type != NULL)
27282 return type;
27283
27284 if (tcode <= IX86_BT_LAST_FUNC)
27285 {
27286 unsigned start = ix86_builtin_func_start[(int) tcode];
27287 unsigned after = ix86_builtin_func_start[(int) tcode + 1];
27288 tree rtype, atype, args = void_list_node;
27289 unsigned i;
27290
27291 rtype = ix86_get_builtin_type (ix86_builtin_func_args[start]);
27292 for (i = after - 1; i > start; --i)
27293 {
27294 atype = ix86_get_builtin_type (ix86_builtin_func_args[i]);
27295 args = tree_cons (NULL, atype, args);
27296 }
27297
27298 type = build_function_type (rtype, args);
27299 }
27300 else
27301 {
27302 unsigned index = tcode - IX86_BT_LAST_FUNC - 1;
27303 enum ix86_builtin_func_type icode;
27304
27305 icode = ix86_builtin_func_alias_base[index];
27306 type = ix86_get_builtin_func_type (icode);
27307 }
27308
27309 ix86_builtin_func_type_tab[(int) tcode] = type;
27310 return type;
27311 }
27312
27313
27314 /* Codes for all the SSE/MMX and later vector-extension builtins. */
27315 enum ix86_builtins
27316 {
27317 IX86_BUILTIN_ADDPS,
27318 IX86_BUILTIN_ADDSS,
27319 IX86_BUILTIN_DIVPS,
27320 IX86_BUILTIN_DIVSS,
27321 IX86_BUILTIN_MULPS,
27322 IX86_BUILTIN_MULSS,
27323 IX86_BUILTIN_SUBPS,
27324 IX86_BUILTIN_SUBSS,
27325
27326 IX86_BUILTIN_CMPEQPS,
27327 IX86_BUILTIN_CMPLTPS,
27328 IX86_BUILTIN_CMPLEPS,
27329 IX86_BUILTIN_CMPGTPS,
27330 IX86_BUILTIN_CMPGEPS,
27331 IX86_BUILTIN_CMPNEQPS,
27332 IX86_BUILTIN_CMPNLTPS,
27333 IX86_BUILTIN_CMPNLEPS,
27334 IX86_BUILTIN_CMPNGTPS,
27335 IX86_BUILTIN_CMPNGEPS,
27336 IX86_BUILTIN_CMPORDPS,
27337 IX86_BUILTIN_CMPUNORDPS,
27338 IX86_BUILTIN_CMPEQSS,
27339 IX86_BUILTIN_CMPLTSS,
27340 IX86_BUILTIN_CMPLESS,
27341 IX86_BUILTIN_CMPNEQSS,
27342 IX86_BUILTIN_CMPNLTSS,
27343 IX86_BUILTIN_CMPNLESS,
27344 IX86_BUILTIN_CMPORDSS,
27345 IX86_BUILTIN_CMPUNORDSS,
27346
27347 IX86_BUILTIN_COMIEQSS,
27348 IX86_BUILTIN_COMILTSS,
27349 IX86_BUILTIN_COMILESS,
27350 IX86_BUILTIN_COMIGTSS,
27351 IX86_BUILTIN_COMIGESS,
27352 IX86_BUILTIN_COMINEQSS,
27353 IX86_BUILTIN_UCOMIEQSS,
27354 IX86_BUILTIN_UCOMILTSS,
27355 IX86_BUILTIN_UCOMILESS,
27356 IX86_BUILTIN_UCOMIGTSS,
27357 IX86_BUILTIN_UCOMIGESS,
27358 IX86_BUILTIN_UCOMINEQSS,
27359
27360 IX86_BUILTIN_CVTPI2PS,
27361 IX86_BUILTIN_CVTPS2PI,
27362 IX86_BUILTIN_CVTSI2SS,
27363 IX86_BUILTIN_CVTSI642SS,
27364 IX86_BUILTIN_CVTSS2SI,
27365 IX86_BUILTIN_CVTSS2SI64,
27366 IX86_BUILTIN_CVTTPS2PI,
27367 IX86_BUILTIN_CVTTSS2SI,
27368 IX86_BUILTIN_CVTTSS2SI64,
27369
27370 IX86_BUILTIN_MAXPS,
27371 IX86_BUILTIN_MAXSS,
27372 IX86_BUILTIN_MINPS,
27373 IX86_BUILTIN_MINSS,
27374
27375 IX86_BUILTIN_LOADUPS,
27376 IX86_BUILTIN_STOREUPS,
27377 IX86_BUILTIN_MOVSS,
27378
27379 IX86_BUILTIN_MOVHLPS,
27380 IX86_BUILTIN_MOVLHPS,
27381 IX86_BUILTIN_LOADHPS,
27382 IX86_BUILTIN_LOADLPS,
27383 IX86_BUILTIN_STOREHPS,
27384 IX86_BUILTIN_STORELPS,
27385
27386 IX86_BUILTIN_MASKMOVQ,
27387 IX86_BUILTIN_MOVMSKPS,
27388 IX86_BUILTIN_PMOVMSKB,
27389
27390 IX86_BUILTIN_MOVNTPS,
27391 IX86_BUILTIN_MOVNTQ,
27392
27393 IX86_BUILTIN_LOADDQU,
27394 IX86_BUILTIN_STOREDQU,
27395
27396 IX86_BUILTIN_PACKSSWB,
27397 IX86_BUILTIN_PACKSSDW,
27398 IX86_BUILTIN_PACKUSWB,
27399
27400 IX86_BUILTIN_PADDB,
27401 IX86_BUILTIN_PADDW,
27402 IX86_BUILTIN_PADDD,
27403 IX86_BUILTIN_PADDQ,
27404 IX86_BUILTIN_PADDSB,
27405 IX86_BUILTIN_PADDSW,
27406 IX86_BUILTIN_PADDUSB,
27407 IX86_BUILTIN_PADDUSW,
27408 IX86_BUILTIN_PSUBB,
27409 IX86_BUILTIN_PSUBW,
27410 IX86_BUILTIN_PSUBD,
27411 IX86_BUILTIN_PSUBQ,
27412 IX86_BUILTIN_PSUBSB,
27413 IX86_BUILTIN_PSUBSW,
27414 IX86_BUILTIN_PSUBUSB,
27415 IX86_BUILTIN_PSUBUSW,
27416
27417 IX86_BUILTIN_PAND,
27418 IX86_BUILTIN_PANDN,
27419 IX86_BUILTIN_POR,
27420 IX86_BUILTIN_PXOR,
27421
27422 IX86_BUILTIN_PAVGB,
27423 IX86_BUILTIN_PAVGW,
27424
27425 IX86_BUILTIN_PCMPEQB,
27426 IX86_BUILTIN_PCMPEQW,
27427 IX86_BUILTIN_PCMPEQD,
27428 IX86_BUILTIN_PCMPGTB,
27429 IX86_BUILTIN_PCMPGTW,
27430 IX86_BUILTIN_PCMPGTD,
27431
27432 IX86_BUILTIN_PMADDWD,
27433
27434 IX86_BUILTIN_PMAXSW,
27435 IX86_BUILTIN_PMAXUB,
27436 IX86_BUILTIN_PMINSW,
27437 IX86_BUILTIN_PMINUB,
27438
27439 IX86_BUILTIN_PMULHUW,
27440 IX86_BUILTIN_PMULHW,
27441 IX86_BUILTIN_PMULLW,
27442
27443 IX86_BUILTIN_PSADBW,
27444 IX86_BUILTIN_PSHUFW,
27445
27446 IX86_BUILTIN_PSLLW,
27447 IX86_BUILTIN_PSLLD,
27448 IX86_BUILTIN_PSLLQ,
27449 IX86_BUILTIN_PSRAW,
27450 IX86_BUILTIN_PSRAD,
27451 IX86_BUILTIN_PSRLW,
27452 IX86_BUILTIN_PSRLD,
27453 IX86_BUILTIN_PSRLQ,
27454 IX86_BUILTIN_PSLLWI,
27455 IX86_BUILTIN_PSLLDI,
27456 IX86_BUILTIN_PSLLQI,
27457 IX86_BUILTIN_PSRAWI,
27458 IX86_BUILTIN_PSRADI,
27459 IX86_BUILTIN_PSRLWI,
27460 IX86_BUILTIN_PSRLDI,
27461 IX86_BUILTIN_PSRLQI,
27462
27463 IX86_BUILTIN_PUNPCKHBW,
27464 IX86_BUILTIN_PUNPCKHWD,
27465 IX86_BUILTIN_PUNPCKHDQ,
27466 IX86_BUILTIN_PUNPCKLBW,
27467 IX86_BUILTIN_PUNPCKLWD,
27468 IX86_BUILTIN_PUNPCKLDQ,
27469
27470 IX86_BUILTIN_SHUFPS,
27471
27472 IX86_BUILTIN_RCPPS,
27473 IX86_BUILTIN_RCPSS,
27474 IX86_BUILTIN_RSQRTPS,
27475 IX86_BUILTIN_RSQRTPS_NR,
27476 IX86_BUILTIN_RSQRTSS,
27477 IX86_BUILTIN_RSQRTF,
27478 IX86_BUILTIN_SQRTPS,
27479 IX86_BUILTIN_SQRTPS_NR,
27480 IX86_BUILTIN_SQRTSS,
27481
27482 IX86_BUILTIN_UNPCKHPS,
27483 IX86_BUILTIN_UNPCKLPS,
27484
27485 IX86_BUILTIN_ANDPS,
27486 IX86_BUILTIN_ANDNPS,
27487 IX86_BUILTIN_ORPS,
27488 IX86_BUILTIN_XORPS,
27489
27490 IX86_BUILTIN_EMMS,
27491 IX86_BUILTIN_LDMXCSR,
27492 IX86_BUILTIN_STMXCSR,
27493 IX86_BUILTIN_SFENCE,
27494
27495 IX86_BUILTIN_FXSAVE,
27496 IX86_BUILTIN_FXRSTOR,
27497 IX86_BUILTIN_FXSAVE64,
27498 IX86_BUILTIN_FXRSTOR64,
27499
27500 IX86_BUILTIN_XSAVE,
27501 IX86_BUILTIN_XRSTOR,
27502 IX86_BUILTIN_XSAVE64,
27503 IX86_BUILTIN_XRSTOR64,
27504
27505 IX86_BUILTIN_XSAVEOPT,
27506 IX86_BUILTIN_XSAVEOPT64,
27507
27508 IX86_BUILTIN_XSAVEC,
27509 IX86_BUILTIN_XSAVEC64,
27510
27511 IX86_BUILTIN_XSAVES,
27512 IX86_BUILTIN_XRSTORS,
27513 IX86_BUILTIN_XSAVES64,
27514 IX86_BUILTIN_XRSTORS64,
27515
27516 /* 3DNow! Original */
27517 IX86_BUILTIN_FEMMS,
27518 IX86_BUILTIN_PAVGUSB,
27519 IX86_BUILTIN_PF2ID,
27520 IX86_BUILTIN_PFACC,
27521 IX86_BUILTIN_PFADD,
27522 IX86_BUILTIN_PFCMPEQ,
27523 IX86_BUILTIN_PFCMPGE,
27524 IX86_BUILTIN_PFCMPGT,
27525 IX86_BUILTIN_PFMAX,
27526 IX86_BUILTIN_PFMIN,
27527 IX86_BUILTIN_PFMUL,
27528 IX86_BUILTIN_PFRCP,
27529 IX86_BUILTIN_PFRCPIT1,
27530 IX86_BUILTIN_PFRCPIT2,
27531 IX86_BUILTIN_PFRSQIT1,
27532 IX86_BUILTIN_PFRSQRT,
27533 IX86_BUILTIN_PFSUB,
27534 IX86_BUILTIN_PFSUBR,
27535 IX86_BUILTIN_PI2FD,
27536 IX86_BUILTIN_PMULHRW,
27537
27538 /* 3DNow! Athlon Extensions */
27539 IX86_BUILTIN_PF2IW,
27540 IX86_BUILTIN_PFNACC,
27541 IX86_BUILTIN_PFPNACC,
27542 IX86_BUILTIN_PI2FW,
27543 IX86_BUILTIN_PSWAPDSI,
27544 IX86_BUILTIN_PSWAPDSF,
27545
27546 /* SSE2 */
27547 IX86_BUILTIN_ADDPD,
27548 IX86_BUILTIN_ADDSD,
27549 IX86_BUILTIN_DIVPD,
27550 IX86_BUILTIN_DIVSD,
27551 IX86_BUILTIN_MULPD,
27552 IX86_BUILTIN_MULSD,
27553 IX86_BUILTIN_SUBPD,
27554 IX86_BUILTIN_SUBSD,
27555
27556 IX86_BUILTIN_CMPEQPD,
27557 IX86_BUILTIN_CMPLTPD,
27558 IX86_BUILTIN_CMPLEPD,
27559 IX86_BUILTIN_CMPGTPD,
27560 IX86_BUILTIN_CMPGEPD,
27561 IX86_BUILTIN_CMPNEQPD,
27562 IX86_BUILTIN_CMPNLTPD,
27563 IX86_BUILTIN_CMPNLEPD,
27564 IX86_BUILTIN_CMPNGTPD,
27565 IX86_BUILTIN_CMPNGEPD,
27566 IX86_BUILTIN_CMPORDPD,
27567 IX86_BUILTIN_CMPUNORDPD,
27568 IX86_BUILTIN_CMPEQSD,
27569 IX86_BUILTIN_CMPLTSD,
27570 IX86_BUILTIN_CMPLESD,
27571 IX86_BUILTIN_CMPNEQSD,
27572 IX86_BUILTIN_CMPNLTSD,
27573 IX86_BUILTIN_CMPNLESD,
27574 IX86_BUILTIN_CMPORDSD,
27575 IX86_BUILTIN_CMPUNORDSD,
27576
27577 IX86_BUILTIN_COMIEQSD,
27578 IX86_BUILTIN_COMILTSD,
27579 IX86_BUILTIN_COMILESD,
27580 IX86_BUILTIN_COMIGTSD,
27581 IX86_BUILTIN_COMIGESD,
27582 IX86_BUILTIN_COMINEQSD,
27583 IX86_BUILTIN_UCOMIEQSD,
27584 IX86_BUILTIN_UCOMILTSD,
27585 IX86_BUILTIN_UCOMILESD,
27586 IX86_BUILTIN_UCOMIGTSD,
27587 IX86_BUILTIN_UCOMIGESD,
27588 IX86_BUILTIN_UCOMINEQSD,
27589
27590 IX86_BUILTIN_MAXPD,
27591 IX86_BUILTIN_MAXSD,
27592 IX86_BUILTIN_MINPD,
27593 IX86_BUILTIN_MINSD,
27594
27595 IX86_BUILTIN_ANDPD,
27596 IX86_BUILTIN_ANDNPD,
27597 IX86_BUILTIN_ORPD,
27598 IX86_BUILTIN_XORPD,
27599
27600 IX86_BUILTIN_SQRTPD,
27601 IX86_BUILTIN_SQRTSD,
27602
27603 IX86_BUILTIN_UNPCKHPD,
27604 IX86_BUILTIN_UNPCKLPD,
27605
27606 IX86_BUILTIN_SHUFPD,
27607
27608 IX86_BUILTIN_LOADUPD,
27609 IX86_BUILTIN_STOREUPD,
27610 IX86_BUILTIN_MOVSD,
27611
27612 IX86_BUILTIN_LOADHPD,
27613 IX86_BUILTIN_LOADLPD,
27614
27615 IX86_BUILTIN_CVTDQ2PD,
27616 IX86_BUILTIN_CVTDQ2PS,
27617
27618 IX86_BUILTIN_CVTPD2DQ,
27619 IX86_BUILTIN_CVTPD2PI,
27620 IX86_BUILTIN_CVTPD2PS,
27621 IX86_BUILTIN_CVTTPD2DQ,
27622 IX86_BUILTIN_CVTTPD2PI,
27623
27624 IX86_BUILTIN_CVTPI2PD,
27625 IX86_BUILTIN_CVTSI2SD,
27626 IX86_BUILTIN_CVTSI642SD,
27627
27628 IX86_BUILTIN_CVTSD2SI,
27629 IX86_BUILTIN_CVTSD2SI64,
27630 IX86_BUILTIN_CVTSD2SS,
27631 IX86_BUILTIN_CVTSS2SD,
27632 IX86_BUILTIN_CVTTSD2SI,
27633 IX86_BUILTIN_CVTTSD2SI64,
27634
27635 IX86_BUILTIN_CVTPS2DQ,
27636 IX86_BUILTIN_CVTPS2PD,
27637 IX86_BUILTIN_CVTTPS2DQ,
27638
27639 IX86_BUILTIN_MOVNTI,
27640 IX86_BUILTIN_MOVNTI64,
27641 IX86_BUILTIN_MOVNTPD,
27642 IX86_BUILTIN_MOVNTDQ,
27643
27644 IX86_BUILTIN_MOVQ128,
27645
27646 /* SSE2 MMX */
27647 IX86_BUILTIN_MASKMOVDQU,
27648 IX86_BUILTIN_MOVMSKPD,
27649 IX86_BUILTIN_PMOVMSKB128,
27650
27651 IX86_BUILTIN_PACKSSWB128,
27652 IX86_BUILTIN_PACKSSDW128,
27653 IX86_BUILTIN_PACKUSWB128,
27654
27655 IX86_BUILTIN_PADDB128,
27656 IX86_BUILTIN_PADDW128,
27657 IX86_BUILTIN_PADDD128,
27658 IX86_BUILTIN_PADDQ128,
27659 IX86_BUILTIN_PADDSB128,
27660 IX86_BUILTIN_PADDSW128,
27661 IX86_BUILTIN_PADDUSB128,
27662 IX86_BUILTIN_PADDUSW128,
27663 IX86_BUILTIN_PSUBB128,
27664 IX86_BUILTIN_PSUBW128,
27665 IX86_BUILTIN_PSUBD128,
27666 IX86_BUILTIN_PSUBQ128,
27667 IX86_BUILTIN_PSUBSB128,
27668 IX86_BUILTIN_PSUBSW128,
27669 IX86_BUILTIN_PSUBUSB128,
27670 IX86_BUILTIN_PSUBUSW128,
27671
27672 IX86_BUILTIN_PAND128,
27673 IX86_BUILTIN_PANDN128,
27674 IX86_BUILTIN_POR128,
27675 IX86_BUILTIN_PXOR128,
27676
27677 IX86_BUILTIN_PAVGB128,
27678 IX86_BUILTIN_PAVGW128,
27679
27680 IX86_BUILTIN_PCMPEQB128,
27681 IX86_BUILTIN_PCMPEQW128,
27682 IX86_BUILTIN_PCMPEQD128,
27683 IX86_BUILTIN_PCMPGTB128,
27684 IX86_BUILTIN_PCMPGTW128,
27685 IX86_BUILTIN_PCMPGTD128,
27686
27687 IX86_BUILTIN_PMADDWD128,
27688
27689 IX86_BUILTIN_PMAXSW128,
27690 IX86_BUILTIN_PMAXUB128,
27691 IX86_BUILTIN_PMINSW128,
27692 IX86_BUILTIN_PMINUB128,
27693
27694 IX86_BUILTIN_PMULUDQ,
27695 IX86_BUILTIN_PMULUDQ128,
27696 IX86_BUILTIN_PMULHUW128,
27697 IX86_BUILTIN_PMULHW128,
27698 IX86_BUILTIN_PMULLW128,
27699
27700 IX86_BUILTIN_PSADBW128,
27701 IX86_BUILTIN_PSHUFHW,
27702 IX86_BUILTIN_PSHUFLW,
27703 IX86_BUILTIN_PSHUFD,
27704
27705 IX86_BUILTIN_PSLLDQI128,
27706 IX86_BUILTIN_PSLLWI128,
27707 IX86_BUILTIN_PSLLDI128,
27708 IX86_BUILTIN_PSLLQI128,
27709 IX86_BUILTIN_PSRAWI128,
27710 IX86_BUILTIN_PSRADI128,
27711 IX86_BUILTIN_PSRLDQI128,
27712 IX86_BUILTIN_PSRLWI128,
27713 IX86_BUILTIN_PSRLDI128,
27714 IX86_BUILTIN_PSRLQI128,
27715
27716 IX86_BUILTIN_PSLLDQ128,
27717 IX86_BUILTIN_PSLLW128,
27718 IX86_BUILTIN_PSLLD128,
27719 IX86_BUILTIN_PSLLQ128,
27720 IX86_BUILTIN_PSRAW128,
27721 IX86_BUILTIN_PSRAD128,
27722 IX86_BUILTIN_PSRLW128,
27723 IX86_BUILTIN_PSRLD128,
27724 IX86_BUILTIN_PSRLQ128,
27725
27726 IX86_BUILTIN_PUNPCKHBW128,
27727 IX86_BUILTIN_PUNPCKHWD128,
27728 IX86_BUILTIN_PUNPCKHDQ128,
27729 IX86_BUILTIN_PUNPCKHQDQ128,
27730 IX86_BUILTIN_PUNPCKLBW128,
27731 IX86_BUILTIN_PUNPCKLWD128,
27732 IX86_BUILTIN_PUNPCKLDQ128,
27733 IX86_BUILTIN_PUNPCKLQDQ128,
27734
27735 IX86_BUILTIN_CLFLUSH,
27736 IX86_BUILTIN_MFENCE,
27737 IX86_BUILTIN_LFENCE,
27738 IX86_BUILTIN_PAUSE,
27739
27740 IX86_BUILTIN_FNSTENV,
27741 IX86_BUILTIN_FLDENV,
27742 IX86_BUILTIN_FNSTSW,
27743 IX86_BUILTIN_FNCLEX,
27744
27745 IX86_BUILTIN_BSRSI,
27746 IX86_BUILTIN_BSRDI,
27747 IX86_BUILTIN_RDPMC,
27748 IX86_BUILTIN_RDTSC,
27749 IX86_BUILTIN_RDTSCP,
27750 IX86_BUILTIN_ROLQI,
27751 IX86_BUILTIN_ROLHI,
27752 IX86_BUILTIN_RORQI,
27753 IX86_BUILTIN_RORHI,
27754
27755 /* SSE3. */
27756 IX86_BUILTIN_ADDSUBPS,
27757 IX86_BUILTIN_HADDPS,
27758 IX86_BUILTIN_HSUBPS,
27759 IX86_BUILTIN_MOVSHDUP,
27760 IX86_BUILTIN_MOVSLDUP,
27761 IX86_BUILTIN_ADDSUBPD,
27762 IX86_BUILTIN_HADDPD,
27763 IX86_BUILTIN_HSUBPD,
27764 IX86_BUILTIN_LDDQU,
27765
27766 IX86_BUILTIN_MONITOR,
27767 IX86_BUILTIN_MWAIT,
27768
27769 /* SSSE3. */
27770 IX86_BUILTIN_PHADDW,
27771 IX86_BUILTIN_PHADDD,
27772 IX86_BUILTIN_PHADDSW,
27773 IX86_BUILTIN_PHSUBW,
27774 IX86_BUILTIN_PHSUBD,
27775 IX86_BUILTIN_PHSUBSW,
27776 IX86_BUILTIN_PMADDUBSW,
27777 IX86_BUILTIN_PMULHRSW,
27778 IX86_BUILTIN_PSHUFB,
27779 IX86_BUILTIN_PSIGNB,
27780 IX86_BUILTIN_PSIGNW,
27781 IX86_BUILTIN_PSIGND,
27782 IX86_BUILTIN_PALIGNR,
27783 IX86_BUILTIN_PABSB,
27784 IX86_BUILTIN_PABSW,
27785 IX86_BUILTIN_PABSD,
27786
27787 IX86_BUILTIN_PHADDW128,
27788 IX86_BUILTIN_PHADDD128,
27789 IX86_BUILTIN_PHADDSW128,
27790 IX86_BUILTIN_PHSUBW128,
27791 IX86_BUILTIN_PHSUBD128,
27792 IX86_BUILTIN_PHSUBSW128,
27793 IX86_BUILTIN_PMADDUBSW128,
27794 IX86_BUILTIN_PMULHRSW128,
27795 IX86_BUILTIN_PSHUFB128,
27796 IX86_BUILTIN_PSIGNB128,
27797 IX86_BUILTIN_PSIGNW128,
27798 IX86_BUILTIN_PSIGND128,
27799 IX86_BUILTIN_PALIGNR128,
27800 IX86_BUILTIN_PABSB128,
27801 IX86_BUILTIN_PABSW128,
27802 IX86_BUILTIN_PABSD128,
27803
27804 /* AMDFAM10 - SSE4A New Instructions. */
27805 IX86_BUILTIN_MOVNTSD,
27806 IX86_BUILTIN_MOVNTSS,
27807 IX86_BUILTIN_EXTRQI,
27808 IX86_BUILTIN_EXTRQ,
27809 IX86_BUILTIN_INSERTQI,
27810 IX86_BUILTIN_INSERTQ,
27811
27812 /* SSE4.1. */
27813 IX86_BUILTIN_BLENDPD,
27814 IX86_BUILTIN_BLENDPS,
27815 IX86_BUILTIN_BLENDVPD,
27816 IX86_BUILTIN_BLENDVPS,
27817 IX86_BUILTIN_PBLENDVB128,
27818 IX86_BUILTIN_PBLENDW128,
27819
27820 IX86_BUILTIN_DPPD,
27821 IX86_BUILTIN_DPPS,
27822
27823 IX86_BUILTIN_INSERTPS128,
27824
27825 IX86_BUILTIN_MOVNTDQA,
27826 IX86_BUILTIN_MPSADBW128,
27827 IX86_BUILTIN_PACKUSDW128,
27828 IX86_BUILTIN_PCMPEQQ,
27829 IX86_BUILTIN_PHMINPOSUW128,
27830
27831 IX86_BUILTIN_PMAXSB128,
27832 IX86_BUILTIN_PMAXSD128,
27833 IX86_BUILTIN_PMAXUD128,
27834 IX86_BUILTIN_PMAXUW128,
27835
27836 IX86_BUILTIN_PMINSB128,
27837 IX86_BUILTIN_PMINSD128,
27838 IX86_BUILTIN_PMINUD128,
27839 IX86_BUILTIN_PMINUW128,
27840
27841 IX86_BUILTIN_PMOVSXBW128,
27842 IX86_BUILTIN_PMOVSXBD128,
27843 IX86_BUILTIN_PMOVSXBQ128,
27844 IX86_BUILTIN_PMOVSXWD128,
27845 IX86_BUILTIN_PMOVSXWQ128,
27846 IX86_BUILTIN_PMOVSXDQ128,
27847
27848 IX86_BUILTIN_PMOVZXBW128,
27849 IX86_BUILTIN_PMOVZXBD128,
27850 IX86_BUILTIN_PMOVZXBQ128,
27851 IX86_BUILTIN_PMOVZXWD128,
27852 IX86_BUILTIN_PMOVZXWQ128,
27853 IX86_BUILTIN_PMOVZXDQ128,
27854
27855 IX86_BUILTIN_PMULDQ128,
27856 IX86_BUILTIN_PMULLD128,
27857
27858 IX86_BUILTIN_ROUNDSD,
27859 IX86_BUILTIN_ROUNDSS,
27860
27861 IX86_BUILTIN_ROUNDPD,
27862 IX86_BUILTIN_ROUNDPS,
27863
27864 IX86_BUILTIN_FLOORPD,
27865 IX86_BUILTIN_CEILPD,
27866 IX86_BUILTIN_TRUNCPD,
27867 IX86_BUILTIN_RINTPD,
27868 IX86_BUILTIN_ROUNDPD_AZ,
27869
27870 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX,
27871 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX,
27872 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX,
27873
27874 IX86_BUILTIN_FLOORPS,
27875 IX86_BUILTIN_CEILPS,
27876 IX86_BUILTIN_TRUNCPS,
27877 IX86_BUILTIN_RINTPS,
27878 IX86_BUILTIN_ROUNDPS_AZ,
27879
27880 IX86_BUILTIN_FLOORPS_SFIX,
27881 IX86_BUILTIN_CEILPS_SFIX,
27882 IX86_BUILTIN_ROUNDPS_AZ_SFIX,
27883
27884 IX86_BUILTIN_PTESTZ,
27885 IX86_BUILTIN_PTESTC,
27886 IX86_BUILTIN_PTESTNZC,
27887
27888 IX86_BUILTIN_VEC_INIT_V2SI,
27889 IX86_BUILTIN_VEC_INIT_V4HI,
27890 IX86_BUILTIN_VEC_INIT_V8QI,
27891 IX86_BUILTIN_VEC_EXT_V2DF,
27892 IX86_BUILTIN_VEC_EXT_V2DI,
27893 IX86_BUILTIN_VEC_EXT_V4SF,
27894 IX86_BUILTIN_VEC_EXT_V4SI,
27895 IX86_BUILTIN_VEC_EXT_V8HI,
27896 IX86_BUILTIN_VEC_EXT_V2SI,
27897 IX86_BUILTIN_VEC_EXT_V4HI,
27898 IX86_BUILTIN_VEC_EXT_V16QI,
27899 IX86_BUILTIN_VEC_SET_V2DI,
27900 IX86_BUILTIN_VEC_SET_V4SF,
27901 IX86_BUILTIN_VEC_SET_V4SI,
27902 IX86_BUILTIN_VEC_SET_V8HI,
27903 IX86_BUILTIN_VEC_SET_V4HI,
27904 IX86_BUILTIN_VEC_SET_V16QI,
27905
27906 IX86_BUILTIN_VEC_PACK_SFIX,
27907 IX86_BUILTIN_VEC_PACK_SFIX256,
27908
27909 /* SSE4.2. */
27910 IX86_BUILTIN_CRC32QI,
27911 IX86_BUILTIN_CRC32HI,
27912 IX86_BUILTIN_CRC32SI,
27913 IX86_BUILTIN_CRC32DI,
27914
27915 IX86_BUILTIN_PCMPESTRI128,
27916 IX86_BUILTIN_PCMPESTRM128,
27917 IX86_BUILTIN_PCMPESTRA128,
27918 IX86_BUILTIN_PCMPESTRC128,
27919 IX86_BUILTIN_PCMPESTRO128,
27920 IX86_BUILTIN_PCMPESTRS128,
27921 IX86_BUILTIN_PCMPESTRZ128,
27922 IX86_BUILTIN_PCMPISTRI128,
27923 IX86_BUILTIN_PCMPISTRM128,
27924 IX86_BUILTIN_PCMPISTRA128,
27925 IX86_BUILTIN_PCMPISTRC128,
27926 IX86_BUILTIN_PCMPISTRO128,
27927 IX86_BUILTIN_PCMPISTRS128,
27928 IX86_BUILTIN_PCMPISTRZ128,
27929
27930 IX86_BUILTIN_PCMPGTQ,
27931
27932 /* AES instructions */
27933 IX86_BUILTIN_AESENC128,
27934 IX86_BUILTIN_AESENCLAST128,
27935 IX86_BUILTIN_AESDEC128,
27936 IX86_BUILTIN_AESDECLAST128,
27937 IX86_BUILTIN_AESIMC128,
27938 IX86_BUILTIN_AESKEYGENASSIST128,
27939
27940 /* PCLMUL instruction */
27941 IX86_BUILTIN_PCLMULQDQ128,
27942
27943 /* AVX */
27944 IX86_BUILTIN_ADDPD256,
27945 IX86_BUILTIN_ADDPS256,
27946 IX86_BUILTIN_ADDSUBPD256,
27947 IX86_BUILTIN_ADDSUBPS256,
27948 IX86_BUILTIN_ANDPD256,
27949 IX86_BUILTIN_ANDPS256,
27950 IX86_BUILTIN_ANDNPD256,
27951 IX86_BUILTIN_ANDNPS256,
27952 IX86_BUILTIN_BLENDPD256,
27953 IX86_BUILTIN_BLENDPS256,
27954 IX86_BUILTIN_BLENDVPD256,
27955 IX86_BUILTIN_BLENDVPS256,
27956 IX86_BUILTIN_DIVPD256,
27957 IX86_BUILTIN_DIVPS256,
27958 IX86_BUILTIN_DPPS256,
27959 IX86_BUILTIN_HADDPD256,
27960 IX86_BUILTIN_HADDPS256,
27961 IX86_BUILTIN_HSUBPD256,
27962 IX86_BUILTIN_HSUBPS256,
27963 IX86_BUILTIN_MAXPD256,
27964 IX86_BUILTIN_MAXPS256,
27965 IX86_BUILTIN_MINPD256,
27966 IX86_BUILTIN_MINPS256,
27967 IX86_BUILTIN_MULPD256,
27968 IX86_BUILTIN_MULPS256,
27969 IX86_BUILTIN_ORPD256,
27970 IX86_BUILTIN_ORPS256,
27971 IX86_BUILTIN_SHUFPD256,
27972 IX86_BUILTIN_SHUFPS256,
27973 IX86_BUILTIN_SUBPD256,
27974 IX86_BUILTIN_SUBPS256,
27975 IX86_BUILTIN_XORPD256,
27976 IX86_BUILTIN_XORPS256,
27977 IX86_BUILTIN_CMPSD,
27978 IX86_BUILTIN_CMPSS,
27979 IX86_BUILTIN_CMPPD,
27980 IX86_BUILTIN_CMPPS,
27981 IX86_BUILTIN_CMPPD256,
27982 IX86_BUILTIN_CMPPS256,
27983 IX86_BUILTIN_CVTDQ2PD256,
27984 IX86_BUILTIN_CVTDQ2PS256,
27985 IX86_BUILTIN_CVTPD2PS256,
27986 IX86_BUILTIN_CVTPS2DQ256,
27987 IX86_BUILTIN_CVTPS2PD256,
27988 IX86_BUILTIN_CVTTPD2DQ256,
27989 IX86_BUILTIN_CVTPD2DQ256,
27990 IX86_BUILTIN_CVTTPS2DQ256,
27991 IX86_BUILTIN_EXTRACTF128PD256,
27992 IX86_BUILTIN_EXTRACTF128PS256,
27993 IX86_BUILTIN_EXTRACTF128SI256,
27994 IX86_BUILTIN_VZEROALL,
27995 IX86_BUILTIN_VZEROUPPER,
27996 IX86_BUILTIN_VPERMILVARPD,
27997 IX86_BUILTIN_VPERMILVARPS,
27998 IX86_BUILTIN_VPERMILVARPD256,
27999 IX86_BUILTIN_VPERMILVARPS256,
28000 IX86_BUILTIN_VPERMILPD,
28001 IX86_BUILTIN_VPERMILPS,
28002 IX86_BUILTIN_VPERMILPD256,
28003 IX86_BUILTIN_VPERMILPS256,
28004 IX86_BUILTIN_VPERMIL2PD,
28005 IX86_BUILTIN_VPERMIL2PS,
28006 IX86_BUILTIN_VPERMIL2PD256,
28007 IX86_BUILTIN_VPERMIL2PS256,
28008 IX86_BUILTIN_VPERM2F128PD256,
28009 IX86_BUILTIN_VPERM2F128PS256,
28010 IX86_BUILTIN_VPERM2F128SI256,
28011 IX86_BUILTIN_VBROADCASTSS,
28012 IX86_BUILTIN_VBROADCASTSD256,
28013 IX86_BUILTIN_VBROADCASTSS256,
28014 IX86_BUILTIN_VBROADCASTPD256,
28015 IX86_BUILTIN_VBROADCASTPS256,
28016 IX86_BUILTIN_VINSERTF128PD256,
28017 IX86_BUILTIN_VINSERTF128PS256,
28018 IX86_BUILTIN_VINSERTF128SI256,
28019 IX86_BUILTIN_LOADUPD256,
28020 IX86_BUILTIN_LOADUPS256,
28021 IX86_BUILTIN_STOREUPD256,
28022 IX86_BUILTIN_STOREUPS256,
28023 IX86_BUILTIN_LDDQU256,
28024 IX86_BUILTIN_MOVNTDQ256,
28025 IX86_BUILTIN_MOVNTPD256,
28026 IX86_BUILTIN_MOVNTPS256,
28027 IX86_BUILTIN_LOADDQU256,
28028 IX86_BUILTIN_STOREDQU256,
28029 IX86_BUILTIN_MASKLOADPD,
28030 IX86_BUILTIN_MASKLOADPS,
28031 IX86_BUILTIN_MASKSTOREPD,
28032 IX86_BUILTIN_MASKSTOREPS,
28033 IX86_BUILTIN_MASKLOADPD256,
28034 IX86_BUILTIN_MASKLOADPS256,
28035 IX86_BUILTIN_MASKSTOREPD256,
28036 IX86_BUILTIN_MASKSTOREPS256,
28037 IX86_BUILTIN_MOVSHDUP256,
28038 IX86_BUILTIN_MOVSLDUP256,
28039 IX86_BUILTIN_MOVDDUP256,
28040
28041 IX86_BUILTIN_SQRTPD256,
28042 IX86_BUILTIN_SQRTPS256,
28043 IX86_BUILTIN_SQRTPS_NR256,
28044 IX86_BUILTIN_RSQRTPS256,
28045 IX86_BUILTIN_RSQRTPS_NR256,
28046
28047 IX86_BUILTIN_RCPPS256,
28048
28049 IX86_BUILTIN_ROUNDPD256,
28050 IX86_BUILTIN_ROUNDPS256,
28051
28052 IX86_BUILTIN_FLOORPD256,
28053 IX86_BUILTIN_CEILPD256,
28054 IX86_BUILTIN_TRUNCPD256,
28055 IX86_BUILTIN_RINTPD256,
28056 IX86_BUILTIN_ROUNDPD_AZ256,
28057
28058 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256,
28059 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256,
28060 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256,
28061
28062 IX86_BUILTIN_FLOORPS256,
28063 IX86_BUILTIN_CEILPS256,
28064 IX86_BUILTIN_TRUNCPS256,
28065 IX86_BUILTIN_RINTPS256,
28066 IX86_BUILTIN_ROUNDPS_AZ256,
28067
28068 IX86_BUILTIN_FLOORPS_SFIX256,
28069 IX86_BUILTIN_CEILPS_SFIX256,
28070 IX86_BUILTIN_ROUNDPS_AZ_SFIX256,
28071
28072 IX86_BUILTIN_UNPCKHPD256,
28073 IX86_BUILTIN_UNPCKLPD256,
28074 IX86_BUILTIN_UNPCKHPS256,
28075 IX86_BUILTIN_UNPCKLPS256,
28076
28077 IX86_BUILTIN_SI256_SI,
28078 IX86_BUILTIN_PS256_PS,
28079 IX86_BUILTIN_PD256_PD,
28080 IX86_BUILTIN_SI_SI256,
28081 IX86_BUILTIN_PS_PS256,
28082 IX86_BUILTIN_PD_PD256,
28083
28084 IX86_BUILTIN_VTESTZPD,
28085 IX86_BUILTIN_VTESTCPD,
28086 IX86_BUILTIN_VTESTNZCPD,
28087 IX86_BUILTIN_VTESTZPS,
28088 IX86_BUILTIN_VTESTCPS,
28089 IX86_BUILTIN_VTESTNZCPS,
28090 IX86_BUILTIN_VTESTZPD256,
28091 IX86_BUILTIN_VTESTCPD256,
28092 IX86_BUILTIN_VTESTNZCPD256,
28093 IX86_BUILTIN_VTESTZPS256,
28094 IX86_BUILTIN_VTESTCPS256,
28095 IX86_BUILTIN_VTESTNZCPS256,
28096 IX86_BUILTIN_PTESTZ256,
28097 IX86_BUILTIN_PTESTC256,
28098 IX86_BUILTIN_PTESTNZC256,
28099
28100 IX86_BUILTIN_MOVMSKPD256,
28101 IX86_BUILTIN_MOVMSKPS256,
28102
28103 /* AVX2 */
28104 IX86_BUILTIN_MPSADBW256,
28105 IX86_BUILTIN_PABSB256,
28106 IX86_BUILTIN_PABSW256,
28107 IX86_BUILTIN_PABSD256,
28108 IX86_BUILTIN_PACKSSDW256,
28109 IX86_BUILTIN_PACKSSWB256,
28110 IX86_BUILTIN_PACKUSDW256,
28111 IX86_BUILTIN_PACKUSWB256,
28112 IX86_BUILTIN_PADDB256,
28113 IX86_BUILTIN_PADDW256,
28114 IX86_BUILTIN_PADDD256,
28115 IX86_BUILTIN_PADDQ256,
28116 IX86_BUILTIN_PADDSB256,
28117 IX86_BUILTIN_PADDSW256,
28118 IX86_BUILTIN_PADDUSB256,
28119 IX86_BUILTIN_PADDUSW256,
28120 IX86_BUILTIN_PALIGNR256,
28121 IX86_BUILTIN_AND256I,
28122 IX86_BUILTIN_ANDNOT256I,
28123 IX86_BUILTIN_PAVGB256,
28124 IX86_BUILTIN_PAVGW256,
28125 IX86_BUILTIN_PBLENDVB256,
28126 IX86_BUILTIN_PBLENDVW256,
28127 IX86_BUILTIN_PCMPEQB256,
28128 IX86_BUILTIN_PCMPEQW256,
28129 IX86_BUILTIN_PCMPEQD256,
28130 IX86_BUILTIN_PCMPEQQ256,
28131 IX86_BUILTIN_PCMPGTB256,
28132 IX86_BUILTIN_PCMPGTW256,
28133 IX86_BUILTIN_PCMPGTD256,
28134 IX86_BUILTIN_PCMPGTQ256,
28135 IX86_BUILTIN_PHADDW256,
28136 IX86_BUILTIN_PHADDD256,
28137 IX86_BUILTIN_PHADDSW256,
28138 IX86_BUILTIN_PHSUBW256,
28139 IX86_BUILTIN_PHSUBD256,
28140 IX86_BUILTIN_PHSUBSW256,
28141 IX86_BUILTIN_PMADDUBSW256,
28142 IX86_BUILTIN_PMADDWD256,
28143 IX86_BUILTIN_PMAXSB256,
28144 IX86_BUILTIN_PMAXSW256,
28145 IX86_BUILTIN_PMAXSD256,
28146 IX86_BUILTIN_PMAXUB256,
28147 IX86_BUILTIN_PMAXUW256,
28148 IX86_BUILTIN_PMAXUD256,
28149 IX86_BUILTIN_PMINSB256,
28150 IX86_BUILTIN_PMINSW256,
28151 IX86_BUILTIN_PMINSD256,
28152 IX86_BUILTIN_PMINUB256,
28153 IX86_BUILTIN_PMINUW256,
28154 IX86_BUILTIN_PMINUD256,
28155 IX86_BUILTIN_PMOVMSKB256,
28156 IX86_BUILTIN_PMOVSXBW256,
28157 IX86_BUILTIN_PMOVSXBD256,
28158 IX86_BUILTIN_PMOVSXBQ256,
28159 IX86_BUILTIN_PMOVSXWD256,
28160 IX86_BUILTIN_PMOVSXWQ256,
28161 IX86_BUILTIN_PMOVSXDQ256,
28162 IX86_BUILTIN_PMOVZXBW256,
28163 IX86_BUILTIN_PMOVZXBD256,
28164 IX86_BUILTIN_PMOVZXBQ256,
28165 IX86_BUILTIN_PMOVZXWD256,
28166 IX86_BUILTIN_PMOVZXWQ256,
28167 IX86_BUILTIN_PMOVZXDQ256,
28168 IX86_BUILTIN_PMULDQ256,
28169 IX86_BUILTIN_PMULHRSW256,
28170 IX86_BUILTIN_PMULHUW256,
28171 IX86_BUILTIN_PMULHW256,
28172 IX86_BUILTIN_PMULLW256,
28173 IX86_BUILTIN_PMULLD256,
28174 IX86_BUILTIN_PMULUDQ256,
28175 IX86_BUILTIN_POR256,
28176 IX86_BUILTIN_PSADBW256,
28177 IX86_BUILTIN_PSHUFB256,
28178 IX86_BUILTIN_PSHUFD256,
28179 IX86_BUILTIN_PSHUFHW256,
28180 IX86_BUILTIN_PSHUFLW256,
28181 IX86_BUILTIN_PSIGNB256,
28182 IX86_BUILTIN_PSIGNW256,
28183 IX86_BUILTIN_PSIGND256,
28184 IX86_BUILTIN_PSLLDQI256,
28185 IX86_BUILTIN_PSLLWI256,
28186 IX86_BUILTIN_PSLLW256,
28187 IX86_BUILTIN_PSLLDI256,
28188 IX86_BUILTIN_PSLLD256,
28189 IX86_BUILTIN_PSLLQI256,
28190 IX86_BUILTIN_PSLLQ256,
28191 IX86_BUILTIN_PSRAWI256,
28192 IX86_BUILTIN_PSRAW256,
28193 IX86_BUILTIN_PSRADI256,
28194 IX86_BUILTIN_PSRAD256,
28195 IX86_BUILTIN_PSRLDQI256,
28196 IX86_BUILTIN_PSRLWI256,
28197 IX86_BUILTIN_PSRLW256,
28198 IX86_BUILTIN_PSRLDI256,
28199 IX86_BUILTIN_PSRLD256,
28200 IX86_BUILTIN_PSRLQI256,
28201 IX86_BUILTIN_PSRLQ256,
28202 IX86_BUILTIN_PSUBB256,
28203 IX86_BUILTIN_PSUBW256,
28204 IX86_BUILTIN_PSUBD256,
28205 IX86_BUILTIN_PSUBQ256,
28206 IX86_BUILTIN_PSUBSB256,
28207 IX86_BUILTIN_PSUBSW256,
28208 IX86_BUILTIN_PSUBUSB256,
28209 IX86_BUILTIN_PSUBUSW256,
28210 IX86_BUILTIN_PUNPCKHBW256,
28211 IX86_BUILTIN_PUNPCKHWD256,
28212 IX86_BUILTIN_PUNPCKHDQ256,
28213 IX86_BUILTIN_PUNPCKHQDQ256,
28214 IX86_BUILTIN_PUNPCKLBW256,
28215 IX86_BUILTIN_PUNPCKLWD256,
28216 IX86_BUILTIN_PUNPCKLDQ256,
28217 IX86_BUILTIN_PUNPCKLQDQ256,
28218 IX86_BUILTIN_PXOR256,
28219 IX86_BUILTIN_MOVNTDQA256,
28220 IX86_BUILTIN_VBROADCASTSS_PS,
28221 IX86_BUILTIN_VBROADCASTSS_PS256,
28222 IX86_BUILTIN_VBROADCASTSD_PD256,
28223 IX86_BUILTIN_VBROADCASTSI256,
28224 IX86_BUILTIN_PBLENDD256,
28225 IX86_BUILTIN_PBLENDD128,
28226 IX86_BUILTIN_PBROADCASTB256,
28227 IX86_BUILTIN_PBROADCASTW256,
28228 IX86_BUILTIN_PBROADCASTD256,
28229 IX86_BUILTIN_PBROADCASTQ256,
28230 IX86_BUILTIN_PBROADCASTB128,
28231 IX86_BUILTIN_PBROADCASTW128,
28232 IX86_BUILTIN_PBROADCASTD128,
28233 IX86_BUILTIN_PBROADCASTQ128,
28234 IX86_BUILTIN_VPERMVARSI256,
28235 IX86_BUILTIN_VPERMDF256,
28236 IX86_BUILTIN_VPERMVARSF256,
28237 IX86_BUILTIN_VPERMDI256,
28238 IX86_BUILTIN_VPERMTI256,
28239 IX86_BUILTIN_VEXTRACT128I256,
28240 IX86_BUILTIN_VINSERT128I256,
28241 IX86_BUILTIN_MASKLOADD,
28242 IX86_BUILTIN_MASKLOADQ,
28243 IX86_BUILTIN_MASKLOADD256,
28244 IX86_BUILTIN_MASKLOADQ256,
28245 IX86_BUILTIN_MASKSTORED,
28246 IX86_BUILTIN_MASKSTOREQ,
28247 IX86_BUILTIN_MASKSTORED256,
28248 IX86_BUILTIN_MASKSTOREQ256,
28249 IX86_BUILTIN_PSLLVV4DI,
28250 IX86_BUILTIN_PSLLVV2DI,
28251 IX86_BUILTIN_PSLLVV8SI,
28252 IX86_BUILTIN_PSLLVV4SI,
28253 IX86_BUILTIN_PSRAVV8SI,
28254 IX86_BUILTIN_PSRAVV4SI,
28255 IX86_BUILTIN_PSRLVV4DI,
28256 IX86_BUILTIN_PSRLVV2DI,
28257 IX86_BUILTIN_PSRLVV8SI,
28258 IX86_BUILTIN_PSRLVV4SI,
28259
28260 IX86_BUILTIN_GATHERSIV2DF,
28261 IX86_BUILTIN_GATHERSIV4DF,
28262 IX86_BUILTIN_GATHERDIV2DF,
28263 IX86_BUILTIN_GATHERDIV4DF,
28264 IX86_BUILTIN_GATHERSIV4SF,
28265 IX86_BUILTIN_GATHERSIV8SF,
28266 IX86_BUILTIN_GATHERDIV4SF,
28267 IX86_BUILTIN_GATHERDIV8SF,
28268 IX86_BUILTIN_GATHERSIV2DI,
28269 IX86_BUILTIN_GATHERSIV4DI,
28270 IX86_BUILTIN_GATHERDIV2DI,
28271 IX86_BUILTIN_GATHERDIV4DI,
28272 IX86_BUILTIN_GATHERSIV4SI,
28273 IX86_BUILTIN_GATHERSIV8SI,
28274 IX86_BUILTIN_GATHERDIV4SI,
28275 IX86_BUILTIN_GATHERDIV8SI,
28276
28277 /* AVX512F */
28278 IX86_BUILTIN_SI512_SI256,
28279 IX86_BUILTIN_PD512_PD256,
28280 IX86_BUILTIN_PS512_PS256,
28281 IX86_BUILTIN_SI512_SI,
28282 IX86_BUILTIN_PD512_PD,
28283 IX86_BUILTIN_PS512_PS,
28284 IX86_BUILTIN_ADDPD512,
28285 IX86_BUILTIN_ADDPS512,
28286 IX86_BUILTIN_ADDSD_ROUND,
28287 IX86_BUILTIN_ADDSS_ROUND,
28288 IX86_BUILTIN_ALIGND512,
28289 IX86_BUILTIN_ALIGNQ512,
28290 IX86_BUILTIN_BLENDMD512,
28291 IX86_BUILTIN_BLENDMPD512,
28292 IX86_BUILTIN_BLENDMPS512,
28293 IX86_BUILTIN_BLENDMQ512,
28294 IX86_BUILTIN_BROADCASTF32X4_512,
28295 IX86_BUILTIN_BROADCASTF64X4_512,
28296 IX86_BUILTIN_BROADCASTI32X4_512,
28297 IX86_BUILTIN_BROADCASTI64X4_512,
28298 IX86_BUILTIN_BROADCASTSD512,
28299 IX86_BUILTIN_BROADCASTSS512,
28300 IX86_BUILTIN_CMPD512,
28301 IX86_BUILTIN_CMPPD512,
28302 IX86_BUILTIN_CMPPS512,
28303 IX86_BUILTIN_CMPQ512,
28304 IX86_BUILTIN_CMPSD_MASK,
28305 IX86_BUILTIN_CMPSS_MASK,
28306 IX86_BUILTIN_COMIDF,
28307 IX86_BUILTIN_COMISF,
28308 IX86_BUILTIN_COMPRESSPD512,
28309 IX86_BUILTIN_COMPRESSPDSTORE512,
28310 IX86_BUILTIN_COMPRESSPS512,
28311 IX86_BUILTIN_COMPRESSPSSTORE512,
28312 IX86_BUILTIN_CVTDQ2PD512,
28313 IX86_BUILTIN_CVTDQ2PS512,
28314 IX86_BUILTIN_CVTPD2DQ512,
28315 IX86_BUILTIN_CVTPD2PS512,
28316 IX86_BUILTIN_CVTPD2UDQ512,
28317 IX86_BUILTIN_CVTPH2PS512,
28318 IX86_BUILTIN_CVTPS2DQ512,
28319 IX86_BUILTIN_CVTPS2PD512,
28320 IX86_BUILTIN_CVTPS2PH512,
28321 IX86_BUILTIN_CVTPS2UDQ512,
28322 IX86_BUILTIN_CVTSD2SS_ROUND,
28323 IX86_BUILTIN_CVTSI2SD64,
28324 IX86_BUILTIN_CVTSI2SS32,
28325 IX86_BUILTIN_CVTSI2SS64,
28326 IX86_BUILTIN_CVTSS2SD_ROUND,
28327 IX86_BUILTIN_CVTTPD2DQ512,
28328 IX86_BUILTIN_CVTTPD2UDQ512,
28329 IX86_BUILTIN_CVTTPS2DQ512,
28330 IX86_BUILTIN_CVTTPS2UDQ512,
28331 IX86_BUILTIN_CVTUDQ2PD512,
28332 IX86_BUILTIN_CVTUDQ2PS512,
28333 IX86_BUILTIN_CVTUSI2SD32,
28334 IX86_BUILTIN_CVTUSI2SD64,
28335 IX86_BUILTIN_CVTUSI2SS32,
28336 IX86_BUILTIN_CVTUSI2SS64,
28337 IX86_BUILTIN_DIVPD512,
28338 IX86_BUILTIN_DIVPS512,
28339 IX86_BUILTIN_DIVSD_ROUND,
28340 IX86_BUILTIN_DIVSS_ROUND,
28341 IX86_BUILTIN_EXPANDPD512,
28342 IX86_BUILTIN_EXPANDPD512Z,
28343 IX86_BUILTIN_EXPANDPDLOAD512,
28344 IX86_BUILTIN_EXPANDPDLOAD512Z,
28345 IX86_BUILTIN_EXPANDPS512,
28346 IX86_BUILTIN_EXPANDPS512Z,
28347 IX86_BUILTIN_EXPANDPSLOAD512,
28348 IX86_BUILTIN_EXPANDPSLOAD512Z,
28349 IX86_BUILTIN_EXTRACTF32X4,
28350 IX86_BUILTIN_EXTRACTF64X4,
28351 IX86_BUILTIN_EXTRACTI32X4,
28352 IX86_BUILTIN_EXTRACTI64X4,
28353 IX86_BUILTIN_FIXUPIMMPD512_MASK,
28354 IX86_BUILTIN_FIXUPIMMPD512_MASKZ,
28355 IX86_BUILTIN_FIXUPIMMPS512_MASK,
28356 IX86_BUILTIN_FIXUPIMMPS512_MASKZ,
28357 IX86_BUILTIN_FIXUPIMMSD128_MASK,
28358 IX86_BUILTIN_FIXUPIMMSD128_MASKZ,
28359 IX86_BUILTIN_FIXUPIMMSS128_MASK,
28360 IX86_BUILTIN_FIXUPIMMSS128_MASKZ,
28361 IX86_BUILTIN_GETEXPPD512,
28362 IX86_BUILTIN_GETEXPPS512,
28363 IX86_BUILTIN_GETEXPSD128,
28364 IX86_BUILTIN_GETEXPSS128,
28365 IX86_BUILTIN_GETMANTPD512,
28366 IX86_BUILTIN_GETMANTPS512,
28367 IX86_BUILTIN_GETMANTSD128,
28368 IX86_BUILTIN_GETMANTSS128,
28369 IX86_BUILTIN_INSERTF32X4,
28370 IX86_BUILTIN_INSERTF64X4,
28371 IX86_BUILTIN_INSERTI32X4,
28372 IX86_BUILTIN_INSERTI64X4,
28373 IX86_BUILTIN_LOADAPD512,
28374 IX86_BUILTIN_LOADAPS512,
28375 IX86_BUILTIN_LOADDQUDI512,
28376 IX86_BUILTIN_LOADDQUSI512,
28377 IX86_BUILTIN_LOADUPD512,
28378 IX86_BUILTIN_LOADUPS512,
28379 IX86_BUILTIN_MAXPD512,
28380 IX86_BUILTIN_MAXPS512,
28381 IX86_BUILTIN_MAXSD_ROUND,
28382 IX86_BUILTIN_MAXSS_ROUND,
28383 IX86_BUILTIN_MINPD512,
28384 IX86_BUILTIN_MINPS512,
28385 IX86_BUILTIN_MINSD_ROUND,
28386 IX86_BUILTIN_MINSS_ROUND,
28387 IX86_BUILTIN_MOVAPD512,
28388 IX86_BUILTIN_MOVAPS512,
28389 IX86_BUILTIN_MOVDDUP512,
28390 IX86_BUILTIN_MOVDQA32LOAD512,
28391 IX86_BUILTIN_MOVDQA32STORE512,
28392 IX86_BUILTIN_MOVDQA32_512,
28393 IX86_BUILTIN_MOVDQA64LOAD512,
28394 IX86_BUILTIN_MOVDQA64STORE512,
28395 IX86_BUILTIN_MOVDQA64_512,
28396 IX86_BUILTIN_MOVNTDQ512,
28397 IX86_BUILTIN_MOVNTDQA512,
28398 IX86_BUILTIN_MOVNTPD512,
28399 IX86_BUILTIN_MOVNTPS512,
28400 IX86_BUILTIN_MOVSHDUP512,
28401 IX86_BUILTIN_MOVSLDUP512,
28402 IX86_BUILTIN_MULPD512,
28403 IX86_BUILTIN_MULPS512,
28404 IX86_BUILTIN_MULSD_ROUND,
28405 IX86_BUILTIN_MULSS_ROUND,
28406 IX86_BUILTIN_PABSD512,
28407 IX86_BUILTIN_PABSQ512,
28408 IX86_BUILTIN_PADDD512,
28409 IX86_BUILTIN_PADDQ512,
28410 IX86_BUILTIN_PANDD512,
28411 IX86_BUILTIN_PANDND512,
28412 IX86_BUILTIN_PANDNQ512,
28413 IX86_BUILTIN_PANDQ512,
28414 IX86_BUILTIN_PBROADCASTD512,
28415 IX86_BUILTIN_PBROADCASTD512_GPR,
28416 IX86_BUILTIN_PBROADCASTMB512,
28417 IX86_BUILTIN_PBROADCASTMW512,
28418 IX86_BUILTIN_PBROADCASTQ512,
28419 IX86_BUILTIN_PBROADCASTQ512_GPR,
28420 IX86_BUILTIN_PBROADCASTQ512_MEM,
28421 IX86_BUILTIN_PCMPEQD512_MASK,
28422 IX86_BUILTIN_PCMPEQQ512_MASK,
28423 IX86_BUILTIN_PCMPGTD512_MASK,
28424 IX86_BUILTIN_PCMPGTQ512_MASK,
28425 IX86_BUILTIN_PCOMPRESSD512,
28426 IX86_BUILTIN_PCOMPRESSDSTORE512,
28427 IX86_BUILTIN_PCOMPRESSQ512,
28428 IX86_BUILTIN_PCOMPRESSQSTORE512,
28429 IX86_BUILTIN_PEXPANDD512,
28430 IX86_BUILTIN_PEXPANDD512Z,
28431 IX86_BUILTIN_PEXPANDDLOAD512,
28432 IX86_BUILTIN_PEXPANDDLOAD512Z,
28433 IX86_BUILTIN_PEXPANDQ512,
28434 IX86_BUILTIN_PEXPANDQ512Z,
28435 IX86_BUILTIN_PEXPANDQLOAD512,
28436 IX86_BUILTIN_PEXPANDQLOAD512Z,
28437 IX86_BUILTIN_PMAXSD512,
28438 IX86_BUILTIN_PMAXSQ512,
28439 IX86_BUILTIN_PMAXUD512,
28440 IX86_BUILTIN_PMAXUQ512,
28441 IX86_BUILTIN_PMINSD512,
28442 IX86_BUILTIN_PMINSQ512,
28443 IX86_BUILTIN_PMINUD512,
28444 IX86_BUILTIN_PMINUQ512,
28445 IX86_BUILTIN_PMOVDB512,
28446 IX86_BUILTIN_PMOVDB512_MEM,
28447 IX86_BUILTIN_PMOVDW512,
28448 IX86_BUILTIN_PMOVDW512_MEM,
28449 IX86_BUILTIN_PMOVQB512,
28450 IX86_BUILTIN_PMOVQB512_MEM,
28451 IX86_BUILTIN_PMOVQD512,
28452 IX86_BUILTIN_PMOVQD512_MEM,
28453 IX86_BUILTIN_PMOVQW512,
28454 IX86_BUILTIN_PMOVQW512_MEM,
28455 IX86_BUILTIN_PMOVSDB512,
28456 IX86_BUILTIN_PMOVSDB512_MEM,
28457 IX86_BUILTIN_PMOVSDW512,
28458 IX86_BUILTIN_PMOVSDW512_MEM,
28459 IX86_BUILTIN_PMOVSQB512,
28460 IX86_BUILTIN_PMOVSQB512_MEM,
28461 IX86_BUILTIN_PMOVSQD512,
28462 IX86_BUILTIN_PMOVSQD512_MEM,
28463 IX86_BUILTIN_PMOVSQW512,
28464 IX86_BUILTIN_PMOVSQW512_MEM,
28465 IX86_BUILTIN_PMOVSXBD512,
28466 IX86_BUILTIN_PMOVSXBQ512,
28467 IX86_BUILTIN_PMOVSXDQ512,
28468 IX86_BUILTIN_PMOVSXWD512,
28469 IX86_BUILTIN_PMOVSXWQ512,
28470 IX86_BUILTIN_PMOVUSDB512,
28471 IX86_BUILTIN_PMOVUSDB512_MEM,
28472 IX86_BUILTIN_PMOVUSDW512,
28473 IX86_BUILTIN_PMOVUSDW512_MEM,
28474 IX86_BUILTIN_PMOVUSQB512,
28475 IX86_BUILTIN_PMOVUSQB512_MEM,
28476 IX86_BUILTIN_PMOVUSQD512,
28477 IX86_BUILTIN_PMOVUSQD512_MEM,
28478 IX86_BUILTIN_PMOVUSQW512,
28479 IX86_BUILTIN_PMOVUSQW512_MEM,
28480 IX86_BUILTIN_PMOVZXBD512,
28481 IX86_BUILTIN_PMOVZXBQ512,
28482 IX86_BUILTIN_PMOVZXDQ512,
28483 IX86_BUILTIN_PMOVZXWD512,
28484 IX86_BUILTIN_PMOVZXWQ512,
28485 IX86_BUILTIN_PMULDQ512,
28486 IX86_BUILTIN_PMULLD512,
28487 IX86_BUILTIN_PMULUDQ512,
28488 IX86_BUILTIN_PORD512,
28489 IX86_BUILTIN_PORQ512,
28490 IX86_BUILTIN_PROLD512,
28491 IX86_BUILTIN_PROLQ512,
28492 IX86_BUILTIN_PROLVD512,
28493 IX86_BUILTIN_PROLVQ512,
28494 IX86_BUILTIN_PRORD512,
28495 IX86_BUILTIN_PRORQ512,
28496 IX86_BUILTIN_PRORVD512,
28497 IX86_BUILTIN_PRORVQ512,
28498 IX86_BUILTIN_PSHUFD512,
28499 IX86_BUILTIN_PSLLD512,
28500 IX86_BUILTIN_PSLLDI512,
28501 IX86_BUILTIN_PSLLQ512,
28502 IX86_BUILTIN_PSLLQI512,
28503 IX86_BUILTIN_PSLLVV16SI,
28504 IX86_BUILTIN_PSLLVV8DI,
28505 IX86_BUILTIN_PSRAD512,
28506 IX86_BUILTIN_PSRADI512,
28507 IX86_BUILTIN_PSRAQ512,
28508 IX86_BUILTIN_PSRAQI512,
28509 IX86_BUILTIN_PSRAVV16SI,
28510 IX86_BUILTIN_PSRAVV8DI,
28511 IX86_BUILTIN_PSRLD512,
28512 IX86_BUILTIN_PSRLDI512,
28513 IX86_BUILTIN_PSRLQ512,
28514 IX86_BUILTIN_PSRLQI512,
28515 IX86_BUILTIN_PSRLVV16SI,
28516 IX86_BUILTIN_PSRLVV8DI,
28517 IX86_BUILTIN_PSUBD512,
28518 IX86_BUILTIN_PSUBQ512,
28519 IX86_BUILTIN_PTESTMD512,
28520 IX86_BUILTIN_PTESTMQ512,
28521 IX86_BUILTIN_PTESTNMD512,
28522 IX86_BUILTIN_PTESTNMQ512,
28523 IX86_BUILTIN_PUNPCKHDQ512,
28524 IX86_BUILTIN_PUNPCKHQDQ512,
28525 IX86_BUILTIN_PUNPCKLDQ512,
28526 IX86_BUILTIN_PUNPCKLQDQ512,
28527 IX86_BUILTIN_PXORD512,
28528 IX86_BUILTIN_PXORQ512,
28529 IX86_BUILTIN_RCP14PD512,
28530 IX86_BUILTIN_RCP14PS512,
28531 IX86_BUILTIN_RCP14SD,
28532 IX86_BUILTIN_RCP14SS,
28533 IX86_BUILTIN_RNDSCALEPD,
28534 IX86_BUILTIN_RNDSCALEPS,
28535 IX86_BUILTIN_RNDSCALESD,
28536 IX86_BUILTIN_RNDSCALESS,
28537 IX86_BUILTIN_RSQRT14PD512,
28538 IX86_BUILTIN_RSQRT14PS512,
28539 IX86_BUILTIN_RSQRT14SD,
28540 IX86_BUILTIN_RSQRT14SS,
28541 IX86_BUILTIN_SCALEFPD512,
28542 IX86_BUILTIN_SCALEFPS512,
28543 IX86_BUILTIN_SCALEFSD,
28544 IX86_BUILTIN_SCALEFSS,
28545 IX86_BUILTIN_SHUFPD512,
28546 IX86_BUILTIN_SHUFPS512,
28547 IX86_BUILTIN_SHUF_F32x4,
28548 IX86_BUILTIN_SHUF_F64x2,
28549 IX86_BUILTIN_SHUF_I32x4,
28550 IX86_BUILTIN_SHUF_I64x2,
28551 IX86_BUILTIN_SQRTPD512,
28552 IX86_BUILTIN_SQRTPD512_MASK,
28553 IX86_BUILTIN_SQRTPS512_MASK,
28554 IX86_BUILTIN_SQRTPS_NR512,
28555 IX86_BUILTIN_SQRTSD_ROUND,
28556 IX86_BUILTIN_SQRTSS_ROUND,
28557 IX86_BUILTIN_STOREAPD512,
28558 IX86_BUILTIN_STOREAPS512,
28559 IX86_BUILTIN_STOREDQUDI512,
28560 IX86_BUILTIN_STOREDQUSI512,
28561 IX86_BUILTIN_STOREUPD512,
28562 IX86_BUILTIN_STOREUPS512,
28563 IX86_BUILTIN_SUBPD512,
28564 IX86_BUILTIN_SUBPS512,
28565 IX86_BUILTIN_SUBSD_ROUND,
28566 IX86_BUILTIN_SUBSS_ROUND,
28567 IX86_BUILTIN_UCMPD512,
28568 IX86_BUILTIN_UCMPQ512,
28569 IX86_BUILTIN_UNPCKHPD512,
28570 IX86_BUILTIN_UNPCKHPS512,
28571 IX86_BUILTIN_UNPCKLPD512,
28572 IX86_BUILTIN_UNPCKLPS512,
28573 IX86_BUILTIN_VCVTSD2SI32,
28574 IX86_BUILTIN_VCVTSD2SI64,
28575 IX86_BUILTIN_VCVTSD2USI32,
28576 IX86_BUILTIN_VCVTSD2USI64,
28577 IX86_BUILTIN_VCVTSS2SI32,
28578 IX86_BUILTIN_VCVTSS2SI64,
28579 IX86_BUILTIN_VCVTSS2USI32,
28580 IX86_BUILTIN_VCVTSS2USI64,
28581 IX86_BUILTIN_VCVTTSD2SI32,
28582 IX86_BUILTIN_VCVTTSD2SI64,
28583 IX86_BUILTIN_VCVTTSD2USI32,
28584 IX86_BUILTIN_VCVTTSD2USI64,
28585 IX86_BUILTIN_VCVTTSS2SI32,
28586 IX86_BUILTIN_VCVTTSS2SI64,
28587 IX86_BUILTIN_VCVTTSS2USI32,
28588 IX86_BUILTIN_VCVTTSS2USI64,
28589 IX86_BUILTIN_VFMADDPD512_MASK,
28590 IX86_BUILTIN_VFMADDPD512_MASK3,
28591 IX86_BUILTIN_VFMADDPD512_MASKZ,
28592 IX86_BUILTIN_VFMADDPS512_MASK,
28593 IX86_BUILTIN_VFMADDPS512_MASK3,
28594 IX86_BUILTIN_VFMADDPS512_MASKZ,
28595 IX86_BUILTIN_VFMADDSD3_ROUND,
28596 IX86_BUILTIN_VFMADDSS3_ROUND,
28597 IX86_BUILTIN_VFMADDSUBPD512_MASK,
28598 IX86_BUILTIN_VFMADDSUBPD512_MASK3,
28599 IX86_BUILTIN_VFMADDSUBPD512_MASKZ,
28600 IX86_BUILTIN_VFMADDSUBPS512_MASK,
28601 IX86_BUILTIN_VFMADDSUBPS512_MASK3,
28602 IX86_BUILTIN_VFMADDSUBPS512_MASKZ,
28603 IX86_BUILTIN_VFMSUBADDPD512_MASK3,
28604 IX86_BUILTIN_VFMSUBADDPS512_MASK3,
28605 IX86_BUILTIN_VFMSUBPD512_MASK3,
28606 IX86_BUILTIN_VFMSUBPS512_MASK3,
28607 IX86_BUILTIN_VFMSUBSD3_MASK3,
28608 IX86_BUILTIN_VFMSUBSS3_MASK3,
28609 IX86_BUILTIN_VFNMADDPD512_MASK,
28610 IX86_BUILTIN_VFNMADDPS512_MASK,
28611 IX86_BUILTIN_VFNMSUBPD512_MASK,
28612 IX86_BUILTIN_VFNMSUBPD512_MASK3,
28613 IX86_BUILTIN_VFNMSUBPS512_MASK,
28614 IX86_BUILTIN_VFNMSUBPS512_MASK3,
28615 IX86_BUILTIN_VPCLZCNTD512,
28616 IX86_BUILTIN_VPCLZCNTQ512,
28617 IX86_BUILTIN_VPCONFLICTD512,
28618 IX86_BUILTIN_VPCONFLICTQ512,
28619 IX86_BUILTIN_VPERMDF512,
28620 IX86_BUILTIN_VPERMDI512,
28621 IX86_BUILTIN_VPERMI2VARD512,
28622 IX86_BUILTIN_VPERMI2VARPD512,
28623 IX86_BUILTIN_VPERMI2VARPS512,
28624 IX86_BUILTIN_VPERMI2VARQ512,
28625 IX86_BUILTIN_VPERMILPD512,
28626 IX86_BUILTIN_VPERMILPS512,
28627 IX86_BUILTIN_VPERMILVARPD512,
28628 IX86_BUILTIN_VPERMILVARPS512,
28629 IX86_BUILTIN_VPERMT2VARD512,
28630 IX86_BUILTIN_VPERMT2VARD512_MASKZ,
28631 IX86_BUILTIN_VPERMT2VARPD512,
28632 IX86_BUILTIN_VPERMT2VARPD512_MASKZ,
28633 IX86_BUILTIN_VPERMT2VARPS512,
28634 IX86_BUILTIN_VPERMT2VARPS512_MASKZ,
28635 IX86_BUILTIN_VPERMT2VARQ512,
28636 IX86_BUILTIN_VPERMT2VARQ512_MASKZ,
28637 IX86_BUILTIN_VPERMVARDF512,
28638 IX86_BUILTIN_VPERMVARDI512,
28639 IX86_BUILTIN_VPERMVARSF512,
28640 IX86_BUILTIN_VPERMVARSI512,
28641 IX86_BUILTIN_VTERNLOGD512_MASK,
28642 IX86_BUILTIN_VTERNLOGD512_MASKZ,
28643 IX86_BUILTIN_VTERNLOGQ512_MASK,
28644 IX86_BUILTIN_VTERNLOGQ512_MASKZ,
28645
28646 /* Mask arithmetic operations */
28647 IX86_BUILTIN_KAND16,
28648 IX86_BUILTIN_KANDN16,
28649 IX86_BUILTIN_KNOT16,
28650 IX86_BUILTIN_KOR16,
28651 IX86_BUILTIN_KORTESTC16,
28652 IX86_BUILTIN_KORTESTZ16,
28653 IX86_BUILTIN_KUNPCKBW,
28654 IX86_BUILTIN_KXNOR16,
28655 IX86_BUILTIN_KXOR16,
28656 IX86_BUILTIN_KMOV16,
28657
28658 /* Alternate 4- and 8-element gather/scatter for the vectorizer,
28659 where all operands are 32-byte or 64-byte wide respectively. */
28660 IX86_BUILTIN_GATHERALTSIV4DF,
28661 IX86_BUILTIN_GATHERALTDIV8SF,
28662 IX86_BUILTIN_GATHERALTSIV4DI,
28663 IX86_BUILTIN_GATHERALTDIV8SI,
28664 IX86_BUILTIN_GATHER3ALTDIV16SF,
28665 IX86_BUILTIN_GATHER3ALTDIV16SI,
28666 IX86_BUILTIN_GATHER3ALTSIV8DF,
28667 IX86_BUILTIN_GATHER3ALTSIV8DI,
28668 IX86_BUILTIN_GATHER3DIV16SF,
28669 IX86_BUILTIN_GATHER3DIV16SI,
28670 IX86_BUILTIN_GATHER3DIV8DF,
28671 IX86_BUILTIN_GATHER3DIV8DI,
28672 IX86_BUILTIN_GATHER3SIV16SF,
28673 IX86_BUILTIN_GATHER3SIV16SI,
28674 IX86_BUILTIN_GATHER3SIV8DF,
28675 IX86_BUILTIN_GATHER3SIV8DI,
28676 IX86_BUILTIN_SCATTERDIV16SF,
28677 IX86_BUILTIN_SCATTERDIV16SI,
28678 IX86_BUILTIN_SCATTERDIV8DF,
28679 IX86_BUILTIN_SCATTERDIV8DI,
28680 IX86_BUILTIN_SCATTERSIV16SF,
28681 IX86_BUILTIN_SCATTERSIV16SI,
28682 IX86_BUILTIN_SCATTERSIV8DF,
28683 IX86_BUILTIN_SCATTERSIV8DI,
28684
28685 /* AVX512PF */
28686 IX86_BUILTIN_GATHERPFQPD,
28687 IX86_BUILTIN_GATHERPFDPS,
28688 IX86_BUILTIN_GATHERPFDPD,
28689 IX86_BUILTIN_GATHERPFQPS,
28690 IX86_BUILTIN_SCATTERPFDPD,
28691 IX86_BUILTIN_SCATTERPFDPS,
28692 IX86_BUILTIN_SCATTERPFQPD,
28693 IX86_BUILTIN_SCATTERPFQPS,
28694
28695 /* AVX-512ER */
28696 IX86_BUILTIN_EXP2PD_MASK,
28697 IX86_BUILTIN_EXP2PS_MASK,
28698 IX86_BUILTIN_EXP2PS,
28699 IX86_BUILTIN_RCP28PD,
28700 IX86_BUILTIN_RCP28PS,
28701 IX86_BUILTIN_RCP28SD,
28702 IX86_BUILTIN_RCP28SS,
28703 IX86_BUILTIN_RSQRT28PD,
28704 IX86_BUILTIN_RSQRT28PS,
28705 IX86_BUILTIN_RSQRT28SD,
28706 IX86_BUILTIN_RSQRT28SS,
28707
28708 /* SHA builtins. */
28709 IX86_BUILTIN_SHA1MSG1,
28710 IX86_BUILTIN_SHA1MSG2,
28711 IX86_BUILTIN_SHA1NEXTE,
28712 IX86_BUILTIN_SHA1RNDS4,
28713 IX86_BUILTIN_SHA256MSG1,
28714 IX86_BUILTIN_SHA256MSG2,
28715 IX86_BUILTIN_SHA256RNDS2,
28716
28717 /* CLFLUSHOPT instructions. */
28718 IX86_BUILTIN_CLFLUSHOPT,
28719
28720 /* TFmode support builtins. */
28721 IX86_BUILTIN_INFQ,
28722 IX86_BUILTIN_HUGE_VALQ,
28723 IX86_BUILTIN_FABSQ,
28724 IX86_BUILTIN_COPYSIGNQ,
28725
28726 /* Vectorizer support builtins. */
28727 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512,
28728 IX86_BUILTIN_CPYSGNPS,
28729 IX86_BUILTIN_CPYSGNPD,
28730 IX86_BUILTIN_CPYSGNPS256,
28731 IX86_BUILTIN_CPYSGNPS512,
28732 IX86_BUILTIN_CPYSGNPD256,
28733 IX86_BUILTIN_CPYSGNPD512,
28734 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512,
28735 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512,
28736
28738 /* FMA4 instructions. */
28739 IX86_BUILTIN_VFMADDSS,
28740 IX86_BUILTIN_VFMADDSD,
28741 IX86_BUILTIN_VFMADDPS,
28742 IX86_BUILTIN_VFMADDPD,
28743 IX86_BUILTIN_VFMADDPS256,
28744 IX86_BUILTIN_VFMADDPD256,
28745 IX86_BUILTIN_VFMADDSUBPS,
28746 IX86_BUILTIN_VFMADDSUBPD,
28747 IX86_BUILTIN_VFMADDSUBPS256,
28748 IX86_BUILTIN_VFMADDSUBPD256,
28749
28750 /* FMA3 instructions. */
28751 IX86_BUILTIN_VFMADDSS3,
28752 IX86_BUILTIN_VFMADDSD3,
28753
28754 /* XOP instructions. */
28755 IX86_BUILTIN_VPCMOV,
28756 IX86_BUILTIN_VPCMOV_V2DI,
28757 IX86_BUILTIN_VPCMOV_V4SI,
28758 IX86_BUILTIN_VPCMOV_V8HI,
28759 IX86_BUILTIN_VPCMOV_V16QI,
28760 IX86_BUILTIN_VPCMOV_V4SF,
28761 IX86_BUILTIN_VPCMOV_V2DF,
28762 IX86_BUILTIN_VPCMOV256,
28763 IX86_BUILTIN_VPCMOV_V4DI256,
28764 IX86_BUILTIN_VPCMOV_V8SI256,
28765 IX86_BUILTIN_VPCMOV_V16HI256,
28766 IX86_BUILTIN_VPCMOV_V32QI256,
28767 IX86_BUILTIN_VPCMOV_V8SF256,
28768 IX86_BUILTIN_VPCMOV_V4DF256,
28769
28770 IX86_BUILTIN_VPPERM,
28771
28772 IX86_BUILTIN_VPMACSSWW,
28773 IX86_BUILTIN_VPMACSWW,
28774 IX86_BUILTIN_VPMACSSWD,
28775 IX86_BUILTIN_VPMACSWD,
28776 IX86_BUILTIN_VPMACSSDD,
28777 IX86_BUILTIN_VPMACSDD,
28778 IX86_BUILTIN_VPMACSSDQL,
28779 IX86_BUILTIN_VPMACSSDQH,
28780 IX86_BUILTIN_VPMACSDQL,
28781 IX86_BUILTIN_VPMACSDQH,
28782 IX86_BUILTIN_VPMADCSSWD,
28783 IX86_BUILTIN_VPMADCSWD,
28784
28785 IX86_BUILTIN_VPHADDBW,
28786 IX86_BUILTIN_VPHADDBD,
28787 IX86_BUILTIN_VPHADDBQ,
28788 IX86_BUILTIN_VPHADDWD,
28789 IX86_BUILTIN_VPHADDWQ,
28790 IX86_BUILTIN_VPHADDDQ,
28791 IX86_BUILTIN_VPHADDUBW,
28792 IX86_BUILTIN_VPHADDUBD,
28793 IX86_BUILTIN_VPHADDUBQ,
28794 IX86_BUILTIN_VPHADDUWD,
28795 IX86_BUILTIN_VPHADDUWQ,
28796 IX86_BUILTIN_VPHADDUDQ,
28797 IX86_BUILTIN_VPHSUBBW,
28798 IX86_BUILTIN_VPHSUBWD,
28799 IX86_BUILTIN_VPHSUBDQ,
28800
28801 IX86_BUILTIN_VPROTB,
28802 IX86_BUILTIN_VPROTW,
28803 IX86_BUILTIN_VPROTD,
28804 IX86_BUILTIN_VPROTQ,
28805 IX86_BUILTIN_VPROTB_IMM,
28806 IX86_BUILTIN_VPROTW_IMM,
28807 IX86_BUILTIN_VPROTD_IMM,
28808 IX86_BUILTIN_VPROTQ_IMM,
28809
28810 IX86_BUILTIN_VPSHLB,
28811 IX86_BUILTIN_VPSHLW,
28812 IX86_BUILTIN_VPSHLD,
28813 IX86_BUILTIN_VPSHLQ,
28814 IX86_BUILTIN_VPSHAB,
28815 IX86_BUILTIN_VPSHAW,
28816 IX86_BUILTIN_VPSHAD,
28817 IX86_BUILTIN_VPSHAQ,
28818
28819 IX86_BUILTIN_VFRCZSS,
28820 IX86_BUILTIN_VFRCZSD,
28821 IX86_BUILTIN_VFRCZPS,
28822 IX86_BUILTIN_VFRCZPD,
28823 IX86_BUILTIN_VFRCZPS256,
28824 IX86_BUILTIN_VFRCZPD256,
28825
28826 IX86_BUILTIN_VPCOMEQUB,
28827 IX86_BUILTIN_VPCOMNEUB,
28828 IX86_BUILTIN_VPCOMLTUB,
28829 IX86_BUILTIN_VPCOMLEUB,
28830 IX86_BUILTIN_VPCOMGTUB,
28831 IX86_BUILTIN_VPCOMGEUB,
28832 IX86_BUILTIN_VPCOMFALSEUB,
28833 IX86_BUILTIN_VPCOMTRUEUB,
28834
28835 IX86_BUILTIN_VPCOMEQUW,
28836 IX86_BUILTIN_VPCOMNEUW,
28837 IX86_BUILTIN_VPCOMLTUW,
28838 IX86_BUILTIN_VPCOMLEUW,
28839 IX86_BUILTIN_VPCOMGTUW,
28840 IX86_BUILTIN_VPCOMGEUW,
28841 IX86_BUILTIN_VPCOMFALSEUW,
28842 IX86_BUILTIN_VPCOMTRUEUW,
28843
28844 IX86_BUILTIN_VPCOMEQUD,
28845 IX86_BUILTIN_VPCOMNEUD,
28846 IX86_BUILTIN_VPCOMLTUD,
28847 IX86_BUILTIN_VPCOMLEUD,
28848 IX86_BUILTIN_VPCOMGTUD,
28849 IX86_BUILTIN_VPCOMGEUD,
28850 IX86_BUILTIN_VPCOMFALSEUD,
28851 IX86_BUILTIN_VPCOMTRUEUD,
28852
28853 IX86_BUILTIN_VPCOMEQUQ,
28854 IX86_BUILTIN_VPCOMNEUQ,
28855 IX86_BUILTIN_VPCOMLTUQ,
28856 IX86_BUILTIN_VPCOMLEUQ,
28857 IX86_BUILTIN_VPCOMGTUQ,
28858 IX86_BUILTIN_VPCOMGEUQ,
28859 IX86_BUILTIN_VPCOMFALSEUQ,
28860 IX86_BUILTIN_VPCOMTRUEUQ,
28861
28862 IX86_BUILTIN_VPCOMEQB,
28863 IX86_BUILTIN_VPCOMNEB,
28864 IX86_BUILTIN_VPCOMLTB,
28865 IX86_BUILTIN_VPCOMLEB,
28866 IX86_BUILTIN_VPCOMGTB,
28867 IX86_BUILTIN_VPCOMGEB,
28868 IX86_BUILTIN_VPCOMFALSEB,
28869 IX86_BUILTIN_VPCOMTRUEB,
28870
28871 IX86_BUILTIN_VPCOMEQW,
28872 IX86_BUILTIN_VPCOMNEW,
28873 IX86_BUILTIN_VPCOMLTW,
28874 IX86_BUILTIN_VPCOMLEW,
28875 IX86_BUILTIN_VPCOMGTW,
28876 IX86_BUILTIN_VPCOMGEW,
28877 IX86_BUILTIN_VPCOMFALSEW,
28878 IX86_BUILTIN_VPCOMTRUEW,
28879
28880 IX86_BUILTIN_VPCOMEQD,
28881 IX86_BUILTIN_VPCOMNED,
28882 IX86_BUILTIN_VPCOMLTD,
28883 IX86_BUILTIN_VPCOMLED,
28884 IX86_BUILTIN_VPCOMGTD,
28885 IX86_BUILTIN_VPCOMGED,
28886 IX86_BUILTIN_VPCOMFALSED,
28887 IX86_BUILTIN_VPCOMTRUED,
28888
28889 IX86_BUILTIN_VPCOMEQQ,
28890 IX86_BUILTIN_VPCOMNEQ,
28891 IX86_BUILTIN_VPCOMLTQ,
28892 IX86_BUILTIN_VPCOMLEQ,
28893 IX86_BUILTIN_VPCOMGTQ,
28894 IX86_BUILTIN_VPCOMGEQ,
28895 IX86_BUILTIN_VPCOMFALSEQ,
28896 IX86_BUILTIN_VPCOMTRUEQ,
28897
28898 /* LWP instructions. */
28899 IX86_BUILTIN_LLWPCB,
28900 IX86_BUILTIN_SLWPCB,
28901 IX86_BUILTIN_LWPVAL32,
28902 IX86_BUILTIN_LWPVAL64,
28903 IX86_BUILTIN_LWPINS32,
28904 IX86_BUILTIN_LWPINS64,
28905
28906 IX86_BUILTIN_CLZS,
28907
28908 /* RTM */
28909 IX86_BUILTIN_XBEGIN,
28910 IX86_BUILTIN_XEND,
28911 IX86_BUILTIN_XABORT,
28912 IX86_BUILTIN_XTEST,
28913
28914 /* BMI instructions. */
28915 IX86_BUILTIN_BEXTR32,
28916 IX86_BUILTIN_BEXTR64,
28917 IX86_BUILTIN_CTZS,
28918
28919 /* TBM instructions. */
28920 IX86_BUILTIN_BEXTRI32,
28921 IX86_BUILTIN_BEXTRI64,
28922
28923 /* BMI2 instructions. */
28924 IX86_BUILTIN_BZHI32,
28925 IX86_BUILTIN_BZHI64,
28926 IX86_BUILTIN_PDEP32,
28927 IX86_BUILTIN_PDEP64,
28928 IX86_BUILTIN_PEXT32,
28929 IX86_BUILTIN_PEXT64,
28930
28931 /* ADX instructions. */
28932 IX86_BUILTIN_ADDCARRYX32,
28933 IX86_BUILTIN_ADDCARRYX64,
28934
28935 /* SBB instructions. */
28936 IX86_BUILTIN_SBB32,
28937 IX86_BUILTIN_SBB64,
28938
28939 /* FSGSBASE instructions. */
28940 IX86_BUILTIN_RDFSBASE32,
28941 IX86_BUILTIN_RDFSBASE64,
28942 IX86_BUILTIN_RDGSBASE32,
28943 IX86_BUILTIN_RDGSBASE64,
28944 IX86_BUILTIN_WRFSBASE32,
28945 IX86_BUILTIN_WRFSBASE64,
28946 IX86_BUILTIN_WRGSBASE32,
28947 IX86_BUILTIN_WRGSBASE64,
28948
28949 /* RDRND instructions. */
28950 IX86_BUILTIN_RDRAND16_STEP,
28951 IX86_BUILTIN_RDRAND32_STEP,
28952 IX86_BUILTIN_RDRAND64_STEP,
28953
28954 /* RDSEED instructions. */
28955 IX86_BUILTIN_RDSEED16_STEP,
28956 IX86_BUILTIN_RDSEED32_STEP,
28957 IX86_BUILTIN_RDSEED64_STEP,
28958
28959 /* F16C instructions. */
28960 IX86_BUILTIN_CVTPH2PS,
28961 IX86_BUILTIN_CVTPH2PS256,
28962 IX86_BUILTIN_CVTPS2PH,
28963 IX86_BUILTIN_CVTPS2PH256,
28964
28965 /* CFString built-in for Darwin. */
28966 IX86_BUILTIN_CFSTRING,
28967
28968 /* Builtins to get CPU type and supported features. */
28969 IX86_BUILTIN_CPU_INIT,
28970 IX86_BUILTIN_CPU_IS,
28971 IX86_BUILTIN_CPU_SUPPORTS,
28972
28973 /* Read/write FLAGS register built-ins. */
28974 IX86_BUILTIN_READ_FLAGS,
28975 IX86_BUILTIN_WRITE_FLAGS,
28976
28977 IX86_BUILTIN_MAX
28978 };
28979
28980 /* Table for the ix86 builtin decls. */
28981 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
28982
28983 /* Table of all the builtin functions that are possible with different ISAs
28984 but are waiting to be built until a function is declared to use that
28985 ISA. */
28986 struct builtin_isa {
28987 const char *name; /* function name */
28988 enum ix86_builtin_func_type tcode; /* type to use in the declaration */
28989 HOST_WIDE_INT isa; /* isa_flags this builtin is defined for */
28990 bool const_p; /* true if the declaration is constant */
28991 bool set_and_not_built_p;
28992 };
28993
28994 static struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
28995
28996
28997 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Save the MASK
28998 of which isa_flags to use in the ix86_builtins_isa array. Store the
28999 function decl in the ix86_builtins array. Return the function decl or
29000 NULL_TREE if the builtin was not added.
29001
29002 If the front end has a special hook for builtin functions, delay adding
29003 builtin functions that aren't in the current ISA until the ISA is changed
29004 with function specific optimization. Doing so can save about 300K for the
29005 default compiler. When the builtin is expanded, check at that time whether
29006 it is valid.
29007
29008 If the front end doesn't have a special hook, record all builtins, even
29009 those whose ISA isn't currently enabled, in case the user uses function
29010 specific options for a different ISA, so that we don't get scope errors
29011 if a builtin is added in the middle of a function scope. */
29012
29013 static inline tree
29014 def_builtin (HOST_WIDE_INT mask, const char *name,
29015 enum ix86_builtin_func_type tcode,
29016 enum ix86_builtins code)
29017 {
29018 tree decl = NULL_TREE;
29019
29020 if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
29021 {
29022 ix86_builtins_isa[(int) code].isa = mask;
29023
29024 mask &= ~OPTION_MASK_ISA_64BIT;
29025 if (mask == 0
29026 || (mask & ix86_isa_flags) != 0
29027 || (lang_hooks.builtin_function
29028 == lang_hooks.builtin_function_ext_scope))
29030 {
29031 tree type = ix86_get_builtin_func_type (tcode);
29032 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
29033 NULL, NULL_TREE);
29034 ix86_builtins[(int) code] = decl;
29035 ix86_builtins_isa[(int) code].set_and_not_built_p = false;
29036 }
29037 else
29038 {
29039 ix86_builtins[(int) code] = NULL_TREE;
29040 ix86_builtins_isa[(int) code].tcode = tcode;
29041 ix86_builtins_isa[(int) code].name = name;
29042 ix86_builtins_isa[(int) code].const_p = false;
29043 ix86_builtins_isa[(int) code].set_and_not_built_p = true;
29044 }
29045 }
29046
29047 return decl;
29048 }
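/* Purely illustrative sketch of how the deferral above plays out; the
   builtin name and code used here are hypothetical and are not defined
   anywhere in this file:

     def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_example",
                  INT_FTYPE_INT, IX86_BUILTIN_EXAMPLE);

   With -mavx2 in effect (or when the front end's builtin_function hook is
   the ext-scope one) the decl is created immediately; otherwise only the
   ix86_builtins_isa entry is recorded, and the decl is built later by
   ix86_add_new_builtins once the ISA is enabled.  */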
29049
29050 /* Like def_builtin, but also marks the function decl "const". */
29051
29052 static inline tree
29053 def_builtin_const (HOST_WIDE_INT mask, const char *name,
29054 enum ix86_builtin_func_type tcode, enum ix86_builtins code)
29055 {
29056 tree decl = def_builtin (mask, name, tcode, code);
29057 if (decl)
29058 TREE_READONLY (decl) = 1;
29059 else
29060 ix86_builtins_isa[(int) code].const_p = true;
29061
29062 return decl;
29063 }
29064
29065 /* Add any new builtin functions for a given ISA that may not have been
29066 declared. This saves a bit of space compared to adding all of the
29067 declarations to the tree up front, even ones that would never be used. */
29068
29069 static void
29070 ix86_add_new_builtins (HOST_WIDE_INT isa)
29071 {
29072 int i;
29073
29074 for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
29075 {
29076 if ((ix86_builtins_isa[i].isa & isa) != 0
29077 && ix86_builtins_isa[i].set_and_not_built_p)
29078 {
29079 tree decl, type;
29080
29081 /* Don't define the builtin again. */
29082 ix86_builtins_isa[i].set_and_not_built_p = false;
29083
29084 type = ix86_get_builtin_func_type (ix86_builtins_isa[i].tcode);
29085 decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
29086 type, i, BUILT_IN_MD, NULL,
29087 NULL_TREE);
29088
29089 ix86_builtins[i] = decl;
29090 if (ix86_builtins_isa[i].const_p)
29091 TREE_READONLY (decl) = 1;
29092 }
29093 }
29094 }
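/* Illustrative note, based on the deferral scheme described above rather
   than on code in this function: a deferred builtin is expected to become
   usable once a function selects the missing ISA, e.g.

     __attribute__ ((target ("sse4.2")))
     unsigned int f (unsigned int crc, unsigned char c)
     {
       return __builtin_ia32_crc32qi (crc, c);
     }

   and switching the ISA for such a function is the kind of event that
   leads to this routine being called to materialize the pending decls.  */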
29095
29096 /* Bits for builtin_description.flag. */
29097
29098 /* Set when we don't support the comparison natively, and should
29099 swap the comparison operands in order to support it. */
29100 #define BUILTIN_DESC_SWAP_OPERANDS 1
29101
29102 struct builtin_description
29103 {
29104 const HOST_WIDE_INT mask;
29105 const enum insn_code icode;
29106 const char *const name;
29107 const enum ix86_builtins code;
29108 const enum rtx_code comparison;
29109 const int flag;
29110 };
29111
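/* As a guide to the tables that follow, reading the first bdesc_comi entry
   field by field: it is gated on OPTION_MASK_ISA_SSE, expands through the
   CODE_FOR_sse_comi insn pattern, is exposed to users as
   __builtin_ia32_comieq, is identified internally by IX86_BUILTIN_COMIEQSS,
   uses the UNEQ rtx comparison code, and has a zero flag field.  */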
29112 static const struct builtin_description bdesc_comi[] =
29113 {
29114 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
29115 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
29116 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
29117 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
29118 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
29119 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
29120 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
29121 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
29122 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
29123 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
29124 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
29125 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
29126 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
29127 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
29128 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
29129 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
29130 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
29131 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
29132 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
29133 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
29134 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
29135 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
29136 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
29137 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
29138 };
29139
29140 static const struct builtin_description bdesc_pcmpestr[] =
29141 {
29142 /* SSE4.2 */
29143 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
29144 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
29145 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
29146 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
29147 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
29148 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
29149 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
29150 };
29151
29152 static const struct builtin_description bdesc_pcmpistr[] =
29153 {
29154 /* SSE4.2 */
29155 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
29156 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
29157 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
29158 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
29159 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
29160 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
29161 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
29162 };
29163
29164 /* Special builtins with variable number of arguments. */
29165 static const struct builtin_description bdesc_special_args[] =
29166 {
29167 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC, UNKNOWN, (int) UINT64_FTYPE_VOID },
29168 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP, UNKNOWN, (int) UINT64_FTYPE_PUNSIGNED },
29169 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_pause, "__builtin_ia32_pause", IX86_BUILTIN_PAUSE, UNKNOWN, (int) VOID_FTYPE_VOID },
29170
29171 /* 80387 (for use internally for atomic compound assignment). */
29172 { 0, CODE_FOR_fnstenv, "__builtin_ia32_fnstenv", IX86_BUILTIN_FNSTENV, UNKNOWN, (int) VOID_FTYPE_PVOID },
29173 { 0, CODE_FOR_fldenv, "__builtin_ia32_fldenv", IX86_BUILTIN_FLDENV, UNKNOWN, (int) VOID_FTYPE_PCVOID },
29174 { 0, CODE_FOR_fnstsw, "__builtin_ia32_fnstsw", IX86_BUILTIN_FNSTSW, UNKNOWN, (int) USHORT_FTYPE_VOID },
29175 { 0, CODE_FOR_fnclex, "__builtin_ia32_fnclex", IX86_BUILTIN_FNCLEX, UNKNOWN, (int) VOID_FTYPE_VOID },
29176
29177 /* MMX */
29178 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
29179
29180 /* 3DNow! */
29181 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
29182
29183 /* FXSR, XSAVE, XSAVEOPT, XSAVEC and XSAVES. */
29184 { OPTION_MASK_ISA_FXSR, CODE_FOR_nothing, "__builtin_ia32_fxsave", IX86_BUILTIN_FXSAVE, UNKNOWN, (int) VOID_FTYPE_PVOID },
29185 { OPTION_MASK_ISA_FXSR, CODE_FOR_nothing, "__builtin_ia32_fxrstor", IX86_BUILTIN_FXRSTOR, UNKNOWN, (int) VOID_FTYPE_PVOID },
29186 { OPTION_MASK_ISA_XSAVE, CODE_FOR_nothing, "__builtin_ia32_xsave", IX86_BUILTIN_XSAVE, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29187 { OPTION_MASK_ISA_XSAVE, CODE_FOR_nothing, "__builtin_ia32_xrstor", IX86_BUILTIN_XRSTOR, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29188 { OPTION_MASK_ISA_XSAVEOPT, CODE_FOR_nothing, "__builtin_ia32_xsaveopt", IX86_BUILTIN_XSAVEOPT, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29189 { OPTION_MASK_ISA_XSAVES, CODE_FOR_nothing, "__builtin_ia32_xsaves", IX86_BUILTIN_XSAVES, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29190 { OPTION_MASK_ISA_XSAVES, CODE_FOR_nothing, "__builtin_ia32_xrstors", IX86_BUILTIN_XRSTORS, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29191 { OPTION_MASK_ISA_XSAVEC, CODE_FOR_nothing, "__builtin_ia32_xsavec", IX86_BUILTIN_XSAVEC, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29192
29193 { OPTION_MASK_ISA_FXSR | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_fxsave64", IX86_BUILTIN_FXSAVE64, UNKNOWN, (int) VOID_FTYPE_PVOID },
29194 { OPTION_MASK_ISA_FXSR | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_fxrstor64", IX86_BUILTIN_FXRSTOR64, UNKNOWN, (int) VOID_FTYPE_PVOID },
29195 { OPTION_MASK_ISA_XSAVE | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsave64", IX86_BUILTIN_XSAVE64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29196 { OPTION_MASK_ISA_XSAVE | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xrstor64", IX86_BUILTIN_XRSTOR64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29197 { OPTION_MASK_ISA_XSAVEOPT | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsaveopt64", IX86_BUILTIN_XSAVEOPT64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29198 { OPTION_MASK_ISA_XSAVES | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsaves64", IX86_BUILTIN_XSAVES64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29199 { OPTION_MASK_ISA_XSAVES | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xrstors64", IX86_BUILTIN_XRSTORS64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29200 { OPTION_MASK_ISA_XSAVEC | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsavec64", IX86_BUILTIN_XSAVEC64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29201
29202 /* SSE */
29203 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storeups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
29204 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
29205 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
29206
29207 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
29208 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
29209 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
29210 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
29211
29212 /* SSE or 3DNow!A */
29213 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
29214 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntq, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PULONGLONG_ULONGLONG },
29215
29216 /* SSE2 */
29217 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
29218 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
29219 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storeupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
29220 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storedquv16qi, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
29221 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
29222 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
29223 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntisi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
29224 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_movntidi, "__builtin_ia32_movnti64", IX86_BUILTIN_MOVNTI64, UNKNOWN, (int) VOID_FTYPE_PLONGLONG_LONGLONG },
29225 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
29226 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loaddquv16qi, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
29227
29228 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
29229 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
29230
29231 /* SSE3 */
29232 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
29233
29234 /* SSE4.1 */
29235 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },
29236
29237 /* SSE4A */
29238 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
29239 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
29240
29241 /* AVX */
29242 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
29243 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },
29244
29245 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4sf, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
29246 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4df, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
29247 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv8sf, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
29248 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v4df, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
29249 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v8sf, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },
29250
29251 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
29252 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
29253 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
29254 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
29255 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loaddquv32qi, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
29256 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storedquv32qi, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
29257 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
29258
29259 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
29260 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
29261 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
29262
29263 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DI },
29264 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SI },
29265 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DI },
29266 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SI },
29267 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DI_V2DF },
29268 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SI_V4SF },
29269 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DI_V4DF },
29270 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SI_V8SF },
29271
29272 /* AVX2 */
29273 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_movntdqa, "__builtin_ia32_movntdqa256", IX86_BUILTIN_MOVNTDQA256, UNKNOWN, (int) V4DI_FTYPE_PV4DI },
29274 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd, "__builtin_ia32_maskloadd", IX86_BUILTIN_MASKLOADD, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI },
29275 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq, "__builtin_ia32_maskloadq", IX86_BUILTIN_MASKLOADQ, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI },
29276 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd256, "__builtin_ia32_maskloadd256", IX86_BUILTIN_MASKLOADD256, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI },
29277 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq256, "__builtin_ia32_maskloadq256", IX86_BUILTIN_MASKLOADQ256, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI },
29278 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored, "__builtin_ia32_maskstored", IX86_BUILTIN_MASKSTORED, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_V4SI },
29279 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq, "__builtin_ia32_maskstoreq", IX86_BUILTIN_MASKSTOREQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_V2DI },
29280 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored256, "__builtin_ia32_maskstored256", IX86_BUILTIN_MASKSTORED256, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_V8SI },
29281 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq256, "__builtin_ia32_maskstoreq256", IX86_BUILTIN_MASKSTOREQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_V4DI },
29282
29283 /* AVX512F */
29284 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev16sf_mask, "__builtin_ia32_compressstoresf512_mask", IX86_BUILTIN_COMPRESSPSSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
29285 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev16si_mask, "__builtin_ia32_compressstoresi512_mask", IX86_BUILTIN_PCOMPRESSDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
29286 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev8df_mask, "__builtin_ia32_compressstoredf512_mask", IX86_BUILTIN_COMPRESSPDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
29287 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev8di_mask, "__builtin_ia32_compressstoredi512_mask", IX86_BUILTIN_PCOMPRESSQSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
29288 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_mask, "__builtin_ia32_expandloadsf512_mask", IX86_BUILTIN_EXPANDPSLOAD512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
29289 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandloadsf512_maskz", IX86_BUILTIN_EXPANDPSLOAD512Z, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
29290 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_mask, "__builtin_ia32_expandloadsi512_mask", IX86_BUILTIN_PEXPANDDLOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
29291 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandloadsi512_maskz", IX86_BUILTIN_PEXPANDDLOAD512Z, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
29292 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_mask, "__builtin_ia32_expandloaddf512_mask", IX86_BUILTIN_EXPANDPDLOAD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
29293 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expandloaddf512_maskz", IX86_BUILTIN_EXPANDPDLOAD512Z, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
29294 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_mask, "__builtin_ia32_expandloaddi512_mask", IX86_BUILTIN_PEXPANDQLOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
29295 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expandloaddi512_maskz", IX86_BUILTIN_PEXPANDQLOAD512Z, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
29296 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loaddquv16si_mask, "__builtin_ia32_loaddqusi512_mask", IX86_BUILTIN_LOADDQUSI512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
29297 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loaddquv8di_mask, "__builtin_ia32_loaddqudi512_mask", IX86_BUILTIN_LOADDQUDI512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
29298 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadupd512_mask, "__builtin_ia32_loadupd512_mask", IX86_BUILTIN_LOADUPD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
29299 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadups512_mask, "__builtin_ia32_loadups512_mask", IX86_BUILTIN_LOADUPS512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
29300 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_loadaps512_mask", IX86_BUILTIN_LOADAPS512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
29301 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32load512_mask", IX86_BUILTIN_MOVDQA32LOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
29302 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_loadapd512_mask", IX86_BUILTIN_LOADAPD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
29303 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64load512_mask", IX86_BUILTIN_MOVDQA64LOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
29304 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv16sf, "__builtin_ia32_movntps512", IX86_BUILTIN_MOVNTPS512, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V16SF },
29305 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv8df, "__builtin_ia32_movntpd512", IX86_BUILTIN_MOVNTPD512, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V8DF },
29306 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv8di, "__builtin_ia32_movntdq512", IX86_BUILTIN_MOVNTDQ512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI },
29307 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntdqa, "__builtin_ia32_movntdqa512", IX86_BUILTIN_MOVNTDQA512, UNKNOWN, (int) V8DI_FTYPE_PV8DI },
29308 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storedquv16si_mask, "__builtin_ia32_storedqusi512_mask", IX86_BUILTIN_STOREDQUSI512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
29309 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storedquv8di_mask, "__builtin_ia32_storedqudi512_mask", IX86_BUILTIN_STOREDQUDI512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
29310 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storeupd512_mask, "__builtin_ia32_storeupd512_mask", IX86_BUILTIN_STOREUPD512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
29311 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8si2_mask_store, "__builtin_ia32_pmovusqd512mem_mask", IX86_BUILTIN_PMOVUSQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
29312 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8si2_mask_store, "__builtin_ia32_pmovsqd512mem_mask", IX86_BUILTIN_PMOVSQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
29313 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8si2_mask_store, "__builtin_ia32_pmovqd512mem_mask", IX86_BUILTIN_PMOVQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
29314 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovusqw512mem_mask", IX86_BUILTIN_PMOVUSQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
29315 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovsqw512mem_mask", IX86_BUILTIN_PMOVSQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
29316 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovqw512mem_mask", IX86_BUILTIN_PMOVQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
29317 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovusdw512mem_mask", IX86_BUILTIN_PMOVUSDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
29318 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovsdw512mem_mask", IX86_BUILTIN_PMOVSDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
29319 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovdw512mem_mask", IX86_BUILTIN_PMOVDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
29320 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovqb512mem_mask", IX86_BUILTIN_PMOVQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
29321 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovusqb512mem_mask", IX86_BUILTIN_PMOVUSQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
29322 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovsqb512mem_mask", IX86_BUILTIN_PMOVSQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
29323 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovusdb512mem_mask", IX86_BUILTIN_PMOVUSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
29324 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovsdb512mem_mask", IX86_BUILTIN_PMOVSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
29325 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovdb512mem_mask", IX86_BUILTIN_PMOVDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
29326 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storeups512_mask, "__builtin_ia32_storeups512_mask", IX86_BUILTIN_STOREUPS512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
29327 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev16sf_mask, "__builtin_ia32_storeaps512_mask", IX86_BUILTIN_STOREAPS512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
29328 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev16si_mask, "__builtin_ia32_movdqa32store512_mask", IX86_BUILTIN_MOVDQA32STORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
29329 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev8df_mask, "__builtin_ia32_storeapd512_mask", IX86_BUILTIN_STOREAPD512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
29330 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev8di_mask, "__builtin_ia32_movdqa64store512_mask", IX86_BUILTIN_MOVDQA64STORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
29331
29332 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcb, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB, UNKNOWN, (int) VOID_FTYPE_PVOID },
29333 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcb, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB, UNKNOWN, (int) PVOID_FTYPE_VOID },
29334 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalsi3, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32, UNKNOWN, (int) VOID_FTYPE_UINT_UINT_UINT },
29335 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvaldi3, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64, UNKNOWN, (int) VOID_FTYPE_UINT64_UINT_UINT },
29336 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinssi3, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32, UNKNOWN, (int) UCHAR_FTYPE_UINT_UINT_UINT },
29337 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinsdi3, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT64_UINT_UINT },
29338
29339 /* FSGSBASE */
29340 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasesi, "__builtin_ia32_rdfsbase32", IX86_BUILTIN_RDFSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
29341 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasedi, "__builtin_ia32_rdfsbase64", IX86_BUILTIN_RDFSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
29342 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasesi, "__builtin_ia32_rdgsbase32", IX86_BUILTIN_RDGSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
29343 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasedi, "__builtin_ia32_rdgsbase64", IX86_BUILTIN_RDGSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
29344 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasesi, "__builtin_ia32_wrfsbase32", IX86_BUILTIN_WRFSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
29345 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasedi, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
29346 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasesi, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
29347 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasedi, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
29348
29349 /* RTM */
29350 { OPTION_MASK_ISA_RTM, CODE_FOR_xbegin, "__builtin_ia32_xbegin", IX86_BUILTIN_XBEGIN, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
29351 { OPTION_MASK_ISA_RTM, CODE_FOR_xend, "__builtin_ia32_xend", IX86_BUILTIN_XEND, UNKNOWN, (int) VOID_FTYPE_VOID },
29352 { OPTION_MASK_ISA_RTM, CODE_FOR_xtest, "__builtin_ia32_xtest", IX86_BUILTIN_XTEST, UNKNOWN, (int) INT_FTYPE_VOID },
29353 };
29354
29355 /* Builtins with variable number of arguments. */
29356 static const struct builtin_description bdesc_args[] =
29357 {
29358 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_bsr, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI, UNKNOWN, (int) INT_FTYPE_INT },
29359 { OPTION_MASK_ISA_64BIT, CODE_FOR_bsr_rex64, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI, UNKNOWN, (int) INT64_FTYPE_INT64 },
29360 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC, UNKNOWN, (int) UINT64_FTYPE_INT },
29361 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlqi3, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
29362 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlhi3, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
29363 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrqi3, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
29364 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrhi3, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
29365
29366 /* MMX */
29367 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29368 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29369 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29370 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29371 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29372 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29373
29374 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29375 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29376 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29377 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29378 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29379 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29380 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29381 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29382
29383 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29384 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29385
29386 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29387 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29388 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29389 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29390
29391 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29392 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29393 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29394 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29395 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29396 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29397
29398 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29399 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29400 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29401 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29402 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29403 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29404
29405 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
29406 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
29407 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
29408
29409 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },
29410
29411 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
29412 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
29413 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
29414 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
29415 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
29416 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
29417
29418 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
29419 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
29420 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
29421 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
29422 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
29423 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
29424
29425 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
29426 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
29427 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
29428 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
29429
29430 /* 3DNow! */
29431 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
29432 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
29433 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
29434 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },
29435
29436 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29437 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29438 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29439 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
29440 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
29441 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
29442 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29443 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29444 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29445 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29446 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29447 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29448 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29449 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29450 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29451
29452 /* 3DNow!A */
29453 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
29454 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
29455 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
29456 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
29457 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29458 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29459
29460 /* SSE */
29461 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
29462 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29463 { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29464 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29465 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29466 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29467 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
29468 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
29469 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
29470 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
29471 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
29472 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
29473
29474 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29475
29476 { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29477 { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29478 { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29479 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29480 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29481 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29482 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29483 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29484
29485 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
29486 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
29487 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
29488 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
29489 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
29490 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
29491 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
29492 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
29493 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
29494 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
29495 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
29496 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
29497 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
29498 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
29499 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
29500 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
29501 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
29502 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
29503 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
29504 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
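
/* For the comparison builtins the rtx code column selects the predicate the
   maskcmp pattern implements; the *_SWAP prototypes presumably tell the
   expander to exchange the two operands, which is how cmpgt/cmpge are built
   from LT/LE.  xmmintrin.h wraps them roughly as:

     _mm_cmpgt_ps (__m128 __A, __m128 __B)
     { return (__m128) __builtin_ia32_cmpgtps ((__v4sf)__A, (__v4sf)__B); }  */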
29505
29506 { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29507 { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29508 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29509 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29510
29511 { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29512 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29513 { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29514 { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29515
29516 { OPTION_MASK_ISA_SSE, CODE_FOR_copysignv4sf3, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29517
29518 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29519 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29520 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29521 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_highv4sf, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29522 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_lowv4sf, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29523
29524 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
29525 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
29526 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_DI },
29527
29528 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },
29529
29530 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
29531 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
29532 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
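
/* The *_VEC_MERGE prototypes cover the one-input scalar insns (sqrtss,
   rsqrtss, rcpss), where the single user operand presumably supplies both
   the value operated on and the upper elements that are passed through.
   xmmintrin.h exposes them with one argument, roughly:

     _mm_sqrt_ss (__m128 __A)
     { return (__m128) __builtin_ia32_sqrtss ((__v4sf)__A); }  */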
29533
29534 { OPTION_MASK_ISA_SSE, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
29535 { OPTION_MASK_ISA_SSE, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },
29536
29537 /* SSE MMX or 3DNow!A */
29538 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29539 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29540 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29541
29542 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29543 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29544 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29545 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29546
29547 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
29548 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },
29549
29550 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },
29551
29552 /* SSE2 */
29553 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29554
29555 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
29556 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
29557 { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
29558 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
29559 { OPTION_MASK_ISA_SSE2, CODE_FOR_floatv4siv4sf2, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
29560
29561 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
29562 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
29563 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
29564 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
29565 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
29566
29567 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },
29568
29569 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
29570 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
29571 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
29572 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
29573
29574 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_fix_notruncv4sfv4si, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
29575 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
29576 { OPTION_MASK_ISA_SSE2, CODE_FOR_fix_truncv4sfv4si2, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
29577
29578 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29579 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29580 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29581 { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29582 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29583 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29584 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29585 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29586
29587 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
29588 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
29589 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
29590 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
29591 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
29592 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
29593 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
29594 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
29595 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
29596 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
29597 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
29598 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
29599 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
29600 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
29601 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
29602 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
29603 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
29604 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
29605 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
29606 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
29607
29608 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29609 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29610 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29611 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29612
29613 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29614 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29615 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29616 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29617
29618 { OPTION_MASK_ISA_SSE2, CODE_FOR_copysignv2df3, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29619
29620 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29621 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2df, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29622 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2df, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29623
29624 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
29625
29626 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29627 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29628 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29629 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29630 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29631 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29632 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29633 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29634
29635 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29636 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29637 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29638 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29639 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29640 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29641 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29642 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29643
29644 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29645 { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29646
29647 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29648 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29649 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29650 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29651
29652 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29653 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29654
29655 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29656 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29657 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29658 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29659 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29660 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29661
29662 { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29663 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29664 { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29665 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29666
29667 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv16qi, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29668 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv8hi, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29669 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv4si, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29670 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2di, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29671 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv16qi, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29672 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv8hi, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29673 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv4si, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29674 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2di, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29675
29676 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
29677 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
29678 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
29679
29680 { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29681 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },
29682
29683 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
29684 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_widen_umult_even_v4si, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
29685
29686 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },
29687
29688 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
29689 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
29690 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
29691 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },
29692
29693 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlv1ti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
29694 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
29695 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
29696 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
29697 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
29698 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
29699 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
29700
29701 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrv1ti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
29702 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
29703 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
29704 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
29705 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
29706 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
29707 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
29708
29709 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
29710 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
29711 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
29712 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
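
/* In the shift entries above the *_COUNT prototypes mark the last operand as
   a shift count (an int for the *i128 forms, a vector for the register
   forms), while pslldqi128/psrldqi128 go through the V1TI shift patterns
   with an *_INT_CONVERT prototype.  The byte count is scaled to bits in the
   wrapper; emmintrin.h has, roughly:

     #define _mm_slli_si128(A, N) \
       ((__m128i) __builtin_ia32_pslldqi128 ((__m128i)(A), (int)(N) * 8))  */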
29713
29714 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
29715 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
29716 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
29717
29718 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },
29719
29720 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
29721
29722 /* SSE2 MMX */
29723 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
29724 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
29725
29726 /* SSE3 */
29727 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29728 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29729
29730 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29731 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29732 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29733 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29734 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29735 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29736
29737 /* SSSE3 */
29738 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
29739 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
29740 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
29741 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
29742 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
29743 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },
29744
29745 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29746 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29747 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29748 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29749 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29750 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29751 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29752 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29753 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29754 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29755 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29756 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29757 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
29758 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
29759 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29760 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29761 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29762 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29763 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29764 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29765 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29766 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29767 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29768 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29769
29770 /* SSSE3. */
29771 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT },
29772 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT },
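
/* palignr follows the same convention: tmmintrin.h scales the byte offset to
   bits for the TImode/DImode patterns, roughly:

     #define _mm_alignr_epi8(X, Y, N) \
       ((__m128i) __builtin_ia32_palignr128 ((__v2di)(__m128i)(X), \
                                             (__v2di)(__m128i)(Y), (int)(N) * 8))  */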
29773
29774 /* SSE4.1 */
29775 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29776 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29777 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
29778 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
29779 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29780 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29781 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29782 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
29783 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
29784 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },
29785
29786 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
29787 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
29788 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
29789 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
29790 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
29791 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
29792 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
29793 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
29794 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
29795 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
29796 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
29797 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
29798 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
29799
29800 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
29801 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29802 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29803 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29804 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29805 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29806 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29807 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29808 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29809 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29810 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
29811 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29812
29813 /* SSE4.1 */
29814 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
29815 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
29816 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29817 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29818
29819 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_floorpd", IX86_BUILTIN_FLOORPD, (enum rtx_code) ROUND_FLOOR, (int) V2DF_FTYPE_V2DF_ROUND },
29820 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_ceilpd", IX86_BUILTIN_CEILPD, (enum rtx_code) ROUND_CEIL, (int) V2DF_FTYPE_V2DF_ROUND },
29821 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_truncpd", IX86_BUILTIN_TRUNCPD, (enum rtx_code) ROUND_TRUNC, (int) V2DF_FTYPE_V2DF_ROUND },
29822 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_rintpd", IX86_BUILTIN_RINTPD, (enum rtx_code) ROUND_MXCSR, (int) V2DF_FTYPE_V2DF_ROUND },
29823
29824 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_floorpd_vec_pack_sfix", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
29825 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_ceilpd_vec_pack_sfix", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
29826
29827 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2, "__builtin_ia32_roundpd_az", IX86_BUILTIN_ROUNDPD_AZ, UNKNOWN, (int) V2DF_FTYPE_V2DF },
29828 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
29829
29830 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_floorps", IX86_BUILTIN_FLOORPS, (enum rtx_code) ROUND_FLOOR, (int) V4SF_FTYPE_V4SF_ROUND },
29831 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_ceilps", IX86_BUILTIN_CEILPS, (enum rtx_code) ROUND_CEIL, (int) V4SF_FTYPE_V4SF_ROUND },
29832 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_truncps", IX86_BUILTIN_TRUNCPS, (enum rtx_code) ROUND_TRUNC, (int) V4SF_FTYPE_V4SF_ROUND },
29833 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_rintps", IX86_BUILTIN_RINTPS, (enum rtx_code) ROUND_MXCSR, (int) V4SF_FTYPE_V4SF_ROUND },
29834
29835 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_floorps_sfix", IX86_BUILTIN_FLOORPS_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V4SF_ROUND },
29836 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_ceilps_sfix", IX86_BUILTIN_CEILPS_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V4SF_ROUND },
29837
29838 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2, "__builtin_ia32_roundps_az", IX86_BUILTIN_ROUNDPS_AZ, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29839 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2_sfix, "__builtin_ia32_roundps_az_sfix", IX86_BUILTIN_ROUNDPS_AZ_SFIX, UNKNOWN, (int) V4SI_FTYPE_V4SF },
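
/* For the floor/ceil/trunc/rint entries above the comparison column is
   reused to carry a rounding-mode constant (ROUND_FLOOR, ROUND_CEIL,
   ROUND_TRUNC, ROUND_MXCSR), which presumably becomes the immediate operand
   of the round insn when the builtin is expanded.  smmintrin.h builds the
   user-level forms on top of the generic round builtins, roughly:

     #define _mm_ceil_pd(X) _mm_round_pd ((X), _MM_FROUND_CEIL)  */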
29840
29841 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
29842 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
29843 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
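
/* The *_PTEST prototypes return an int derived from the flags set by ptest;
   the EQ/LTU/GTU codes presumably select ZF (testz), CF (testc) and the
   "neither" case (testnzc) respectively.  smmintrin.h has, roughly:

     _mm_testz_si128 (__m128i __M, __m128i __V)
     { return __builtin_ia32_ptestz128 ((__v2di)__M, (__v2di)__V); }  */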
29844
29845 /* SSE4.2 */
29846 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29847 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
29848 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
29849 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
29850 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
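
/* The crc32 builtins additionally require OPTION_MASK_ISA_CRC32 and map
   directly onto the smmintrin.h accumulator-style intrinsics, roughly:

     _mm_crc32_u8 (unsigned int __C, unsigned char __V)
     { return __builtin_ia32_crc32qi (__C, __V); }  */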
29851
29852 /* SSE4A */
29853 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
29854 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
29855 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
29856 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29857
29858 /* AES */
29859 { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
29860 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
29861
29862 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29863 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29864 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29865 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29866
29867 /* PCLMUL */
29868 { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
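
/* The AES and PCLMUL entries have a null name: the builtins are presumably
   registered elsewhere in this file with the proper -maes/-mpclmul checks,
   and this table only supplies their insn codes and prototypes.  wmmintrin.h
   wraps them roughly as:

     _mm_aesenc_si128 (__m128i __X, __m128i __Y)
     { return (__m128i) __builtin_ia32_aesenc128 ((__v2di)__X, (__v2di)__Y); }  */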
29869
29870 /* AVX */
29871 { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29872 { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29873 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29874 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29875 { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29876 { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29877 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29878 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29879 { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29880 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29881 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29882 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29883 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29884 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29885 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29886 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29887 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29888 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29889 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29890 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29891 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29892 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29893 { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29894 { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29895 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29896 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
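
/* The 256-bit arithmetic and logic builtins above back the avxintrin.h
   intrinsics, roughly:

     _mm256_add_pd (__m256d __A, __m256d __B)
     { return (__m256d) __builtin_ia32_addpd256 ((__v4df)__A, (__v4df)__B); }  */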
29897
29898 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
29899 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
29900 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
29901 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
29902
29903 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
29904 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
29905 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
29906 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
29907 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
29908 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
29909 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
29910 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29911 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29912 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29913 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29914 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
29915 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
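
/* Unlike the fixed-predicate SSE/SSE2 cmp* builtins earlier in this table,
   the AVX cmpsd/cmpss/cmppd/cmpps builtins take the predicate as an
   immediate (the *_INT prototypes), matching the avxintrin.h interface,
   roughly:

     #define _mm256_cmp_pd(A, B, P) \
       ((__m256d) __builtin_ia32_cmppd256 ((__v4df)(__m256d)(A), \
                                           (__v4df)(__m256d)(B), (int)(P)))  */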
29916 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
29917 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
29918 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
29919 { OPTION_MASK_ISA_AVX, CODE_FOR_floatv4siv4df2, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
29920 { OPTION_MASK_ISA_AVX, CODE_FOR_floatv8siv8sf2, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
29921 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
29922 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_fix_notruncv8sfv8si, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
29923 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
29924 { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv4dfv4si2, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
29925 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
29926 { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv8sfv8si2, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
29927 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
29928 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
29929 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
29930 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
29931 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
29932 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
29933 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
29934 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
29935 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
29936 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },
29937
29938 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29939 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29940 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
29941
29942 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
29943 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29944 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29945 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29946 { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29947
29948 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29949
29950 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
29951 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
29952
29953 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_floorpd256", IX86_BUILTIN_FLOORPD256, (enum rtx_code) ROUND_FLOOR, (int) V4DF_FTYPE_V4DF_ROUND },
29954 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_ceilpd256", IX86_BUILTIN_CEILPD256, (enum rtx_code) ROUND_CEIL, (int) V4DF_FTYPE_V4DF_ROUND },
29955 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_truncpd256", IX86_BUILTIN_TRUNCPD256, (enum rtx_code) ROUND_TRUNC, (int) V4DF_FTYPE_V4DF_ROUND },
29956 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_rintpd256", IX86_BUILTIN_RINTPD256, (enum rtx_code) ROUND_MXCSR, (int) V4DF_FTYPE_V4DF_ROUND },
29957
29958 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2, "__builtin_ia32_roundpd_az256", IX86_BUILTIN_ROUNDPD_AZ256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
29959 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix256", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
29960
29961 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_floorpd_vec_pack_sfix256", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
29962 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_ceilpd_vec_pack_sfix256", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
29963
29964 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_floorps256", IX86_BUILTIN_FLOORPS256, (enum rtx_code) ROUND_FLOOR, (int) V8SF_FTYPE_V8SF_ROUND },
29965 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_ceilps256", IX86_BUILTIN_CEILPS256, (enum rtx_code) ROUND_CEIL, (int) V8SF_FTYPE_V8SF_ROUND },
29966 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_truncps256", IX86_BUILTIN_TRUNCPS256, (enum rtx_code) ROUND_TRUNC, (int) V8SF_FTYPE_V8SF_ROUND },
29967 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_rintps256", IX86_BUILTIN_RINTPS256, (enum rtx_code) ROUND_MXCSR, (int) V8SF_FTYPE_V8SF_ROUND },
29968
29969 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_floorps_sfix256", IX86_BUILTIN_FLOORPS_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V8SF_ROUND },
29970 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_ceilps_sfix256", IX86_BUILTIN_CEILPS_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V8SF_ROUND },
29971
29972 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2, "__builtin_ia32_roundps_az256", IX86_BUILTIN_ROUNDPS_AZ256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29973 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2_sfix, "__builtin_ia32_roundps_az_sfix256", IX86_BUILTIN_ROUNDPS_AZ_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
29974
29975 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29976 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29977 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29978 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29979
29980 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
29981 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
29982 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
29983 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8si, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
29984 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8sf, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
29985 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v4df, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },
29986
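 /* Note (descriptive, not in the original table): for the vtest/ptest builtins
    below, the rtx-code field selects which flag the expander tests: EQ for the
    "z" (ZF) forms, LTU for the "c" (CF) forms and GTU for the "nzc" forms.  */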
29987 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
29988 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
29989 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
29990 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
29991 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
29992 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
29993 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
29994 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
29995 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
29996 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
29997 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
29998 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
29999 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
30000 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
30001 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
30002
30003 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
30004 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },
30005
30006 { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv8sf3, "__builtin_ia32_copysignps256", IX86_BUILTIN_CPYSGNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
30007 { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv4df3, "__builtin_ia32_copysignpd256", IX86_BUILTIN_CPYSGNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
30008
30009 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_pack_sfix_v4df, "__builtin_ia32_vec_pack_sfix256", IX86_BUILTIN_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },

30010
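 /* Each entry below supplies, in order: the ISA option mask guarding the
    builtin, the insn code it expands to, the builtin's name, its
    ix86_builtins enumerator, an optional rtx/rounding sub-code (UNKNOWN
    when unused), and the prototype index cast to int.  */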
30011 /* AVX2 */
30012 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_mpsadbw, "__builtin_ia32_mpsadbw256", IX86_BUILTIN_MPSADBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_INT },
30013 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv32qi2, "__builtin_ia32_pabsb256", IX86_BUILTIN_PABSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI },
30014 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv16hi2, "__builtin_ia32_pabsw256", IX86_BUILTIN_PABSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI },
30015 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv8si2, "__builtin_ia32_pabsd256", IX86_BUILTIN_PABSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI },
30016 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packssdw, "__builtin_ia32_packssdw256", IX86_BUILTIN_PACKSSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
30017 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packsswb, "__builtin_ia32_packsswb256", IX86_BUILTIN_PACKSSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
30018 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packusdw, "__builtin_ia32_packusdw256", IX86_BUILTIN_PACKUSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
30019 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packuswb, "__builtin_ia32_packuswb256", IX86_BUILTIN_PACKUSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
30020 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv32qi3, "__builtin_ia32_paddb256", IX86_BUILTIN_PADDB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
30021 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv16hi3, "__builtin_ia32_paddw256", IX86_BUILTIN_PADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
30022 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv8si3, "__builtin_ia32_paddd256", IX86_BUILTIN_PADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
30023 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv4di3, "__builtin_ia32_paddq256", IX86_BUILTIN_PADDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
30024 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv32qi3, "__builtin_ia32_paddsb256", IX86_BUILTIN_PADDSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
30025 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv16hi3, "__builtin_ia32_paddsw256", IX86_BUILTIN_PADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
30026 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv32qi3, "__builtin_ia32_paddusb256", IX86_BUILTIN_PADDUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
30027 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv16hi3, "__builtin_ia32_paddusw256", IX86_BUILTIN_PADDUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
30028 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_palignrv2ti, "__builtin_ia32_palignr256", IX86_BUILTIN_PALIGNR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_CONVERT },
30029 { OPTION_MASK_ISA_AVX2, CODE_FOR_andv4di3, "__builtin_ia32_andsi256", IX86_BUILTIN_AND256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
30030 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_andnotv4di3, "__builtin_ia32_andnotsi256", IX86_BUILTIN_ANDNOT256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
30031 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv32qi3, "__builtin_ia32_pavgb256", IX86_BUILTIN_PAVGB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
30032 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv16hi3, "__builtin_ia32_pavgw256", IX86_BUILTIN_PAVGW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
30033 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendvb, "__builtin_ia32_pblendvb256", IX86_BUILTIN_PBLENDVB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI },
30034 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendw, "__builtin_ia32_pblendw256", IX86_BUILTIN_PBLENDVW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_INT },
30035 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv32qi3, "__builtin_ia32_pcmpeqb256", IX86_BUILTIN_PCMPEQB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
30036 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv16hi3, "__builtin_ia32_pcmpeqw256", IX86_BUILTIN_PCMPEQW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
30037 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv8si3, "__builtin_ia32_pcmpeqd256", IX86_BUILTIN_PCMPEQD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
30038 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv4di3, "__builtin_ia32_pcmpeqq256", IX86_BUILTIN_PCMPEQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
30039 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv32qi3, "__builtin_ia32_pcmpgtb256", IX86_BUILTIN_PCMPGTB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
30040 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv16hi3, "__builtin_ia32_pcmpgtw256", IX86_BUILTIN_PCMPGTW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
30041 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv8si3, "__builtin_ia32_pcmpgtd256", IX86_BUILTIN_PCMPGTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
30042 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv4di3, "__builtin_ia32_pcmpgtq256", IX86_BUILTIN_PCMPGTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
30043 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddwv16hi3, "__builtin_ia32_phaddw256", IX86_BUILTIN_PHADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
30044 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phadddv8si3, "__builtin_ia32_phaddd256", IX86_BUILTIN_PHADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
30045 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddswv16hi3, "__builtin_ia32_phaddsw256", IX86_BUILTIN_PHADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
30046 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubwv16hi3, "__builtin_ia32_phsubw256", IX86_BUILTIN_PHSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
30047 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubdv8si3, "__builtin_ia32_phsubd256", IX86_BUILTIN_PHSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
30048 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubswv16hi3, "__builtin_ia32_phsubsw256", IX86_BUILTIN_PHSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
30049 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddubsw256, "__builtin_ia32_pmaddubsw256", IX86_BUILTIN_PMADDUBSW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
30050 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddwd, "__builtin_ia32_pmaddwd256", IX86_BUILTIN_PMADDWD256, UNKNOWN, (int) V8SI_FTYPE_V16HI_V16HI },
30051 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv32qi3, "__builtin_ia32_pmaxsb256", IX86_BUILTIN_PMAXSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
30052 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv16hi3, "__builtin_ia32_pmaxsw256", IX86_BUILTIN_PMAXSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
30053 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv8si3, "__builtin_ia32_pmaxsd256", IX86_BUILTIN_PMAXSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
30054 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv32qi3, "__builtin_ia32_pmaxub256", IX86_BUILTIN_PMAXUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
30055 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv16hi3, "__builtin_ia32_pmaxuw256", IX86_BUILTIN_PMAXUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
30056 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv8si3, "__builtin_ia32_pmaxud256", IX86_BUILTIN_PMAXUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
30057 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv32qi3, "__builtin_ia32_pminsb256", IX86_BUILTIN_PMINSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
30058 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv16hi3, "__builtin_ia32_pminsw256", IX86_BUILTIN_PMINSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
30059 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv8si3, "__builtin_ia32_pminsd256", IX86_BUILTIN_PMINSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
30060 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv32qi3, "__builtin_ia32_pminub256", IX86_BUILTIN_PMINUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
30061 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv16hi3, "__builtin_ia32_pminuw256", IX86_BUILTIN_PMINUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
30062 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv8si3, "__builtin_ia32_pminud256", IX86_BUILTIN_PMINUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
30063 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmovmskb, "__builtin_ia32_pmovmskb256", IX86_BUILTIN_PMOVMSKB256, UNKNOWN, (int) INT_FTYPE_V32QI },
30064 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv16qiv16hi2, "__builtin_ia32_pmovsxbw256", IX86_BUILTIN_PMOVSXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
30065 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8qiv8si2, "__builtin_ia32_pmovsxbd256", IX86_BUILTIN_PMOVSXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
30066 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4qiv4di2, "__builtin_ia32_pmovsxbq256", IX86_BUILTIN_PMOVSXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
30067 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8hiv8si2, "__builtin_ia32_pmovsxwd256", IX86_BUILTIN_PMOVSXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
30068 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4hiv4di2, "__builtin_ia32_pmovsxwq256", IX86_BUILTIN_PMOVSXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
30069 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4siv4di2, "__builtin_ia32_pmovsxdq256", IX86_BUILTIN_PMOVSXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
30070 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv16qiv16hi2, "__builtin_ia32_pmovzxbw256", IX86_BUILTIN_PMOVZXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
30071 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8qiv8si2, "__builtin_ia32_pmovzxbd256", IX86_BUILTIN_PMOVZXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
30072 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4qiv4di2, "__builtin_ia32_pmovzxbq256", IX86_BUILTIN_PMOVZXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
30073 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8hiv8si2, "__builtin_ia32_pmovzxwd256", IX86_BUILTIN_PMOVZXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
30074 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4hiv4di2, "__builtin_ia32_pmovzxwq256", IX86_BUILTIN_PMOVZXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
30075 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4siv4di2, "__builtin_ia32_pmovzxdq256", IX86_BUILTIN_PMOVZXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
30076 { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_smult_even_v8si, "__builtin_ia32_pmuldq256", IX86_BUILTIN_PMULDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
30077 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmulhrswv16hi3, "__builtin_ia32_pmulhrsw256", IX86_BUILTIN_PMULHRSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
30078 { OPTION_MASK_ISA_AVX2, CODE_FOR_umulv16hi3_highpart, "__builtin_ia32_pmulhuw256", IX86_BUILTIN_PMULHUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
30079 { OPTION_MASK_ISA_AVX2, CODE_FOR_smulv16hi3_highpart, "__builtin_ia32_pmulhw256", IX86_BUILTIN_PMULHW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
30080 { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv16hi3, "__builtin_ia32_pmullw256", IX86_BUILTIN_PMULLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
30081 { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv8si3, "__builtin_ia32_pmulld256", IX86_BUILTIN_PMULLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
30082 { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_umult_even_v8si, "__builtin_ia32_pmuludq256", IX86_BUILTIN_PMULUDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
30083 { OPTION_MASK_ISA_AVX2, CODE_FOR_iorv4di3, "__builtin_ia32_por256", IX86_BUILTIN_POR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
30084 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psadbw, "__builtin_ia32_psadbw256", IX86_BUILTIN_PSADBW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
30085 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufbv32qi3, "__builtin_ia32_pshufb256", IX86_BUILTIN_PSHUFB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
30086 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufdv3, "__builtin_ia32_pshufd256", IX86_BUILTIN_PSHUFD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT },
30087 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufhwv3, "__builtin_ia32_pshufhw256", IX86_BUILTIN_PSHUFHW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
30088 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshuflwv3, "__builtin_ia32_pshuflw256", IX86_BUILTIN_PSHUFLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
30089 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv32qi3, "__builtin_ia32_psignb256", IX86_BUILTIN_PSIGNB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
30090 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv16hi3, "__builtin_ia32_psignw256", IX86_BUILTIN_PSIGNW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
30091 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv8si3 , "__builtin_ia32_psignd256", IX86_BUILTIN_PSIGND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
30092 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlv2ti3, "__builtin_ia32_pslldqi256", IX86_BUILTIN_PSLLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
30093 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllwi256", IX86_BUILTIN_PSLLWI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
30094 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllw256", IX86_BUILTIN_PSLLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
30095 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslldi256", IX86_BUILTIN_PSLLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
30096 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslld256", IX86_BUILTIN_PSLLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
30097 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllqi256", IX86_BUILTIN_PSLLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
30098 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllq256", IX86_BUILTIN_PSLLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
30099 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psrawi256", IX86_BUILTIN_PSRAWI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
30100 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psraw256", IX86_BUILTIN_PSRAW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
30101 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psradi256", IX86_BUILTIN_PSRADI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
30102 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psrad256", IX86_BUILTIN_PSRAD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
30103 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrv2ti3, "__builtin_ia32_psrldqi256", IX86_BUILTIN_PSRLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
30104 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlwi256", IX86_BUILTIN_PSRLWI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
30105 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlw256", IX86_BUILTIN_PSRLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
30106 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrldi256", IX86_BUILTIN_PSRLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
30107 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrld256", IX86_BUILTIN_PSRLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
30108 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlqi256", IX86_BUILTIN_PSRLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
30109 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlq256", IX86_BUILTIN_PSRLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
30110 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv32qi3, "__builtin_ia32_psubb256", IX86_BUILTIN_PSUBB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
30111 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv16hi3, "__builtin_ia32_psubw256", IX86_BUILTIN_PSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
30112 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv8si3, "__builtin_ia32_psubd256", IX86_BUILTIN_PSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
30113 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv4di3, "__builtin_ia32_psubq256", IX86_BUILTIN_PSUBQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
30114 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv32qi3, "__builtin_ia32_psubsb256", IX86_BUILTIN_PSUBSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
30115 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv16hi3, "__builtin_ia32_psubsw256", IX86_BUILTIN_PSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
30116 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv32qi3, "__builtin_ia32_psubusb256", IX86_BUILTIN_PSUBUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
30117 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv16hi3, "__builtin_ia32_psubusw256", IX86_BUILTIN_PSUBUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
30118 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv32qi, "__builtin_ia32_punpckhbw256", IX86_BUILTIN_PUNPCKHBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
30119 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv16hi, "__builtin_ia32_punpckhwd256", IX86_BUILTIN_PUNPCKHWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
30120 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv8si, "__builtin_ia32_punpckhdq256", IX86_BUILTIN_PUNPCKHDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
30121 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv4di, "__builtin_ia32_punpckhqdq256", IX86_BUILTIN_PUNPCKHQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
30122 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv32qi, "__builtin_ia32_punpcklbw256", IX86_BUILTIN_PUNPCKLBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
30123 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv16hi, "__builtin_ia32_punpcklwd256", IX86_BUILTIN_PUNPCKLWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
30124 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv8si, "__builtin_ia32_punpckldq256", IX86_BUILTIN_PUNPCKLDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
30125 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv4di, "__builtin_ia32_punpcklqdq256", IX86_BUILTIN_PUNPCKLQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
30126 { OPTION_MASK_ISA_AVX2, CODE_FOR_xorv4di3, "__builtin_ia32_pxor256", IX86_BUILTIN_PXOR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
30127 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4sf, "__builtin_ia32_vbroadcastss_ps", IX86_BUILTIN_VBROADCASTSS_PS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
30128 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv8sf, "__builtin_ia32_vbroadcastss_ps256", IX86_BUILTIN_VBROADCASTSS_PS256, UNKNOWN, (int) V8SF_FTYPE_V4SF },
30129 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4df, "__builtin_ia32_vbroadcastsd_pd256", IX86_BUILTIN_VBROADCASTSD_PD256, UNKNOWN, (int) V4DF_FTYPE_V2DF },
30130 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vbroadcasti128_v4di, "__builtin_ia32_vbroadcastsi256", IX86_BUILTIN_VBROADCASTSI256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
30131 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv4si, "__builtin_ia32_pblendd128", IX86_BUILTIN_PBLENDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
30132 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv8si, "__builtin_ia32_pblendd256", IX86_BUILTIN_PBLENDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
30133 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv32qi, "__builtin_ia32_pbroadcastb256", IX86_BUILTIN_PBROADCASTB256, UNKNOWN, (int) V32QI_FTYPE_V16QI },
30134 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16hi, "__builtin_ia32_pbroadcastw256", IX86_BUILTIN_PBROADCASTW256, UNKNOWN, (int) V16HI_FTYPE_V8HI },
30135 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8si, "__builtin_ia32_pbroadcastd256", IX86_BUILTIN_PBROADCASTD256, UNKNOWN, (int) V8SI_FTYPE_V4SI },
30136 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4di, "__builtin_ia32_pbroadcastq256", IX86_BUILTIN_PBROADCASTQ256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
30137 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16qi, "__builtin_ia32_pbroadcastb128", IX86_BUILTIN_PBROADCASTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
30138 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8hi, "__builtin_ia32_pbroadcastw128", IX86_BUILTIN_PBROADCASTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
30139 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4si, "__builtin_ia32_pbroadcastd128", IX86_BUILTIN_PBROADCASTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
30140 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv2di, "__builtin_ia32_pbroadcastq128", IX86_BUILTIN_PBROADCASTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
30141 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8si, "__builtin_ia32_permvarsi256", IX86_BUILTIN_VPERMVARSI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
30142 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8sf, "__builtin_ia32_permvarsf256", IX86_BUILTIN_VPERMVARSF256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
30143 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4df, "__builtin_ia32_permdf256", IX86_BUILTIN_VPERMDF256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
30144 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4di, "__builtin_ia32_permdi256", IX86_BUILTIN_VPERMDI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT },
30145 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv2ti, "__builtin_ia32_permti256", IX86_BUILTIN_VPERMTI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT },
30146 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx_vextractf128v4di, "__builtin_ia32_extract128i256", IX86_BUILTIN_VEXTRACT128I256, UNKNOWN, (int) V2DI_FTYPE_V4DI_INT },
30147 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx_vinsertf128v4di, "__builtin_ia32_insert128i256", IX86_BUILTIN_VINSERT128I256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_INT },
30148 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4di, "__builtin_ia32_psllv4di", IX86_BUILTIN_PSLLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
30149 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv2di, "__builtin_ia32_psllv2di", IX86_BUILTIN_PSLLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
30150 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv8si, "__builtin_ia32_psllv8si", IX86_BUILTIN_PSLLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
30151 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4si, "__builtin_ia32_psllv4si", IX86_BUILTIN_PSLLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30152 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv8si, "__builtin_ia32_psrav8si", IX86_BUILTIN_PSRAVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
30153 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv4si, "__builtin_ia32_psrav4si", IX86_BUILTIN_PSRAVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30154 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4di, "__builtin_ia32_psrlv4di", IX86_BUILTIN_PSRLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
30155 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv2di, "__builtin_ia32_psrlv2di", IX86_BUILTIN_PSRLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
30156 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv8si, "__builtin_ia32_psrlv8si", IX86_BUILTIN_PSRLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
30157 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4si, "__builtin_ia32_psrlv4si", IX86_BUILTIN_PSRLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30158
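 /* LZCNT */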
30159 { OPTION_MASK_ISA_LZCNT, CODE_FOR_clzhi2_lzcnt, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
30160
30161 /* BMI */
30162 { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_si, "__builtin_ia32_bextr_u32", IX86_BUILTIN_BEXTR32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
30163 { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_di, "__builtin_ia32_bextr_u64", IX86_BUILTIN_BEXTR64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
30164 { OPTION_MASK_ISA_BMI, CODE_FOR_ctzhi2, "__builtin_ctzs", IX86_BUILTIN_CTZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
30165
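 /* Illustrative note (not part of the table): the BEXTR control word packs
    the start bit in bits 0-7 and the field length in bits 8-15, so e.g.
    __builtin_ia32_bextr_u32 (x, 4 | (8 << 8)) extracts an 8-bit field
    starting at bit 4 of x.  */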
30166 /* TBM */
30167 { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_si, "__builtin_ia32_bextri_u32", IX86_BUILTIN_BEXTRI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
30168 { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_di, "__builtin_ia32_bextri_u64", IX86_BUILTIN_BEXTRI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
30169
30170 /* F16C */
30171 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int) V4SF_FTYPE_V8HI },
30172 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps256, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256, UNKNOWN, (int) V8SF_FTYPE_V8HI },
30173 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT },
30174 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph256, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT },
30175
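 /* Note (descriptive, not in the original table): in the F16C prototypes
    above, the trailing INT operand is the immediate rounding-control
    argument of vcvtps2ph.  */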
30176 /* BMI2 */
30177 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_si3, "__builtin_ia32_bzhi_si", IX86_BUILTIN_BZHI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
30178 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_di3, "__builtin_ia32_bzhi_di", IX86_BUILTIN_BZHI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
30179 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_si3, "__builtin_ia32_pdep_si", IX86_BUILTIN_PDEP32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
30180 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_di3, "__builtin_ia32_pdep_di", IX86_BUILTIN_PDEP64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
30181 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_si3, "__builtin_ia32_pext_si", IX86_BUILTIN_PEXT32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
30182 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_di3, "__builtin_ia32_pext_di", IX86_BUILTIN_PEXT64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
30183
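 /* Note (descriptive, not in the original table): the AVX-512 entries below
    expand to masked insn patterns.  Prototype names ending in _QI or _HI take
    an 8- or 16-bit write mask as the last argument, and the vector operand
    just before the mask supplies the values kept for masked-off elements.  */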
30184 /* AVX512F */
30185 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_si512_256si, "__builtin_ia32_si512_256si", IX86_BUILTIN_SI512_SI256, UNKNOWN, (int) V16SI_FTYPE_V8SI },
30186 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ps512_256ps, "__builtin_ia32_ps512_256ps", IX86_BUILTIN_PS512_PS256, UNKNOWN, (int) V16SF_FTYPE_V8SF },
30187 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pd512_256pd, "__builtin_ia32_pd512_256pd", IX86_BUILTIN_PD512_PD256, UNKNOWN, (int) V8DF_FTYPE_V4DF },
30188 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_si512_si, "__builtin_ia32_si512_si", IX86_BUILTIN_SI512_SI, UNKNOWN, (int) V16SI_FTYPE_V4SI },
30189 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ps512_ps, "__builtin_ia32_ps512_ps", IX86_BUILTIN_PS512_PS, UNKNOWN, (int) V16SF_FTYPE_V4SF },
30190 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pd512_pd, "__builtin_ia32_pd512_pd", IX86_BUILTIN_PD512_PD, UNKNOWN, (int) V8DF_FTYPE_V2DF },
30191 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_alignv16si_mask, "__builtin_ia32_alignd512_mask", IX86_BUILTIN_ALIGND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI },
30192 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_alignv8di_mask, "__builtin_ia32_alignq512_mask", IX86_BUILTIN_ALIGNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI },
30193 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv16si, "__builtin_ia32_blendmd_512_mask", IX86_BUILTIN_BLENDMD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30194 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv8df, "__builtin_ia32_blendmpd_512_mask", IX86_BUILTIN_BLENDMPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30195 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv16sf, "__builtin_ia32_blendmps_512_mask", IX86_BUILTIN_BLENDMPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30196 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv8di, "__builtin_ia32_blendmq_512_mask", IX86_BUILTIN_BLENDMQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30197 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv16sf_mask, "__builtin_ia32_broadcastf32x4_512", IX86_BUILTIN_BROADCASTF32X4_512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_HI },
30198 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv8df_mask, "__builtin_ia32_broadcastf64x4_512", IX86_BUILTIN_BROADCASTF64X4_512, UNKNOWN, (int) V8DF_FTYPE_V4DF_V8DF_QI },
30199 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv16si_mask, "__builtin_ia32_broadcasti32x4_512", IX86_BUILTIN_BROADCASTI32X4_512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_HI },
30200 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv8di_mask, "__builtin_ia32_broadcasti64x4_512", IX86_BUILTIN_BROADCASTI64X4_512, UNKNOWN, (int) V8DI_FTYPE_V4DI_V8DI_QI },
30201 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv8df_mask, "__builtin_ia32_broadcastsd512", IX86_BUILTIN_BROADCASTSD512, UNKNOWN, (int) V8DF_FTYPE_V2DF_V8DF_QI },
30202 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv16sf_mask, "__builtin_ia32_broadcastss512", IX86_BUILTIN_BROADCASTSS512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_HI },
30203 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv16si3_mask, "__builtin_ia32_cmpd512_mask", IX86_BUILTIN_CMPD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_INT_HI },
30204 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv8di3_mask, "__builtin_ia32_cmpq512_mask", IX86_BUILTIN_CMPQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_INT_QI },
30205 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv8df_mask, "__builtin_ia32_compressdf512_mask", IX86_BUILTIN_COMPRESSPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30206 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv16sf_mask, "__builtin_ia32_compresssf512_mask", IX86_BUILTIN_COMPRESSPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30207 { OPTION_MASK_ISA_AVX512F, CODE_FOR_floatv8siv8df2_mask, "__builtin_ia32_cvtdq2pd512_mask", IX86_BUILTIN_CVTDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_QI },
30208 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtps2ph512_mask, "__builtin_ia32_vcvtps2ph512_mask", IX86_BUILTIN_CVTPS2PH512, UNKNOWN, (int) V16HI_FTYPE_V16SF_INT_V16HI_HI },
30209 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufloatv8siv8df2_mask, "__builtin_ia32_cvtudq2pd512_mask", IX86_BUILTIN_CVTUDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_QI },
30210 { OPTION_MASK_ISA_AVX512F, CODE_FOR_cvtusi2sd32, "__builtin_ia32_cvtusi2sd32", IX86_BUILTIN_CVTUSI2SD32, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT },
30211 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_mask, "__builtin_ia32_expanddf512_mask", IX86_BUILTIN_EXPANDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30212 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expanddf512_maskz", IX86_BUILTIN_EXPANDPD512Z, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30213 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_mask, "__builtin_ia32_expandsf512_mask", IX86_BUILTIN_EXPANDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30214 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandsf512_maskz", IX86_BUILTIN_EXPANDPS512Z, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30215 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextractf32x4_mask, "__builtin_ia32_extractf32x4_mask", IX86_BUILTIN_EXTRACTF32X4, UNKNOWN, (int) V4SF_FTYPE_V16SF_INT_V4SF_QI },
30216 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextractf64x4_mask, "__builtin_ia32_extractf64x4_mask", IX86_BUILTIN_EXTRACTF64X4, UNKNOWN, (int) V4DF_FTYPE_V8DF_INT_V4DF_QI },
30217 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextracti32x4_mask, "__builtin_ia32_extracti32x4_mask", IX86_BUILTIN_EXTRACTI32X4, UNKNOWN, (int) V4SI_FTYPE_V16SI_INT_V4SI_QI },
30218 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextracti64x4_mask, "__builtin_ia32_extracti64x4_mask", IX86_BUILTIN_EXTRACTI64X4, UNKNOWN, (int) V4DI_FTYPE_V8DI_INT_V4DI_QI },
30219 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinsertf32x4_mask, "__builtin_ia32_insertf32x4_mask", IX86_BUILTIN_INSERTF32X4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V4SF_INT_V16SF_HI },
30220 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinsertf64x4_mask, "__builtin_ia32_insertf64x4_mask", IX86_BUILTIN_INSERTF64X4, UNKNOWN, (int) V8DF_FTYPE_V8DF_V4DF_INT_V8DF_QI },
30221 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinserti32x4_mask, "__builtin_ia32_inserti32x4_mask", IX86_BUILTIN_INSERTI32X4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_INT_V16SI_HI },
30222 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinserti64x4_mask, "__builtin_ia32_inserti64x4_mask", IX86_BUILTIN_INSERTI64X4, UNKNOWN, (int) V8DI_FTYPE_V8DI_V4DI_INT_V8DI_QI },
30223 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_movapd512_mask", IX86_BUILTIN_MOVAPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30224 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_movaps512_mask", IX86_BUILTIN_MOVAPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30225 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movddup512_mask, "__builtin_ia32_movddup512_mask", IX86_BUILTIN_MOVDDUP512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30226 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32_512_mask", IX86_BUILTIN_MOVDQA32_512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30227 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64_512_mask", IX86_BUILTIN_MOVDQA64_512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30228 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movshdup512_mask, "__builtin_ia32_movshdup512_mask", IX86_BUILTIN_MOVSHDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30229 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movsldup512_mask, "__builtin_ia32_movsldup512_mask", IX86_BUILTIN_MOVSLDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30230 { OPTION_MASK_ISA_AVX512F, CODE_FOR_absv16si2_mask, "__builtin_ia32_pabsd512_mask", IX86_BUILTIN_PABSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30231 { OPTION_MASK_ISA_AVX512F, CODE_FOR_absv8di2_mask, "__builtin_ia32_pabsq512_mask", IX86_BUILTIN_PABSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30232 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv16si3_mask, "__builtin_ia32_paddd512_mask", IX86_BUILTIN_PADDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30233 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv8di3_mask, "__builtin_ia32_paddq512_mask", IX86_BUILTIN_PADDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30234 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andv16si3_mask, "__builtin_ia32_pandd512_mask", IX86_BUILTIN_PANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30235 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_andnotv16si3_mask, "__builtin_ia32_pandnd512_mask", IX86_BUILTIN_PANDND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30236 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_andnotv8di3_mask, "__builtin_ia32_pandnq512_mask", IX86_BUILTIN_PANDNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30237 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andv8di3_mask, "__builtin_ia32_pandq512_mask", IX86_BUILTIN_PANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30238 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv16si_mask, "__builtin_ia32_pbroadcastd512", IX86_BUILTIN_PBROADCASTD512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_HI },
30239 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dup_gprv16si_mask, "__builtin_ia32_pbroadcastd512_gpr_mask", IX86_BUILTIN_PBROADCASTD512_GPR, UNKNOWN, (int) V16SI_FTYPE_SI_V16SI_HI },
30240 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv8di, "__builtin_ia32_broadcastmb512", IX86_BUILTIN_PBROADCASTMB512, UNKNOWN, (int) V8DI_FTYPE_QI },
30241 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv16si, "__builtin_ia32_broadcastmw512", IX86_BUILTIN_PBROADCASTMW512, UNKNOWN, (int) V16SI_FTYPE_HI },
30242 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv8di_mask, "__builtin_ia32_pbroadcastq512", IX86_BUILTIN_PBROADCASTQ512, UNKNOWN, (int) V8DI_FTYPE_V2DI_V8DI_QI },
30243 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vec_dup_gprv8di_mask, "__builtin_ia32_pbroadcastq512_gpr_mask", IX86_BUILTIN_PBROADCASTQ512_GPR, UNKNOWN, (int) V8DI_FTYPE_DI_V8DI_QI },
30244 { OPTION_MASK_ISA_AVX512F & ~OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vec_dup_memv8di_mask, "__builtin_ia32_pbroadcastq512_mem_mask", IX86_BUILTIN_PBROADCASTQ512_MEM, UNKNOWN, (int) V8DI_FTYPE_DI_V8DI_QI },
30245 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_eqv16si3_mask, "__builtin_ia32_pcmpeqd512_mask", IX86_BUILTIN_PCMPEQD512_MASK, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
30246 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_eqv8di3_mask, "__builtin_ia32_pcmpeqq512_mask", IX86_BUILTIN_PCMPEQQ512_MASK, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
30247 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_gtv16si3_mask, "__builtin_ia32_pcmpgtd512_mask", IX86_BUILTIN_PCMPGTD512_MASK, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
30248 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_gtv8di3_mask, "__builtin_ia32_pcmpgtq512_mask", IX86_BUILTIN_PCMPGTQ512_MASK, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
30249 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv16si_mask, "__builtin_ia32_compresssi512_mask", IX86_BUILTIN_PCOMPRESSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30250 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv8di_mask, "__builtin_ia32_compressdi512_mask", IX86_BUILTIN_PCOMPRESSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30251 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_mask, "__builtin_ia32_expandsi512_mask", IX86_BUILTIN_PEXPANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30252 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandsi512_maskz", IX86_BUILTIN_PEXPANDD512Z, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30253 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_mask, "__builtin_ia32_expanddi512_mask", IX86_BUILTIN_PEXPANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30254 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expanddi512_maskz", IX86_BUILTIN_PEXPANDQ512Z, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30255 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv16si3_mask, "__builtin_ia32_pmaxsd512_mask", IX86_BUILTIN_PMAXSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30256 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv8di3_mask, "__builtin_ia32_pmaxsq512_mask", IX86_BUILTIN_PMAXSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30257 { OPTION_MASK_ISA_AVX512F, CODE_FOR_umaxv16si3_mask, "__builtin_ia32_pmaxud512_mask", IX86_BUILTIN_PMAXUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30258 { OPTION_MASK_ISA_AVX512F, CODE_FOR_umaxv8di3_mask, "__builtin_ia32_pmaxuq512_mask", IX86_BUILTIN_PMAXUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30259 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv16si3_mask, "__builtin_ia32_pminsd512_mask", IX86_BUILTIN_PMINSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30260 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv8di3_mask, "__builtin_ia32_pminsq512_mask", IX86_BUILTIN_PMINSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30261 { OPTION_MASK_ISA_AVX512F, CODE_FOR_uminv16si3_mask, "__builtin_ia32_pminud512_mask", IX86_BUILTIN_PMINUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30262 { OPTION_MASK_ISA_AVX512F, CODE_FOR_uminv8di3_mask, "__builtin_ia32_pminuq512_mask", IX86_BUILTIN_PMINUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30263 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16qi2_mask, "__builtin_ia32_pmovdb512_mask", IX86_BUILTIN_PMOVDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
30264 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16hi2_mask, "__builtin_ia32_pmovdw512_mask", IX86_BUILTIN_PMOVDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
30265 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div16qi2_mask, "__builtin_ia32_pmovqb512_mask", IX86_BUILTIN_PMOVQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
30266 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8si2_mask, "__builtin_ia32_pmovqd512_mask", IX86_BUILTIN_PMOVQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
30267 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8hi2_mask, "__builtin_ia32_pmovqw512_mask", IX86_BUILTIN_PMOVQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
30268 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask, "__builtin_ia32_pmovsdb512_mask", IX86_BUILTIN_PMOVSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
30269 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16hi2_mask, "__builtin_ia32_pmovsdw512_mask", IX86_BUILTIN_PMOVSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
30270 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask, "__builtin_ia32_pmovsqb512_mask", IX86_BUILTIN_PMOVSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
30271 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8si2_mask, "__builtin_ia32_pmovsqd512_mask", IX86_BUILTIN_PMOVSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
30272 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8hi2_mask, "__builtin_ia32_pmovsqw512_mask", IX86_BUILTIN_PMOVSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
30273 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv16qiv16si2_mask, "__builtin_ia32_pmovsxbd512_mask", IX86_BUILTIN_PMOVSXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_HI },
30274 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8qiv8di2_mask, "__builtin_ia32_pmovsxbq512_mask", IX86_BUILTIN_PMOVSXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_QI },
30275 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8siv8di2_mask, "__builtin_ia32_pmovsxdq512_mask", IX86_BUILTIN_PMOVSXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_QI },
30276 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv16hiv16si2_mask, "__builtin_ia32_pmovsxwd512_mask", IX86_BUILTIN_PMOVSXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_HI },
30277 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8hiv8di2_mask, "__builtin_ia32_pmovsxwq512_mask", IX86_BUILTIN_PMOVSXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_QI },
30278 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask, "__builtin_ia32_pmovusdb512_mask", IX86_BUILTIN_PMOVUSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
30279 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16hi2_mask, "__builtin_ia32_pmovusdw512_mask", IX86_BUILTIN_PMOVUSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
30280 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div16qi2_mask, "__builtin_ia32_pmovusqb512_mask", IX86_BUILTIN_PMOVUSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
30281 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8si2_mask, "__builtin_ia32_pmovusqd512_mask", IX86_BUILTIN_PMOVUSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
30282 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8hi2_mask, "__builtin_ia32_pmovusqw512_mask", IX86_BUILTIN_PMOVUSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
30283 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv16qiv16si2_mask, "__builtin_ia32_pmovzxbd512_mask", IX86_BUILTIN_PMOVZXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_HI },
30284 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8qiv8di2_mask, "__builtin_ia32_pmovzxbq512_mask", IX86_BUILTIN_PMOVZXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_QI },
30285 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8siv8di2_mask, "__builtin_ia32_pmovzxdq512_mask", IX86_BUILTIN_PMOVZXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_QI },
30286 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv16hiv16si2_mask, "__builtin_ia32_pmovzxwd512_mask", IX86_BUILTIN_PMOVZXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_HI },
30287 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8hiv8di2_mask, "__builtin_ia32_pmovzxwq512_mask", IX86_BUILTIN_PMOVZXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_QI },
30288 { OPTION_MASK_ISA_AVX512F, CODE_FOR_vec_widen_smult_even_v16si_mask, "__builtin_ia32_pmuldq512_mask", IX86_BUILTIN_PMULDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_QI },
30289 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv16si3_mask, "__builtin_ia32_pmulld512_mask", IX86_BUILTIN_PMULLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30290 { OPTION_MASK_ISA_AVX512F, CODE_FOR_vec_widen_umult_even_v16si_mask, "__builtin_ia32_pmuludq512_mask", IX86_BUILTIN_PMULUDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_QI },
30291 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorv16si3_mask, "__builtin_ia32_pord512_mask", IX86_BUILTIN_PORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30292 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorv8di3_mask, "__builtin_ia32_porq512_mask", IX86_BUILTIN_PORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30293 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolv16si_mask, "__builtin_ia32_prold512_mask", IX86_BUILTIN_PROLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
30294 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolv8di_mask, "__builtin_ia32_prolq512_mask", IX86_BUILTIN_PROLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
30295 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolvv16si_mask, "__builtin_ia32_prolvd512_mask", IX86_BUILTIN_PROLVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30296 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolvv8di_mask, "__builtin_ia32_prolvq512_mask", IX86_BUILTIN_PROLVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30297 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorv16si_mask, "__builtin_ia32_prord512_mask", IX86_BUILTIN_PRORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
30298 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorv8di_mask, "__builtin_ia32_prorq512_mask", IX86_BUILTIN_PRORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
30299 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorvv16si_mask, "__builtin_ia32_prorvd512_mask", IX86_BUILTIN_PRORVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30300 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorvv8di_mask, "__builtin_ia32_prorvq512_mask", IX86_BUILTIN_PRORVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30301 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pshufdv3_mask, "__builtin_ia32_pshufd512_mask", IX86_BUILTIN_PSHUFD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
30302 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslld512_mask", IX86_BUILTIN_PSLLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
30303 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslldi512_mask", IX86_BUILTIN_PSLLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
30304 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllq512_mask", IX86_BUILTIN_PSLLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
30305 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllqi512_mask", IX86_BUILTIN_PSLLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
30306 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashlvv16si_mask, "__builtin_ia32_psllv16si_mask", IX86_BUILTIN_PSLLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30307 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashlvv8di_mask, "__builtin_ia32_psllv8di_mask", IX86_BUILTIN_PSLLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30308 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psrad512_mask", IX86_BUILTIN_PSRAD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
30309 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psradi512_mask", IX86_BUILTIN_PSRADI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
30310 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraq512_mask", IX86_BUILTIN_PSRAQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
30311 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraqi512_mask", IX86_BUILTIN_PSRAQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
30312 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashrvv16si_mask, "__builtin_ia32_psrav16si_mask", IX86_BUILTIN_PSRAVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30313 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashrvv8di_mask, "__builtin_ia32_psrav8di_mask", IX86_BUILTIN_PSRAVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30314 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrld512_mask", IX86_BUILTIN_PSRLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
30315 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrldi512_mask", IX86_BUILTIN_PSRLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
30316 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlq512_mask", IX86_BUILTIN_PSRLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
30317 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlqi512_mask", IX86_BUILTIN_PSRLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
30318 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_lshrvv16si_mask, "__builtin_ia32_psrlv16si_mask", IX86_BUILTIN_PSRLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30319 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_lshrvv8di_mask, "__builtin_ia32_psrlv8di_mask", IX86_BUILTIN_PSRLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30320 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv16si3_mask, "__builtin_ia32_psubd512_mask", IX86_BUILTIN_PSUBD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30321 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv8di3_mask, "__builtin_ia32_psubq512_mask", IX86_BUILTIN_PSUBQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30322 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testmv16si3_mask, "__builtin_ia32_ptestmd512", IX86_BUILTIN_PTESTMD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
30323 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testmv8di3_mask, "__builtin_ia32_ptestmq512", IX86_BUILTIN_PTESTMQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
30324 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testnmv16si3_mask, "__builtin_ia32_ptestnmd512", IX86_BUILTIN_PTESTNMD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
30325 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testnmv8di3_mask, "__builtin_ia32_ptestnmq512", IX86_BUILTIN_PTESTNMQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
30326 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_highv16si_mask, "__builtin_ia32_punpckhdq512_mask", IX86_BUILTIN_PUNPCKHDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30327 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_highv8di_mask, "__builtin_ia32_punpckhqdq512_mask", IX86_BUILTIN_PUNPCKHQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30328 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_lowv16si_mask, "__builtin_ia32_punpckldq512_mask", IX86_BUILTIN_PUNPCKLDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30329 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_lowv8di_mask, "__builtin_ia32_punpcklqdq512_mask", IX86_BUILTIN_PUNPCKLQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30330 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorv16si3_mask, "__builtin_ia32_pxord512_mask", IX86_BUILTIN_PXORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30331 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorv8di3_mask, "__builtin_ia32_pxorq512_mask", IX86_BUILTIN_PXORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30332 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rcp14v8df_mask, "__builtin_ia32_rcp14pd512_mask", IX86_BUILTIN_RCP14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30333 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rcp14v16sf_mask, "__builtin_ia32_rcp14ps512_mask", IX86_BUILTIN_RCP14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30334 { OPTION_MASK_ISA_AVX512F, CODE_FOR_srcp14v2df, "__builtin_ia32_rcp14sd", IX86_BUILTIN_RCP14SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
30335 { OPTION_MASK_ISA_AVX512F, CODE_FOR_srcp14v4sf, "__builtin_ia32_rcp14ss", IX86_BUILTIN_RCP14SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
30336 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v8df_mask, "__builtin_ia32_rsqrt14pd512_mask", IX86_BUILTIN_RSQRT14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30337 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v16sf_mask, "__builtin_ia32_rsqrt14ps512_mask", IX86_BUILTIN_RSQRT14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30338 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v2df, "__builtin_ia32_rsqrt14sd", IX86_BUILTIN_RSQRT14SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
30339 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v4sf, "__builtin_ia32_rsqrt14ss", IX86_BUILTIN_RSQRT14SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
30340 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shufpd512_mask, "__builtin_ia32_shufpd512_mask", IX86_BUILTIN_SHUFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI },
30341 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shufps512_mask, "__builtin_ia32_shufps512_mask", IX86_BUILTIN_SHUFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI },
30342 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_f32x4_mask, "__builtin_ia32_shuf_f32x4_mask", IX86_BUILTIN_SHUF_F32x4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI },
30343 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_f64x2_mask, "__builtin_ia32_shuf_f64x2_mask", IX86_BUILTIN_SHUF_F64x2, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI },
30344 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_i32x4_mask, "__builtin_ia32_shuf_i32x4_mask", IX86_BUILTIN_SHUF_I32x4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI },
30345 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_i64x2_mask, "__builtin_ia32_shuf_i64x2_mask", IX86_BUILTIN_SHUF_I64x2, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI },
30346 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ucmpv16si3_mask, "__builtin_ia32_ucmpd512_mask", IX86_BUILTIN_UCMPD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_INT_HI },
30347 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ucmpv8di3_mask, "__builtin_ia32_ucmpq512_mask", IX86_BUILTIN_UCMPQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_INT_QI },
30348 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpckhpd512_mask, "__builtin_ia32_unpckhpd512_mask", IX86_BUILTIN_UNPCKHPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
30349 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpckhps512_mask, "__builtin_ia32_unpckhps512_mask", IX86_BUILTIN_UNPCKHPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
30350 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpcklpd512_mask, "__builtin_ia32_unpcklpd512_mask", IX86_BUILTIN_UNPCKLPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
30351 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpcklps512_mask, "__builtin_ia32_unpcklps512_mask", IX86_BUILTIN_UNPCKLPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
30352 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_clzv16si2_mask, "__builtin_ia32_vplzcntd_512_mask", IX86_BUILTIN_VPCLZCNTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30353 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_clzv8di2_mask, "__builtin_ia32_vplzcntq_512_mask", IX86_BUILTIN_VPCLZCNTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30354 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_conflictv16si_mask, "__builtin_ia32_vpconflictsi_512_mask", IX86_BUILTIN_VPCONFLICTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30355 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_conflictv8di_mask, "__builtin_ia32_vpconflictdi_512_mask", IX86_BUILTIN_VPCONFLICTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30356 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permv8df_mask, "__builtin_ia32_permdf512_mask", IX86_BUILTIN_VPERMDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI },
30357 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permv8di_mask, "__builtin_ia32_permdi512_mask", IX86_BUILTIN_VPERMDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
30358 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv16si3_mask, "__builtin_ia32_vpermi2vard512_mask", IX86_BUILTIN_VPERMI2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30359 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv8df3_mask, "__builtin_ia32_vpermi2varpd512_mask", IX86_BUILTIN_VPERMI2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
30360 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv16sf3_mask, "__builtin_ia32_vpermi2varps512_mask", IX86_BUILTIN_VPERMI2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
30361 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv8di3_mask, "__builtin_ia32_vpermi2varq512_mask", IX86_BUILTIN_VPERMI2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30362 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilv8df_mask, "__builtin_ia32_vpermilpd512_mask", IX86_BUILTIN_VPERMILPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI },
30363 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilv16sf_mask, "__builtin_ia32_vpermilps512_mask", IX86_BUILTIN_VPERMILPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI },
30364 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilvarv8df3_mask, "__builtin_ia32_vpermilvarpd512_mask", IX86_BUILTIN_VPERMILVARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
30365 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilvarv16sf3_mask, "__builtin_ia32_vpermilvarps512_mask", IX86_BUILTIN_VPERMILVARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
30366 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16si3_mask, "__builtin_ia32_vpermt2vard512_mask", IX86_BUILTIN_VPERMT2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30367 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16si3_maskz, "__builtin_ia32_vpermt2vard512_maskz", IX86_BUILTIN_VPERMT2VARD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30368 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8df3_mask, "__builtin_ia32_vpermt2varpd512_mask", IX86_BUILTIN_VPERMT2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_QI },
30369 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8df3_maskz, "__builtin_ia32_vpermt2varpd512_maskz", IX86_BUILTIN_VPERMT2VARPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_QI },
30370 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16sf3_mask, "__builtin_ia32_vpermt2varps512_mask", IX86_BUILTIN_VPERMT2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_HI },
30371 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16sf3_maskz, "__builtin_ia32_vpermt2varps512_maskz", IX86_BUILTIN_VPERMT2VARPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_HI },
30372 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8di3_mask, "__builtin_ia32_vpermt2varq512_mask", IX86_BUILTIN_VPERMT2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30373 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8di3_maskz, "__builtin_ia32_vpermt2varq512_maskz", IX86_BUILTIN_VPERMT2VARQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30374 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv8df_mask, "__builtin_ia32_permvardf512_mask", IX86_BUILTIN_VPERMVARDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
30375 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv8di_mask, "__builtin_ia32_permvardi512_mask", IX86_BUILTIN_VPERMVARDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30376 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv16sf_mask, "__builtin_ia32_permvarsf512_mask", IX86_BUILTIN_VPERMVARSF512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
30377 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv16si_mask, "__builtin_ia32_permvarsi512_mask", IX86_BUILTIN_VPERMVARSI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30378 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv16si_mask, "__builtin_ia32_pternlogd512_mask", IX86_BUILTIN_VTERNLOGD512_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI },
30379 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv16si_maskz, "__builtin_ia32_pternlogd512_maskz", IX86_BUILTIN_VTERNLOGD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI },
30380 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv8di_mask, "__builtin_ia32_pternlogq512_mask", IX86_BUILTIN_VTERNLOGQ512_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI },
30381 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv8di_maskz, "__builtin_ia32_pternlogq512_maskz", IX86_BUILTIN_VTERNLOGQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI },
30382
30383 { OPTION_MASK_ISA_AVX512F, CODE_FOR_copysignv16sf3, "__builtin_ia32_copysignps512", IX86_BUILTIN_CPYSGNPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF },
30384 { OPTION_MASK_ISA_AVX512F, CODE_FOR_copysignv8df3, "__builtin_ia32_copysignpd512", IX86_BUILTIN_CPYSGNPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF },
30385 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv8df2, "__builtin_ia32_sqrtpd512", IX86_BUILTIN_SQRTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF },
30386 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sqrtv16sf2, "__builtin_ia32_sqrtps512", IX86_BUILTIN_SQRTPS_NR512, UNKNOWN, (int) V16SF_FTYPE_V16SF },
30387 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v16sf, "__builtin_ia32_exp2ps", IX86_BUILTIN_EXP2PS, UNKNOWN, (int) V16SF_FTYPE_V16SF },
30388 { OPTION_MASK_ISA_AVX512F, CODE_FOR_roundv8df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix512", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512, UNKNOWN, (int) V16SI_FTYPE_V8DF_V8DF },
30389 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_floorpd_vec_pack_sfix512", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_FLOOR, (int) V16SI_FTYPE_V8DF_V8DF_ROUND },
30390 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_ceilpd_vec_pack_sfix512", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_CEIL, (int) V16SI_FTYPE_V8DF_V8DF_ROUND },
30391
30392 /* Mask arithmetic operations */
30393 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andhi3, "__builtin_ia32_kandhi", IX86_BUILTIN_KAND16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30394 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kandnhi, "__builtin_ia32_kandnhi", IX86_BUILTIN_KANDN16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30395 { OPTION_MASK_ISA_AVX512F, CODE_FOR_one_cmplhi2, "__builtin_ia32_knothi", IX86_BUILTIN_KNOT16, UNKNOWN, (int) HI_FTYPE_HI },
30396 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorhi3, "__builtin_ia32_korhi", IX86_BUILTIN_KOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30397 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kortestchi, "__builtin_ia32_kortestchi", IX86_BUILTIN_KORTESTC16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30398 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kortestzhi, "__builtin_ia32_kortestzhi", IX86_BUILTIN_KORTESTZ16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30399 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kunpckhi, "__builtin_ia32_kunpckhi", IX86_BUILTIN_KUNPCKBW, UNKNOWN, (int) HI_FTYPE_HI_HI },
30400 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kxnorhi, "__builtin_ia32_kxnorhi", IX86_BUILTIN_KXNOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30401 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorhi3, "__builtin_ia32_kxorhi", IX86_BUILTIN_KXOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30402 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kmovw, "__builtin_ia32_kmov16", IX86_BUILTIN_KMOV16, UNKNOWN, (int) HI_FTYPE_HI },
30403
30404 /* SHA */
30405 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1msg1, 0, IX86_BUILTIN_SHA1MSG1, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30406 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1msg2, 0, IX86_BUILTIN_SHA1MSG2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30407 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1nexte, 0, IX86_BUILTIN_SHA1NEXTE, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30408 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1rnds4, 0, IX86_BUILTIN_SHA1RNDS4, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
30409 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256msg1, 0, IX86_BUILTIN_SHA256MSG1, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30410 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256msg2, 0, IX86_BUILTIN_SHA256MSG2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30411 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256rnds2, 0, IX86_BUILTIN_SHA256RNDS2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI },
30412 };
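
/* Editorial sketch, not consumed by the compiler: each row in the table
   above pairs an ISA option mask, an insn code, the builtin's name, its
   IX86_BUILTIN_* code, an optional comparison/rounding code and a
   function-type code.  For the mask-arithmetic rows, HI_FTYPE_HI_HI
   describes a two-operand builtin over 16-bit mask values, so a user-level
   call through the __builtin_ia32_kandhi row (IX86_BUILTIN_KAND16) looks
   roughly like this, assuming -mavx512f; the intrinsics-header wrapper
   _mm512_kand that normally forwards to it is an assumption, not something
   defined in this file:

       unsigned short
       kand16 (unsigned short a, unsigned short b)
       {
         return __builtin_ia32_kandhi (a, b);
       }  */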
30413
30414 /* Builtins with rounding support. */
30415 static const struct builtin_description bdesc_round_args[] =
30416 {
30417 /* AVX512F */
30418 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv8df3_mask_round, "__builtin_ia32_addpd512_mask", IX86_BUILTIN_ADDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30419 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv16sf3_mask_round, "__builtin_ia32_addps512_mask", IX86_BUILTIN_ADDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30420 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmaddv2df3_round, "__builtin_ia32_addsd_round", IX86_BUILTIN_ADDSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30421 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmaddv4sf3_round, "__builtin_ia32_addss_round", IX86_BUILTIN_ADDSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30422 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv8df3_mask_round, "__builtin_ia32_cmppd512_mask", IX86_BUILTIN_CMPPD512, UNKNOWN, (int) QI_FTYPE_V8DF_V8DF_INT_QI_INT },
30423 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv16sf3_mask_round, "__builtin_ia32_cmpps512_mask", IX86_BUILTIN_CMPPS512, UNKNOWN, (int) HI_FTYPE_V16SF_V16SF_INT_HI_INT },
30424 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmcmpv2df3_mask_round, "__builtin_ia32_cmpsd_mask", IX86_BUILTIN_CMPSD_MASK, UNKNOWN, (int) QI_FTYPE_V2DF_V2DF_INT_QI_INT },
30425 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmcmpv4sf3_mask_round, "__builtin_ia32_cmpss_mask", IX86_BUILTIN_CMPSS_MASK, UNKNOWN, (int) QI_FTYPE_V4SF_V4SF_INT_QI_INT },
30426 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_comi_round, "__builtin_ia32_vcomisd", IX86_BUILTIN_COMIDF, UNKNOWN, (int) INT_FTYPE_V2DF_V2DF_INT_INT },
30427 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_comi_round, "__builtin_ia32_vcomiss", IX86_BUILTIN_COMISF, UNKNOWN, (int) INT_FTYPE_V4SF_V4SF_INT_INT },
30428 { OPTION_MASK_ISA_AVX512F, CODE_FOR_floatv16siv16sf2_mask_round, "__builtin_ia32_cvtdq2ps512_mask", IX86_BUILTIN_CVTDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT },
30429 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtpd2dq512_mask_round, "__builtin_ia32_cvtpd2dq512_mask", IX86_BUILTIN_CVTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
30430 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtpd2ps512_mask_round, "__builtin_ia32_cvtpd2ps512_mask", IX86_BUILTIN_CVTPD2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DF_V8SF_QI_INT },
30431 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_notruncv8dfv8si2_mask_round, "__builtin_ia32_cvtpd2udq512_mask", IX86_BUILTIN_CVTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
30432 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtph2ps512_mask_round, "__builtin_ia32_vcvtph2ps512_mask", IX86_BUILTIN_CVTPH2PS512, UNKNOWN, (int) V16SF_FTYPE_V16HI_V16SF_HI_INT },
30433 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2dq512_mask", IX86_BUILTIN_CVTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
30434 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtps2pd512_mask_round, "__builtin_ia32_cvtps2pd512_mask", IX86_BUILTIN_CVTPS2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SF_V8DF_QI_INT },
30435 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ufix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2udq512_mask", IX86_BUILTIN_CVTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
30436 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtsd2ss_round, "__builtin_ia32_cvtsd2ss_round", IX86_BUILTIN_CVTSD2SS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF_INT },
30437 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq_round, "__builtin_ia32_cvtsi2sd64", IX86_BUILTIN_CVTSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT64_INT },
30438 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvtsi2ss_round, "__builtin_ia32_cvtsi2ss32", IX86_BUILTIN_CVTSI2SS32, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_INT },
30439 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq_round, "__builtin_ia32_cvtsi2ss64", IX86_BUILTIN_CVTSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT64_INT },
30440 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtss2sd_round, "__builtin_ia32_cvtss2sd_round", IX86_BUILTIN_CVTSS2SD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF_INT },
30441 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2dq512_mask", IX86_BUILTIN_CVTTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
30442 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2udq512_mask", IX86_BUILTIN_CVTTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
30443 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2dq512_mask", IX86_BUILTIN_CVTTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
30444 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2udq512_mask", IX86_BUILTIN_CVTTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
30445 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufloatv16siv16sf2_mask_round, "__builtin_ia32_cvtudq2ps512_mask", IX86_BUILTIN_CVTUDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT },
30446 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_cvtusi2sd64_round, "__builtin_ia32_cvtusi2sd64", IX86_BUILTIN_CVTUSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT64_INT },
30447 { OPTION_MASK_ISA_AVX512F, CODE_FOR_cvtusi2ss32_round, "__builtin_ia32_cvtusi2ss32", IX86_BUILTIN_CVTUSI2SS32, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT_INT },
30448 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_cvtusi2ss64_round, "__builtin_ia32_cvtusi2ss64", IX86_BUILTIN_CVTUSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT64_INT },
30449 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_divv8df3_mask_round, "__builtin_ia32_divpd512_mask", IX86_BUILTIN_DIVPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30450 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_divv16sf3_mask_round, "__builtin_ia32_divps512_mask", IX86_BUILTIN_DIVPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30451 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmdivv2df3_round, "__builtin_ia32_divsd_round", IX86_BUILTIN_DIVSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30452 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmdivv4sf3_round, "__builtin_ia32_divss_round", IX86_BUILTIN_DIVSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30453 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv8df_mask_round, "__builtin_ia32_fixupimmpd512_mask", IX86_BUILTIN_FIXUPIMMPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT },
30454 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv8df_maskz_round, "__builtin_ia32_fixupimmpd512_maskz", IX86_BUILTIN_FIXUPIMMPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT },
30455 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv16sf_mask_round, "__builtin_ia32_fixupimmps512_mask", IX86_BUILTIN_FIXUPIMMPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT },
30456 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv16sf_maskz_round, "__builtin_ia32_fixupimmps512_maskz", IX86_BUILTIN_FIXUPIMMPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT },
30457 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv2df_mask_round, "__builtin_ia32_fixupimmsd_mask", IX86_BUILTIN_FIXUPIMMSD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT },
30458 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv2df_maskz_round, "__builtin_ia32_fixupimmsd_maskz", IX86_BUILTIN_FIXUPIMMSD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT },
30459 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv4sf_mask_round, "__builtin_ia32_fixupimmss_mask", IX86_BUILTIN_FIXUPIMMSS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT },
30460 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv4sf_maskz_round, "__builtin_ia32_fixupimmss_maskz", IX86_BUILTIN_FIXUPIMMSS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT },
30461 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getexpv8df_mask_round, "__builtin_ia32_getexppd512_mask", IX86_BUILTIN_GETEXPPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
30462 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getexpv16sf_mask_round, "__builtin_ia32_getexpps512_mask", IX86_BUILTIN_GETEXPPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
30463 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sgetexpv2df_round, "__builtin_ia32_getexpsd128_round", IX86_BUILTIN_GETEXPSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30464 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sgetexpv4sf_round, "__builtin_ia32_getexpss128_round", IX86_BUILTIN_GETEXPSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30465 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv8df_mask_round, "__builtin_ia32_getmantpd512_mask", IX86_BUILTIN_GETMANTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT },
30466 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv16sf_mask_round, "__builtin_ia32_getmantps512_mask", IX86_BUILTIN_GETMANTPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT },
30467 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vgetmantv2df_round, "__builtin_ia32_getmantsd_round", IX86_BUILTIN_GETMANTSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
30468 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vgetmantv4sf_round, "__builtin_ia32_getmantss_round", IX86_BUILTIN_GETMANTSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
30469 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv8df3_mask_round, "__builtin_ia32_maxpd512_mask", IX86_BUILTIN_MAXPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30470 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv16sf3_mask_round, "__builtin_ia32_maxps512_mask", IX86_BUILTIN_MAXPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30471 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsmaxv2df3_round, "__builtin_ia32_maxsd_round", IX86_BUILTIN_MAXSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30472 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsmaxv4sf3_round, "__builtin_ia32_maxss_round", IX86_BUILTIN_MAXSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30473 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv8df3_mask_round, "__builtin_ia32_minpd512_mask", IX86_BUILTIN_MINPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30474 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv16sf3_mask_round, "__builtin_ia32_minps512_mask", IX86_BUILTIN_MINPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30475 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsminv2df3_round, "__builtin_ia32_minsd_round", IX86_BUILTIN_MINSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30476 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsminv4sf3_round, "__builtin_ia32_minss_round", IX86_BUILTIN_MINSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30477 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv8df3_mask_round, "__builtin_ia32_mulpd512_mask", IX86_BUILTIN_MULPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30478 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv16sf3_mask_round, "__builtin_ia32_mulps512_mask", IX86_BUILTIN_MULPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30479 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmmulv2df3_round, "__builtin_ia32_mulsd_round", IX86_BUILTIN_MULSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30480 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmmulv4sf3_round, "__builtin_ia32_mulss_round", IX86_BUILTIN_MULSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30481 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev8df_mask_round, "__builtin_ia32_rndscalepd_mask", IX86_BUILTIN_RNDSCALEPD, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT },
30482 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev16sf_mask_round, "__builtin_ia32_rndscaleps_mask", IX86_BUILTIN_RNDSCALEPS, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT },
30483 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev2df_round, "__builtin_ia32_rndscalesd_round", IX86_BUILTIN_RNDSCALESD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
30484 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev4sf_round, "__builtin_ia32_rndscaless_round", IX86_BUILTIN_RNDSCALESS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
30485 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_scalefv8df_mask_round, "__builtin_ia32_scalefpd512_mask", IX86_BUILTIN_SCALEFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30486 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_scalefv16sf_mask_round, "__builtin_ia32_scalefps512_mask", IX86_BUILTIN_SCALEFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30487 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmscalefv2df_round, "__builtin_ia32_scalefsd_round", IX86_BUILTIN_SCALEFSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30488 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmscalefv4sf_round, "__builtin_ia32_scalefss_round", IX86_BUILTIN_SCALEFSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30489 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv8df2_mask_round, "__builtin_ia32_sqrtpd512_mask", IX86_BUILTIN_SQRTPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
30490 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv16sf2_mask_round, "__builtin_ia32_sqrtps512_mask", IX86_BUILTIN_SQRTPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
30491 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsqrtv2df2_round, "__builtin_ia32_sqrtsd_round", IX86_BUILTIN_SQRTSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30492 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsqrtv4sf2_round, "__builtin_ia32_sqrtss_round", IX86_BUILTIN_SQRTSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30493 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv8df3_mask_round, "__builtin_ia32_subpd512_mask", IX86_BUILTIN_SUBPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30494 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv16sf3_mask_round, "__builtin_ia32_subps512_mask", IX86_BUILTIN_SUBPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30495 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsubv2df3_round, "__builtin_ia32_subsd_round", IX86_BUILTIN_SUBSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30496 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsubv4sf3_round, "__builtin_ia32_subss_round", IX86_BUILTIN_SUBSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30497 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtsd2si_round, "__builtin_ia32_vcvtsd2si32", IX86_BUILTIN_VCVTSD2SI32, UNKNOWN, (int) INT_FTYPE_V2DF_INT },
30498 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq_round, "__builtin_ia32_vcvtsd2si64", IX86_BUILTIN_VCVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF_INT },
30499 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtsd2usi_round, "__builtin_ia32_vcvtsd2usi32", IX86_BUILTIN_VCVTSD2USI32, UNKNOWN, (int) UINT_FTYPE_V2DF_INT },
30500 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvtsd2usiq_round, "__builtin_ia32_vcvtsd2usi64", IX86_BUILTIN_VCVTSD2USI64, UNKNOWN, (int) UINT64_FTYPE_V2DF_INT },
30501 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvtss2si_round, "__builtin_ia32_vcvtss2si32", IX86_BUILTIN_VCVTSS2SI32, UNKNOWN, (int) INT_FTYPE_V4SF_INT },
30502 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq_round, "__builtin_ia32_vcvtss2si64", IX86_BUILTIN_VCVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF_INT },
30503 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtss2usi_round, "__builtin_ia32_vcvtss2usi32", IX86_BUILTIN_VCVTSS2USI32, UNKNOWN, (int) UINT_FTYPE_V4SF_INT },
30504 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvtss2usiq_round, "__builtin_ia32_vcvtss2usi64", IX86_BUILTIN_VCVTSS2USI64, UNKNOWN, (int) UINT64_FTYPE_V4SF_INT },
30505 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvttsd2si_round, "__builtin_ia32_vcvttsd2si32", IX86_BUILTIN_VCVTTSD2SI32, UNKNOWN, (int) INT_FTYPE_V2DF_INT },
30506 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq_round, "__builtin_ia32_vcvttsd2si64", IX86_BUILTIN_VCVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF_INT },
30507 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvttsd2usi_round, "__builtin_ia32_vcvttsd2usi32", IX86_BUILTIN_VCVTTSD2USI32, UNKNOWN, (int) UINT_FTYPE_V2DF_INT },
30508 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvttsd2usiq_round, "__builtin_ia32_vcvttsd2usi64", IX86_BUILTIN_VCVTTSD2USI64, UNKNOWN, (int) UINT64_FTYPE_V2DF_INT },
30509 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvttss2si_round, "__builtin_ia32_vcvttss2si32", IX86_BUILTIN_VCVTTSS2SI32, UNKNOWN, (int) INT_FTYPE_V4SF_INT },
30510 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq_round, "__builtin_ia32_vcvttss2si64", IX86_BUILTIN_VCVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF_INT },
30511 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvttss2usi_round, "__builtin_ia32_vcvttss2usi32", IX86_BUILTIN_VCVTTSS2USI32, UNKNOWN, (int) UINT_FTYPE_V4SF_INT },
30512 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvttss2usiq_round, "__builtin_ia32_vcvttss2usi64", IX86_BUILTIN_VCVTTSS2USI64, UNKNOWN, (int) UINT64_FTYPE_V4SF_INT },
30513 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_mask_round, "__builtin_ia32_vfmaddpd512_mask", IX86_BUILTIN_VFMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30514 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_mask3_round, "__builtin_ia32_vfmaddpd512_mask3", IX86_BUILTIN_VFMADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30515 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_maskz_round, "__builtin_ia32_vfmaddpd512_maskz", IX86_BUILTIN_VFMADDPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30516 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_mask_round, "__builtin_ia32_vfmaddps512_mask", IX86_BUILTIN_VFMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30517 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_mask3_round, "__builtin_ia32_vfmaddps512_mask3", IX86_BUILTIN_VFMADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30518 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_maskz_round, "__builtin_ia32_vfmaddps512_maskz", IX86_BUILTIN_VFMADDPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30519 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fmai_vmfmadd_v2df_round, "__builtin_ia32_vfmaddsd3_round", IX86_BUILTIN_VFMADDSD3_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_INT },
30520 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fmai_vmfmadd_v4sf_round, "__builtin_ia32_vfmaddss3_round", IX86_BUILTIN_VFMADDSS3_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_INT },
30521 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_mask_round, "__builtin_ia32_vfmaddsubpd512_mask", IX86_BUILTIN_VFMADDSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30522 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_mask3_round, "__builtin_ia32_vfmaddsubpd512_mask3", IX86_BUILTIN_VFMADDSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30523 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_maskz_round, "__builtin_ia32_vfmaddsubpd512_maskz", IX86_BUILTIN_VFMADDSUBPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30524 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_mask_round, "__builtin_ia32_vfmaddsubps512_mask", IX86_BUILTIN_VFMADDSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30525 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_mask3_round, "__builtin_ia32_vfmaddsubps512_mask3", IX86_BUILTIN_VFMADDSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30526 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_maskz_round, "__builtin_ia32_vfmaddsubps512_maskz", IX86_BUILTIN_VFMADDSUBPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30527 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsubadd_v8df_mask3_round, "__builtin_ia32_vfmsubaddpd512_mask3", IX86_BUILTIN_VFMSUBADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30528 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsubadd_v16sf_mask3_round, "__builtin_ia32_vfmsubaddps512_mask3", IX86_BUILTIN_VFMSUBADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30529 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v8df_mask3_round, "__builtin_ia32_vfmsubpd512_mask3", IX86_BUILTIN_VFMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30530 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v16sf_mask3_round, "__builtin_ia32_vfmsubps512_mask3", IX86_BUILTIN_VFMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30531 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v8df_mask_round, "__builtin_ia32_vfnmaddpd512_mask", IX86_BUILTIN_VFNMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30532 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v16sf_mask_round, "__builtin_ia32_vfnmaddps512_mask", IX86_BUILTIN_VFNMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30533 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v8df_mask_round, "__builtin_ia32_vfnmsubpd512_mask", IX86_BUILTIN_VFNMSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30534 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v8df_mask3_round, "__builtin_ia32_vfnmsubpd512_mask3", IX86_BUILTIN_VFNMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30535 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v16sf_mask_round, "__builtin_ia32_vfnmsubps512_mask", IX86_BUILTIN_VFNMSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30536 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v16sf_mask3_round, "__builtin_ia32_vfnmsubps512_mask3", IX86_BUILTIN_VFNMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30537
30538 /* AVX512ER */
30539 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v8df_mask_round, "__builtin_ia32_exp2pd_mask", IX86_BUILTIN_EXP2PD_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
30540 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v16sf_mask_round, "__builtin_ia32_exp2ps_mask", IX86_BUILTIN_EXP2PS_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
30541 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rcp28v8df_mask_round, "__builtin_ia32_rcp28pd_mask", IX86_BUILTIN_RCP28PD, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
30542 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rcp28v16sf_mask_round, "__builtin_ia32_rcp28ps_mask", IX86_BUILTIN_RCP28PS, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
30543 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrcp28v2df_round, "__builtin_ia32_rcp28sd_round", IX86_BUILTIN_RCP28SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30544 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrcp28v4sf_round, "__builtin_ia32_rcp28ss_round", IX86_BUILTIN_RCP28SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30545 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rsqrt28v8df_mask_round, "__builtin_ia32_rsqrt28pd_mask", IX86_BUILTIN_RSQRT28PD, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
30546 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rsqrt28v16sf_mask_round, "__builtin_ia32_rsqrt28ps_mask", IX86_BUILTIN_RSQRT28PS, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
30547 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrsqrt28v2df_round, "__builtin_ia32_rsqrt28sd_round", IX86_BUILTIN_RSQRT28SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30548 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrsqrt28v4sf_round, "__builtin_ia32_rsqrt28ss_round", IX86_BUILTIN_RSQRT28SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30549 };
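
/* Editorial sketch, not part of the compiler proper: for the rows in
   bdesc_round_args the trailing _INT in the function-type code carries the
   embedded rounding/SAE immediate, so a user-level call looks roughly like
   the following (the _MM_FROUND_* names are assumed from the usual
   <immintrin.h> definitions, not from this file):

       d = __builtin_ia32_addpd512_mask (a, b, src, mask,
                                         _MM_FROUND_TO_NEAREST_INT
                                         | _MM_FROUND_NO_EXC);

   Passing _MM_FROUND_CUR_DIRECTION instead selects the rounding mode
   currently set in MXCSR.  */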
30550
30551 /* FMA4 and XOP. */
30552 #define MULTI_ARG_4_DF2_DI_I V2DF_FTYPE_V2DF_V2DF_V2DI_INT
30553 #define MULTI_ARG_4_DF2_DI_I1 V4DF_FTYPE_V4DF_V4DF_V4DI_INT
30554 #define MULTI_ARG_4_SF2_SI_I V4SF_FTYPE_V4SF_V4SF_V4SI_INT
30555 #define MULTI_ARG_4_SF2_SI_I1 V8SF_FTYPE_V8SF_V8SF_V8SI_INT
30556 #define MULTI_ARG_3_SF V4SF_FTYPE_V4SF_V4SF_V4SF
30557 #define MULTI_ARG_3_DF V2DF_FTYPE_V2DF_V2DF_V2DF
30558 #define MULTI_ARG_3_SF2 V8SF_FTYPE_V8SF_V8SF_V8SF
30559 #define MULTI_ARG_3_DF2 V4DF_FTYPE_V4DF_V4DF_V4DF
30560 #define MULTI_ARG_3_DI V2DI_FTYPE_V2DI_V2DI_V2DI
30561 #define MULTI_ARG_3_SI V4SI_FTYPE_V4SI_V4SI_V4SI
30562 #define MULTI_ARG_3_SI_DI V4SI_FTYPE_V4SI_V4SI_V2DI
30563 #define MULTI_ARG_3_HI V8HI_FTYPE_V8HI_V8HI_V8HI
30564 #define MULTI_ARG_3_HI_SI V8HI_FTYPE_V8HI_V8HI_V4SI
30565 #define MULTI_ARG_3_QI V16QI_FTYPE_V16QI_V16QI_V16QI
30566 #define MULTI_ARG_3_DI2 V4DI_FTYPE_V4DI_V4DI_V4DI
30567 #define MULTI_ARG_3_SI2 V8SI_FTYPE_V8SI_V8SI_V8SI
30568 #define MULTI_ARG_3_HI2 V16HI_FTYPE_V16HI_V16HI_V16HI
30569 #define MULTI_ARG_3_QI2 V32QI_FTYPE_V32QI_V32QI_V32QI
30570 #define MULTI_ARG_2_SF V4SF_FTYPE_V4SF_V4SF
30571 #define MULTI_ARG_2_DF V2DF_FTYPE_V2DF_V2DF
30572 #define MULTI_ARG_2_DI V2DI_FTYPE_V2DI_V2DI
30573 #define MULTI_ARG_2_SI V4SI_FTYPE_V4SI_V4SI
30574 #define MULTI_ARG_2_HI V8HI_FTYPE_V8HI_V8HI
30575 #define MULTI_ARG_2_QI V16QI_FTYPE_V16QI_V16QI
30576 #define MULTI_ARG_2_DI_IMM V2DI_FTYPE_V2DI_SI
30577 #define MULTI_ARG_2_SI_IMM V4SI_FTYPE_V4SI_SI
30578 #define MULTI_ARG_2_HI_IMM V8HI_FTYPE_V8HI_SI
30579 #define MULTI_ARG_2_QI_IMM V16QI_FTYPE_V16QI_SI
30580 #define MULTI_ARG_2_DI_CMP V2DI_FTYPE_V2DI_V2DI_CMP
30581 #define MULTI_ARG_2_SI_CMP V4SI_FTYPE_V4SI_V4SI_CMP
30582 #define MULTI_ARG_2_HI_CMP V8HI_FTYPE_V8HI_V8HI_CMP
30583 #define MULTI_ARG_2_QI_CMP V16QI_FTYPE_V16QI_V16QI_CMP
30584 #define MULTI_ARG_2_SF_TF V4SF_FTYPE_V4SF_V4SF_TF
30585 #define MULTI_ARG_2_DF_TF V2DF_FTYPE_V2DF_V2DF_TF
30586 #define MULTI_ARG_2_DI_TF V2DI_FTYPE_V2DI_V2DI_TF
30587 #define MULTI_ARG_2_SI_TF V4SI_FTYPE_V4SI_V4SI_TF
30588 #define MULTI_ARG_2_HI_TF V8HI_FTYPE_V8HI_V8HI_TF
30589 #define MULTI_ARG_2_QI_TF V16QI_FTYPE_V16QI_V16QI_TF
30590 #define MULTI_ARG_1_SF V4SF_FTYPE_V4SF
30591 #define MULTI_ARG_1_DF V2DF_FTYPE_V2DF
30592 #define MULTI_ARG_1_SF2 V8SF_FTYPE_V8SF
30593 #define MULTI_ARG_1_DF2 V4DF_FTYPE_V4DF
30594 #define MULTI_ARG_1_DI V2DI_FTYPE_V2DI
30595 #define MULTI_ARG_1_SI V4SI_FTYPE_V4SI
30596 #define MULTI_ARG_1_HI V8HI_FTYPE_V8HI
30597 #define MULTI_ARG_1_QI V16QI_FTYPE_V16QI
30598 #define MULTI_ARG_1_SI_DI V2DI_FTYPE_V4SI
30599 #define MULTI_ARG_1_HI_DI V2DI_FTYPE_V8HI
30600 #define MULTI_ARG_1_HI_SI V4SI_FTYPE_V8HI
30601 #define MULTI_ARG_1_QI_DI V2DI_FTYPE_V16QI
30602 #define MULTI_ARG_1_QI_SI V4SI_FTYPE_V16QI
30603 #define MULTI_ARG_1_QI_HI V8HI_FTYPE_V16QI
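
/* Reading aid (editorial): MULTI_ARG_<n>_<elt>[_suffix] abbreviates the
   function-type code of an n-operand FMA4/XOP builtin; the _CMP, _TF and
   _IMM suffixes mark the comparison, pcom_tf and immediate shift/rotate
   forms.  E.g. MULTI_ARG_2_QI_CMP is V16QI_FTYPE_V16QI_V16QI_CMP, the
   shape of the vpcom* byte comparisons below, so a user-level call is
   roughly (a sketch assuming -mxop; the vector typedefs are illustrative):

       __v16qi lt = __builtin_ia32_vpcomltb (a, b);   */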
30604
30605 static const struct builtin_description bdesc_multi_arg[] =
30606 {
30607 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v4sf,
30608 "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS,
30609 UNKNOWN, (int)MULTI_ARG_3_SF },
30610 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v2df,
30611 "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD,
30612 UNKNOWN, (int)MULTI_ARG_3_DF },
30613
30614 { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v4sf,
30615 "__builtin_ia32_vfmaddss3", IX86_BUILTIN_VFMADDSS3,
30616 UNKNOWN, (int)MULTI_ARG_3_SF },
30617 { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v2df,
30618 "__builtin_ia32_vfmaddsd3", IX86_BUILTIN_VFMADDSD3,
30619 UNKNOWN, (int)MULTI_ARG_3_DF },
30620
30621 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4sf,
30622 "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS,
30623 UNKNOWN, (int)MULTI_ARG_3_SF },
30624 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v2df,
30625 "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD,
30626 UNKNOWN, (int)MULTI_ARG_3_DF },
30627 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v8sf,
30628 "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256,
30629 UNKNOWN, (int)MULTI_ARG_3_SF2 },
30630 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4df,
30631 "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256,
30632 UNKNOWN, (int)MULTI_ARG_3_DF2 },
30633
30634 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4sf,
30635 "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS,
30636 UNKNOWN, (int)MULTI_ARG_3_SF },
30637 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v2df,
30638 "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD,
30639 UNKNOWN, (int)MULTI_ARG_3_DF },
30640 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v8sf,
30641 "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256,
30642 UNKNOWN, (int)MULTI_ARG_3_SF2 },
30643 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4df,
30644 "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256,
30645 UNKNOWN, (int)MULTI_ARG_3_DF2 },
30646
30647 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV, UNKNOWN, (int)MULTI_ARG_3_DI },
30648 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI, UNKNOWN, (int)MULTI_ARG_3_DI },
30649 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4si, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI, UNKNOWN, (int)MULTI_ARG_3_SI },
30650 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8hi, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI, UNKNOWN, (int)MULTI_ARG_3_HI },
30651 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16qi, "__builtin_ia32_vpcmov_v16qi",IX86_BUILTIN_VPCMOV_V16QI,UNKNOWN, (int)MULTI_ARG_3_QI },
30652 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2df, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF, UNKNOWN, (int)MULTI_ARG_3_DF },
30653 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4sf, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF, UNKNOWN, (int)MULTI_ARG_3_SF },
30654
30655 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
30656 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
30657 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8si256, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256, UNKNOWN, (int)MULTI_ARG_3_SI2 },
30658 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16hi256, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256, UNKNOWN, (int)MULTI_ARG_3_HI2 },
30659 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v32qi256, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256, UNKNOWN, (int)MULTI_ARG_3_QI2 },
30660 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4df256, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
30661 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8sf256, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
30662
30663 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pperm, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM, UNKNOWN, (int)MULTI_ARG_3_QI },
30664
30665 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssww, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
30666 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsww, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
30667 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsswd, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
30668 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacswd, "__builtin_ia32_vpmacswd", IX86_BUILTIN_VPMACSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
30669 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdd, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
30670 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdd, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
30671 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdql, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
30672 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdqh, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
30673 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdql, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
30674 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdqh, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
30675 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcsswd, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
30676 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcswd, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
30677
30678 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv2di3, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ, UNKNOWN, (int)MULTI_ARG_2_DI },
30679 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv4si3, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD, UNKNOWN, (int)MULTI_ARG_2_SI },
30680 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv8hi3, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW, UNKNOWN, (int)MULTI_ARG_2_HI },
30681 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv16qi3, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB, UNKNOWN, (int)MULTI_ARG_2_QI },
30682 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv2di3, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM, UNKNOWN, (int)MULTI_ARG_2_DI_IMM },
30683 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv4si3, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM, UNKNOWN, (int)MULTI_ARG_2_SI_IMM },
30684 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv8hi3, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM, UNKNOWN, (int)MULTI_ARG_2_HI_IMM },
30685 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv16qi3, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM, UNKNOWN, (int)MULTI_ARG_2_QI_IMM },
30686 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav2di3, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ, UNKNOWN, (int)MULTI_ARG_2_DI },
30687 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav4si3, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD, UNKNOWN, (int)MULTI_ARG_2_SI },
30688 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav8hi3, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW, UNKNOWN, (int)MULTI_ARG_2_HI },
30689 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav16qi3, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB, UNKNOWN, (int)MULTI_ARG_2_QI },
30690 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv2di3, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ, UNKNOWN, (int)MULTI_ARG_2_DI },
30691 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv4si3, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD, UNKNOWN, (int)MULTI_ARG_2_SI },
30692 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv8hi3, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW, UNKNOWN, (int)MULTI_ARG_2_HI },
30693 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv16qi3, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB, UNKNOWN, (int)MULTI_ARG_2_QI },
30694
30695 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS, UNKNOWN, (int)MULTI_ARG_1_SF },
30696 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD, UNKNOWN, (int)MULTI_ARG_1_DF },
30697 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4sf2, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF },
30698 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv2df2, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF },
30699 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv8sf2, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256, UNKNOWN, (int)MULTI_ARG_1_SF2 },
30700 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4df2, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256, UNKNOWN, (int)MULTI_ARG_1_DF2 },
30701
30702 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbw, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
30703 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbd, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
30704 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbq, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
30705 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwd, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
30706 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwq, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
30707 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadddq, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
30708 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubw, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
30709 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubd, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
30710 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubq, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
30711 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwd, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
30712 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwq, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
30713 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddudq, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
30714 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubbw, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
30715 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubwd, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
30716 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubdq, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
30717
30718 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
30719 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
30720 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
30721 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
30722 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
30723 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
30724 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },
30725
30726 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
30727 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
30728 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
30729 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
30730 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
30731 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
30732 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },
30733
30734 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
30735 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
30736 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomneqd", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
30737 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
30738 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
30739 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
30740 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },
30741
30742 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
30743 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
30744 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
30745 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
30746 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
30747 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
30748 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
30749
30750 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
30751 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
30752 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
30753 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
30754 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
30755 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
30756 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
30757
30758 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
30759 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
30760 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
30761 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
30762 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
30763 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
30764 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
30765
30766 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
30767 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
30768 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
30769 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
30770 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
30771 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
30772 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
30773
30774 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
30775 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
30776 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
30777 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
30778 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
30779 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
30780 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
30781
30782 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
30783 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
30784 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
30785 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
30786 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseub",IX86_BUILTIN_VPCOMFALSEUB,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
30787 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalseuw",IX86_BUILTIN_VPCOMFALSEUW,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
30788 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalseud",IX86_BUILTIN_VPCOMFALSEUD,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
30789 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseuq",IX86_BUILTIN_VPCOMFALSEUQ,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
30790
30791 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
30792 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
30793 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
30794 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
30795 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
30796 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
30797 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
30798 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
30799
30800 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v2df3, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I },
30801 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I },
30802 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I1 },
30803 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I1 },
30804
30805 };
30806 \f
30807 /* TM vector builtins. */
30808
30809 /* Reuse the existing x86-specific `struct builtin_description' because
30810 it is convenient; add casts to make the TM built-in codes fit. */
30811 static const struct builtin_description bdesc_tm[] =
30812 {
30813 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WM64", (enum ix86_builtins) BUILT_IN_TM_STORE_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
30814 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaRM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
30815 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaWM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
30816 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
30817 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaRM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
30818 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
30819 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RfWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
30820
30821 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WM128", (enum ix86_builtins) BUILT_IN_TM_STORE_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
30822 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaRM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
30823 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaWM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
30824 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
30825 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaRM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
30826 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
30827 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RfWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
30828
30829 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WM256", (enum ix86_builtins) BUILT_IN_TM_STORE_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
30830 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaRM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
30831 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaWM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
30832 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
30833 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaRM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
30834 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
30835 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RfWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
30836
30837 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_LM64", (enum ix86_builtins) BUILT_IN_TM_LOG_M64, UNKNOWN, VOID_FTYPE_PCVOID },
30838 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_LM128", (enum ix86_builtins) BUILT_IN_TM_LOG_M128, UNKNOWN, VOID_FTYPE_PCVOID },
30839 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_LM256", (enum ix86_builtins) BUILT_IN_TM_LOG_M256, UNKNOWN, VOID_FTYPE_PCVOID },
30840 };
30841
30842 /* TM callbacks. */
30843
30844 /* Return the builtin decl needed to load a vector of TYPE. */
30845
30846 static tree
30847 ix86_builtin_tm_load (tree type)
30848 {
30849 if (TREE_CODE (type) == VECTOR_TYPE)
30850 {
30851 switch (tree_to_uhwi (TYPE_SIZE (type)))
30852 {
30853 case 64:
30854 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M64);
30855 case 128:
30856 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M128);
30857 case 256:
30858 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M256);
30859 }
30860 }
30861 return NULL_TREE;
30862 }
30863
30864 /* Return the builtin decl needed to store a vector of TYPE. */
30865
30866 static tree
30867 ix86_builtin_tm_store (tree type)
30868 {
30869 if (TREE_CODE (type) == VECTOR_TYPE)
30870 {
30871 switch (tree_to_uhwi (TYPE_SIZE (type)))
30872 {
30873 case 64:
30874 return builtin_decl_explicit (BUILT_IN_TM_STORE_M64);
30875 case 128:
30876 return builtin_decl_explicit (BUILT_IN_TM_STORE_M128);
30877 case 256:
30878 return builtin_decl_explicit (BUILT_IN_TM_STORE_M256);
30879 }
30880 }
30881 return NULL_TREE;
30882 }
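/* Illustrative sketch only (kept out of the build with #if 0): user code
   that, when compiled with -fgnu-tm and SSE enabled, can make the
   transactional-memory lowering use the vector TM builtins registered in
   bdesc_tm above (e.g. __builtin__ITM_RM128 / __builtin__ITM_WM128) through
   the two hooks just defined.  The type and function names are hypothetical.  */
#if 0
typedef float v4sf __attribute__ ((vector_size (16)));

v4sf
tm_copy_v4sf (v4sf *dst, v4sf *src)
{
  v4sf tmp;
  __transaction_atomic
    {
      tmp = *src;	/* May be lowered to an _ITM_RM128 call.  */
      *dst = tmp;	/* May be lowered to an _ITM_WM128 call.  */
    }
  return tmp;
}
#endif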
30883 \f
30884 /* Initialize the transactional memory vector load/store builtins. */
30885
30886 static void
30887 ix86_init_tm_builtins (void)
30888 {
30889 enum ix86_builtin_func_type ftype;
30890 const struct builtin_description *d;
30891 size_t i;
30892 tree decl;
30893 tree attrs_load, attrs_type_load, attrs_store, attrs_type_store;
30894 tree attrs_log, attrs_type_log;
30895
30896 if (!flag_tm)
30897 return;
30898
30899 /* If there are no builtins defined, we must be compiling in a
30900 language without trans-mem support. */
30901 if (!builtin_decl_explicit_p (BUILT_IN_TM_LOAD_1))
30902 return;
30903
30904 /* Use whatever attributes a normal TM load has. */
30905 decl = builtin_decl_explicit (BUILT_IN_TM_LOAD_1);
30906 attrs_load = DECL_ATTRIBUTES (decl);
30907 attrs_type_load = TYPE_ATTRIBUTES (TREE_TYPE (decl));
30908 /* Use whatever attributes a normal TM store has. */
30909 decl = builtin_decl_explicit (BUILT_IN_TM_STORE_1);
30910 attrs_store = DECL_ATTRIBUTES (decl);
30911 attrs_type_store = TYPE_ATTRIBUTES (TREE_TYPE (decl));
30912 /* Use whatever attributes a normal TM log has. */
30913 decl = builtin_decl_explicit (BUILT_IN_TM_LOG);
30914 attrs_log = DECL_ATTRIBUTES (decl);
30915 attrs_type_log = TYPE_ATTRIBUTES (TREE_TYPE (decl));
30916
30917 for (i = 0, d = bdesc_tm;
30918 i < ARRAY_SIZE (bdesc_tm);
30919 i++, d++)
30920 {
30921 if ((d->mask & ix86_isa_flags) != 0
30922 || (lang_hooks.builtin_function
30923 == lang_hooks.builtin_function_ext_scope))
30924 {
30925 tree type, attrs, attrs_type;
30926 enum built_in_function code = (enum built_in_function) d->code;
30927
30928 ftype = (enum ix86_builtin_func_type) d->flag;
30929 type = ix86_get_builtin_func_type (ftype);
30930
30931 if (BUILTIN_TM_LOAD_P (code))
30932 {
30933 attrs = attrs_load;
30934 attrs_type = attrs_type_load;
30935 }
30936 else if (BUILTIN_TM_STORE_P (code))
30937 {
30938 attrs = attrs_store;
30939 attrs_type = attrs_type_store;
30940 }
30941 else
30942 {
30943 attrs = attrs_log;
30944 attrs_type = attrs_type_log;
30945 }
30946 decl = add_builtin_function (d->name, type, code, BUILT_IN_NORMAL,
30947 /* The builtin without the prefix for
30948 calling it directly. */
30949 d->name + strlen ("__builtin_"),
30950 attrs);
30951 /* add_builtin_function() will set the DECL_ATTRIBUTES, now
30952 set the TYPE_ATTRIBUTES. */
30953 decl_attributes (&TREE_TYPE (decl), attrs_type, ATTR_FLAG_BUILT_IN);
30954
30955 set_builtin_decl (code, decl, false);
30956 }
30957 }
30958 }
30959
30960 /* Set up all the MMX/SSE builtins, even builtins for instructions that are not
30961 in the current target ISA, so that the user can compile particular modules
30962 with target-specific options that differ from the command-line
30963 options. */
30964 static void
30965 ix86_init_mmx_sse_builtins (void)
30966 {
30967 const struct builtin_description * d;
30968 enum ix86_builtin_func_type ftype;
30969 size_t i;
30970
30971 /* Add all special builtins with variable number of operands. */
30972 for (i = 0, d = bdesc_special_args;
30973 i < ARRAY_SIZE (bdesc_special_args);
30974 i++, d++)
30975 {
30976 if (d->name == 0)
30977 continue;
30978
30979 ftype = (enum ix86_builtin_func_type) d->flag;
30980 def_builtin (d->mask, d->name, ftype, d->code);
30981 }
30982
30983 /* Add all builtins with variable number of operands. */
30984 for (i = 0, d = bdesc_args;
30985 i < ARRAY_SIZE (bdesc_args);
30986 i++, d++)
30987 {
30988 if (d->name == 0)
30989 continue;
30990
30991 ftype = (enum ix86_builtin_func_type) d->flag;
30992 def_builtin_const (d->mask, d->name, ftype, d->code);
30993 }
30994
30995 /* Add all builtins with rounding. */
30996 for (i = 0, d = bdesc_round_args;
30997 i < ARRAY_SIZE (bdesc_round_args);
30998 i++, d++)
30999 {
31000 if (d->name == 0)
31001 continue;
31002
31003 ftype = (enum ix86_builtin_func_type) d->flag;
31004 def_builtin_const (d->mask, d->name, ftype, d->code);
31005 }
31006
31007 /* pcmpestr[im] insns. */
31008 for (i = 0, d = bdesc_pcmpestr;
31009 i < ARRAY_SIZE (bdesc_pcmpestr);
31010 i++, d++)
31011 {
31012 if (d->code == IX86_BUILTIN_PCMPESTRM128)
31013 ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT;
31014 else
31015 ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT;
31016 def_builtin_const (d->mask, d->name, ftype, d->code);
31017 }
31018
31019 /* pcmpistr[im] insns. */
31020 for (i = 0, d = bdesc_pcmpistr;
31021 i < ARRAY_SIZE (bdesc_pcmpistr);
31022 i++, d++)
31023 {
31024 if (d->code == IX86_BUILTIN_PCMPISTRM128)
31025 ftype = V16QI_FTYPE_V16QI_V16QI_INT;
31026 else
31027 ftype = INT_FTYPE_V16QI_V16QI_INT;
31028 def_builtin_const (d->mask, d->name, ftype, d->code);
31029 }
31030
31031 /* comi/ucomi insns. */
31032 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
31033 {
31034 if (d->mask == OPTION_MASK_ISA_SSE2)
31035 ftype = INT_FTYPE_V2DF_V2DF;
31036 else
31037 ftype = INT_FTYPE_V4SF_V4SF;
31038 def_builtin_const (d->mask, d->name, ftype, d->code);
31039 }
31040
31041 /* SSE */
31042 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
31043 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);
31044 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr",
31045 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR);
31046
31047 /* SSE or 3DNow!A */
31048 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
31049 "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR,
31050 IX86_BUILTIN_MASKMOVQ);
31051
31052 /* SSE2 */
31053 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu",
31054 VOID_FTYPE_V16QI_V16QI_PCHAR, IX86_BUILTIN_MASKMOVDQU);
31055
31056 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush",
31057 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH);
31058 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence",
31059 VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE);
31060
31061 /* SSE3. */
31062 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor",
31063 VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITOR);
31064 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait",
31065 VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT);
31066
31067 /* AES */
31068 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128",
31069 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128);
31070 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128",
31071 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128);
31072 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128",
31073 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128);
31074 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128",
31075 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128);
31076 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128",
31077 V2DI_FTYPE_V2DI, IX86_BUILTIN_AESIMC128);
31078 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128",
31079 V2DI_FTYPE_V2DI_INT, IX86_BUILTIN_AESKEYGENASSIST128);
31080
31081 /* PCLMUL */
31082 def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128",
31083 V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128);
31084
31085 /* RDRND */
31086 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand16_step",
31087 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDRAND16_STEP);
31088 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand32_step",
31089 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDRAND32_STEP);
31090 def_builtin (OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT,
31091 "__builtin_ia32_rdrand64_step", INT_FTYPE_PULONGLONG,
31092 IX86_BUILTIN_RDRAND64_STEP);
31093
31094 /* AVX2 */
31095 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2df",
31096 V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_V2DF_INT,
31097 IX86_BUILTIN_GATHERSIV2DF);
31098
31099 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4df",
31100 V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_V4DF_INT,
31101 IX86_BUILTIN_GATHERSIV4DF);
31102
31103 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2df",
31104 V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_V2DF_INT,
31105 IX86_BUILTIN_GATHERDIV2DF);
31106
31107 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4df",
31108 V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_V4DF_INT,
31109 IX86_BUILTIN_GATHERDIV4DF);
31110
31111 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4sf",
31112 V4SF_FTYPE_V4SF_PCFLOAT_V4SI_V4SF_INT,
31113 IX86_BUILTIN_GATHERSIV4SF);
31114
31115 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8sf",
31116 V8SF_FTYPE_V8SF_PCFLOAT_V8SI_V8SF_INT,
31117 IX86_BUILTIN_GATHERSIV8SF);
31118
31119 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf",
31120 V4SF_FTYPE_V4SF_PCFLOAT_V2DI_V4SF_INT,
31121 IX86_BUILTIN_GATHERDIV4SF);
31122
31123 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf256",
31124 V4SF_FTYPE_V4SF_PCFLOAT_V4DI_V4SF_INT,
31125 IX86_BUILTIN_GATHERDIV8SF);
31126
31127 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2di",
31128 V2DI_FTYPE_V2DI_PCINT64_V4SI_V2DI_INT,
31129 IX86_BUILTIN_GATHERSIV2DI);
31130
31131 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4di",
31132 V4DI_FTYPE_V4DI_PCINT64_V4SI_V4DI_INT,
31133 IX86_BUILTIN_GATHERSIV4DI);
31134
31135 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2di",
31136 V2DI_FTYPE_V2DI_PCINT64_V2DI_V2DI_INT,
31137 IX86_BUILTIN_GATHERDIV2DI);
31138
31139 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4di",
31140 V4DI_FTYPE_V4DI_PCINT64_V4DI_V4DI_INT,
31141 IX86_BUILTIN_GATHERDIV4DI);
31142
31143 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4si",
31144 V4SI_FTYPE_V4SI_PCINT_V4SI_V4SI_INT,
31145 IX86_BUILTIN_GATHERSIV4SI);
31146
31147 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8si",
31148 V8SI_FTYPE_V8SI_PCINT_V8SI_V8SI_INT,
31149 IX86_BUILTIN_GATHERSIV8SI);
31150
31151 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si",
31152 V4SI_FTYPE_V4SI_PCINT_V2DI_V4SI_INT,
31153 IX86_BUILTIN_GATHERDIV4SI);
31154
31155 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si256",
31156 V4SI_FTYPE_V4SI_PCINT_V4DI_V4SI_INT,
31157 IX86_BUILTIN_GATHERDIV8SI);
31158
31159 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4df",
31160 V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_V4DF_INT,
31161 IX86_BUILTIN_GATHERALTSIV4DF);
31162
31163 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4sf256",
31164 V8SF_FTYPE_V8SF_PCFLOAT_V4DI_V8SF_INT,
31165 IX86_BUILTIN_GATHERALTDIV8SF);
31166
31167 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4di",
31168 V4DI_FTYPE_V4DI_PCINT64_V8SI_V4DI_INT,
31169 IX86_BUILTIN_GATHERALTSIV4DI);
31170
31171 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4si256",
31172 V8SI_FTYPE_V8SI_PCINT_V4DI_V8SI_INT,
31173 IX86_BUILTIN_GATHERALTDIV8SI);
31174
31175 /* AVX512F */
31176 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16sf",
31177 V16SF_FTYPE_V16SF_PCFLOAT_V16SI_HI_INT,
31178 IX86_BUILTIN_GATHER3SIV16SF);
31179
31180 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8df",
31181 V8DF_FTYPE_V8DF_PCDOUBLE_V8SI_QI_INT,
31182 IX86_BUILTIN_GATHER3SIV8DF);
31183
31184 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16sf",
31185 V8SF_FTYPE_V8SF_PCFLOAT_V8DI_QI_INT,
31186 IX86_BUILTIN_GATHER3DIV16SF);
31187
31188 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8df",
31189 V8DF_FTYPE_V8DF_PCDOUBLE_V8DI_QI_INT,
31190 IX86_BUILTIN_GATHER3DIV8DF);
31191
31192 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16si",
31193 V16SI_FTYPE_V16SI_PCINT_V16SI_HI_INT,
31194 IX86_BUILTIN_GATHER3SIV16SI);
31195
31196 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8di",
31197 V8DI_FTYPE_V8DI_PCINT64_V8SI_QI_INT,
31198 IX86_BUILTIN_GATHER3SIV8DI);
31199
31200 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16si",
31201 V8SI_FTYPE_V8SI_PCINT_V8DI_QI_INT,
31202 IX86_BUILTIN_GATHER3DIV16SI);
31203
31204 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8di",
31205 V8DI_FTYPE_V8DI_PCINT64_V8DI_QI_INT,
31206 IX86_BUILTIN_GATHER3DIV8DI);
31207
31208 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8df",
31209 V8DF_FTYPE_V8DF_PCDOUBLE_V16SI_QI_INT,
31210 IX86_BUILTIN_GATHER3ALTSIV8DF);
31211
31212 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8sf",
31213 V16SF_FTYPE_V16SF_PCFLOAT_V8DI_HI_INT,
31214 IX86_BUILTIN_GATHER3ALTDIV16SF);
31215
31216 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8di",
31217 V8DI_FTYPE_V8DI_PCINT64_V16SI_QI_INT,
31218 IX86_BUILTIN_GATHER3ALTSIV8DI);
31219
31220 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8si",
31221 V16SI_FTYPE_V16SI_PCINT_V8DI_HI_INT,
31222 IX86_BUILTIN_GATHER3ALTDIV16SI);
31223
31224 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16sf",
31225 VOID_FTYPE_PFLOAT_HI_V16SI_V16SF_INT,
31226 IX86_BUILTIN_SCATTERSIV16SF);
31227
31228 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8df",
31229 VOID_FTYPE_PDOUBLE_QI_V8SI_V8DF_INT,
31230 IX86_BUILTIN_SCATTERSIV8DF);
31231
31232 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16sf",
31233 VOID_FTYPE_PFLOAT_QI_V8DI_V8SF_INT,
31234 IX86_BUILTIN_SCATTERDIV16SF);
31235
31236 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv8df",
31237 VOID_FTYPE_PDOUBLE_QI_V8DI_V8DF_INT,
31238 IX86_BUILTIN_SCATTERDIV8DF);
31239
31240 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16si",
31241 VOID_FTYPE_PINT_HI_V16SI_V16SI_INT,
31242 IX86_BUILTIN_SCATTERSIV16SI);
31243
31244 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8di",
31245 VOID_FTYPE_PLONGLONG_QI_V8SI_V8DI_INT,
31246 IX86_BUILTIN_SCATTERSIV8DI);
31247
31248 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16si",
31249 VOID_FTYPE_PINT_QI_V8DI_V8SI_INT,
31250 IX86_BUILTIN_SCATTERDIV16SI);
31251
31252 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv8di",
31253 VOID_FTYPE_PLONGLONG_QI_V8DI_V8DI_INT,
31254 IX86_BUILTIN_SCATTERDIV8DI);
31255
31256 /* AVX512PF */
31257 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdpd",
31258 VOID_FTYPE_QI_V8SI_PCINT64_INT_INT,
31259 IX86_BUILTIN_GATHERPFDPD);
31260 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdps",
31261 VOID_FTYPE_HI_V16SI_PCINT_INT_INT,
31262 IX86_BUILTIN_GATHERPFDPS);
31263 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqpd",
31264 VOID_FTYPE_QI_V8DI_PCINT64_INT_INT,
31265 IX86_BUILTIN_GATHERPFQPD);
31266 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqps",
31267 VOID_FTYPE_QI_V8DI_PCINT_INT_INT,
31268 IX86_BUILTIN_GATHERPFQPS);
31269 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdpd",
31270 VOID_FTYPE_QI_V8SI_PCINT64_INT_INT,
31271 IX86_BUILTIN_SCATTERPFDPD);
31272 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdps",
31273 VOID_FTYPE_HI_V16SI_PCINT_INT_INT,
31274 IX86_BUILTIN_SCATTERPFDPS);
31275 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqpd",
31276 VOID_FTYPE_QI_V8DI_PCINT64_INT_INT,
31277 IX86_BUILTIN_SCATTERPFQPD);
31278 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqps",
31279 VOID_FTYPE_QI_V8DI_PCINT_INT_INT,
31280 IX86_BUILTIN_SCATTERPFQPS);
31281
31282 /* SHA */
31283 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg1",
31284 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG1);
31285 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg2",
31286 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG2);
31287 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1nexte",
31288 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1NEXTE);
31289 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1rnds4",
31290 V4SI_FTYPE_V4SI_V4SI_INT, IX86_BUILTIN_SHA1RNDS4);
31291 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256msg1",
31292 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG1);
31293 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256msg2",
31294 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG2);
31295 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256rnds2",
31296 V4SI_FTYPE_V4SI_V4SI_V4SI, IX86_BUILTIN_SHA256RNDS2);
31297
31298 /* RTM. */
31299 def_builtin (OPTION_MASK_ISA_RTM, "__builtin_ia32_xabort",
31300 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_XABORT);
31301
31302 /* MMX access to the vec_init patterns. */
31303 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si",
31304 V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);
31305
31306 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi",
31307 V4HI_FTYPE_HI_HI_HI_HI,
31308 IX86_BUILTIN_VEC_INIT_V4HI);
31309
31310 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi",
31311 V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI,
31312 IX86_BUILTIN_VEC_INIT_V8QI);
31313
31314 /* Access to the vec_extract patterns. */
31315 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df",
31316 DOUBLE_FTYPE_V2DF_INT, IX86_BUILTIN_VEC_EXT_V2DF);
31317 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di",
31318 DI_FTYPE_V2DI_INT, IX86_BUILTIN_VEC_EXT_V2DI);
31319 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf",
31320 FLOAT_FTYPE_V4SF_INT, IX86_BUILTIN_VEC_EXT_V4SF);
31321 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si",
31322 SI_FTYPE_V4SI_INT, IX86_BUILTIN_VEC_EXT_V4SI);
31323 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi",
31324 HI_FTYPE_V8HI_INT, IX86_BUILTIN_VEC_EXT_V8HI);
31325
31326 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
31327 "__builtin_ia32_vec_ext_v4hi",
31328 HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI);
31329
31330 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si",
31331 SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI);
31332
31333 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi",
31334 QI_FTYPE_V16QI_INT, IX86_BUILTIN_VEC_EXT_V16QI);
31335
31336 /* Access to the vec_set patterns. */
31337 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT,
31338 "__builtin_ia32_vec_set_v2di",
31339 V2DI_FTYPE_V2DI_DI_INT, IX86_BUILTIN_VEC_SET_V2DI);
31340
31341 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf",
31342 V4SF_FTYPE_V4SF_FLOAT_INT, IX86_BUILTIN_VEC_SET_V4SF);
31343
31344 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si",
31345 V4SI_FTYPE_V4SI_SI_INT, IX86_BUILTIN_VEC_SET_V4SI);
31346
31347 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi",
31348 V8HI_FTYPE_V8HI_HI_INT, IX86_BUILTIN_VEC_SET_V8HI);
31349
31350 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
31351 "__builtin_ia32_vec_set_v4hi",
31352 V4HI_FTYPE_V4HI_HI_INT, IX86_BUILTIN_VEC_SET_V4HI);
31353
31354 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi",
31355 V16QI_FTYPE_V16QI_QI_INT, IX86_BUILTIN_VEC_SET_V16QI);
31356
31357 /* RDSEED */
31358 def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_hi_step",
31359 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDSEED16_STEP);
31360 def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_si_step",
31361 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDSEED32_STEP);
31362 def_builtin (OPTION_MASK_ISA_RDSEED | OPTION_MASK_ISA_64BIT,
31363 "__builtin_ia32_rdseed_di_step",
31364 INT_FTYPE_PULONGLONG, IX86_BUILTIN_RDSEED64_STEP);
31365
31366 /* ADCX */
31367 def_builtin (0, "__builtin_ia32_addcarryx_u32",
31368 UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED, IX86_BUILTIN_ADDCARRYX32);
31369 def_builtin (OPTION_MASK_ISA_64BIT,
31370 "__builtin_ia32_addcarryx_u64",
31371 UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG,
31372 IX86_BUILTIN_ADDCARRYX64);
31373
31374 /* SBB */
31375 def_builtin (0, "__builtin_ia32_sbb_u32",
31376 UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED, IX86_BUILTIN_SBB32);
31377 def_builtin (OPTION_MASK_ISA_64BIT,
31378 "__builtin_ia32_sbb_u64",
31379 UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG,
31380 IX86_BUILTIN_SBB64);
31381
31382 /* Read/write FLAGS. */
31383 def_builtin (~OPTION_MASK_ISA_64BIT, "__builtin_ia32_readeflags_u32",
31384 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS);
31385 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_ia32_readeflags_u64",
31386 UINT64_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS);
31387 def_builtin (~OPTION_MASK_ISA_64BIT, "__builtin_ia32_writeeflags_u32",
31388 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_WRITE_FLAGS);
31389 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_ia32_writeeflags_u64",
31390 VOID_FTYPE_UINT64, IX86_BUILTIN_WRITE_FLAGS);
31391
31392 /* CLFLUSHOPT. */
31393 def_builtin (OPTION_MASK_ISA_CLFLUSHOPT, "__builtin_ia32_clflushopt",
31394 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSHOPT);
31395
31396 /* Add FMA4 multi-arg argument instructions */
31397 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
31398 {
31399 if (d->name == 0)
31400 continue;
31401
31402 ftype = (enum ix86_builtin_func_type) d->flag;
31403 def_builtin_const (d->mask, d->name, ftype, d->code);
31404 }
31405 }
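/* Illustrative sketch only (disabled with #if 0): how one of the builtins
   registered above is used from user code.  Per the INT_FTYPE_PUNSIGNED
   signature given to __builtin_ia32_rdrand32_step, the builtin stores a
   random value through its pointer argument and returns nonzero on success;
   it requires -mrdrnd.  The wrapper name is hypothetical.  */
#if 0
unsigned int
hw_random_u32 (void)
{
  unsigned int value = 0;
  /* Retry until the hardware random number generator reports success.  */
  while (!__builtin_ia32_rdrand32_step (&value))
    ;
  return value;
}
#endif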
31406
31407 /* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL
31408 to return a pointer to VERSION_DECL if the outcome of the expression
31409 formed by PREDICATE_CHAIN is true. This function will be called during
31410 version dispatch to decide which function version to execute. It returns
31411 the basic block at the end, to which more conditions can be added. */
31412
31413 static basic_block
31414 add_condition_to_bb (tree function_decl, tree version_decl,
31415 tree predicate_chain, basic_block new_bb)
31416 {
31417 gimple return_stmt;
31418 tree convert_expr, result_var;
31419 gimple convert_stmt;
31420 gimple call_cond_stmt;
31421 gimple if_else_stmt;
31422
31423 basic_block bb1, bb2, bb3;
31424 edge e12, e23;
31425
31426 tree cond_var, and_expr_var = NULL_TREE;
31427 gimple_seq gseq;
31428
31429 tree predicate_decl, predicate_arg;
31430
31431 push_cfun (DECL_STRUCT_FUNCTION (function_decl));
31432
31433 gcc_assert (new_bb != NULL);
31434 gseq = bb_seq (new_bb);
31435
31436
31437 convert_expr = build1 (CONVERT_EXPR, ptr_type_node,
31438 build_fold_addr_expr (version_decl));
31439 result_var = create_tmp_var (ptr_type_node, NULL);
31440 convert_stmt = gimple_build_assign (result_var, convert_expr);
31441 return_stmt = gimple_build_return (result_var);
31442
31443 if (predicate_chain == NULL_TREE)
31444 {
31445 gimple_seq_add_stmt (&gseq, convert_stmt);
31446 gimple_seq_add_stmt (&gseq, return_stmt);
31447 set_bb_seq (new_bb, gseq);
31448 gimple_set_bb (convert_stmt, new_bb);
31449 gimple_set_bb (return_stmt, new_bb);
31450 pop_cfun ();
31451 return new_bb;
31452 }
31453
31454 while (predicate_chain != NULL)
31455 {
31456 cond_var = create_tmp_var (integer_type_node, NULL);
31457 predicate_decl = TREE_PURPOSE (predicate_chain);
31458 predicate_arg = TREE_VALUE (predicate_chain);
31459 call_cond_stmt = gimple_build_call (predicate_decl, 1, predicate_arg);
31460 gimple_call_set_lhs (call_cond_stmt, cond_var);
31461
31462 gimple_set_block (call_cond_stmt, DECL_INITIAL (function_decl));
31463 gimple_set_bb (call_cond_stmt, new_bb);
31464 gimple_seq_add_stmt (&gseq, call_cond_stmt);
31465
31466 predicate_chain = TREE_CHAIN (predicate_chain);
31467
31468 if (and_expr_var == NULL)
31469 and_expr_var = cond_var;
31470 else
31471 {
31472 gimple assign_stmt;
31473 /* Use MIN_EXPR as a logical AND: the result is zero if any predicate
31474 value is zero. and_expr_var = MIN_EXPR <cond_var, and_expr_var>. */
31475 assign_stmt = gimple_build_assign (and_expr_var,
31476 build2 (MIN_EXPR, integer_type_node,
31477 cond_var, and_expr_var));
31478
31479 gimple_set_block (assign_stmt, DECL_INITIAL (function_decl));
31480 gimple_set_bb (assign_stmt, new_bb);
31481 gimple_seq_add_stmt (&gseq, assign_stmt);
31482 }
31483 }
31484
31485 if_else_stmt = gimple_build_cond (GT_EXPR, and_expr_var,
31486 integer_zero_node,
31487 NULL_TREE, NULL_TREE);
31488 gimple_set_block (if_else_stmt, DECL_INITIAL (function_decl));
31489 gimple_set_bb (if_else_stmt, new_bb);
31490 gimple_seq_add_stmt (&gseq, if_else_stmt);
31491
31492 gimple_seq_add_stmt (&gseq, convert_stmt);
31493 gimple_seq_add_stmt (&gseq, return_stmt);
31494 set_bb_seq (new_bb, gseq);
31495
31496 bb1 = new_bb;
31497 e12 = split_block (bb1, if_else_stmt);
31498 bb2 = e12->dest;
31499 e12->flags &= ~EDGE_FALLTHRU;
31500 e12->flags |= EDGE_TRUE_VALUE;
31501
31502 e23 = split_block (bb2, return_stmt);
31503
31504 gimple_set_bb (convert_stmt, bb2);
31505 gimple_set_bb (return_stmt, bb2);
31506
31507 bb3 = e23->dest;
31508 make_edge (bb1, bb3, EDGE_FALSE_VALUE);
31509
31510 remove_edge (e23);
31511 make_edge (bb2, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
31512
31513 pop_cfun ();
31514
31515 return bb3;
31516 }
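/* Illustrative sketch only (disabled with #if 0): the control flow, written
   as plain C, that repeated calls to add_condition_to_bb build inside the
   resolver.  Each predicate in PREDICATE_CHAIN becomes a __builtin_cpu_is or
   __builtin_cpu_supports call, their results are ANDed via MIN_EXPR, and the
   address of a matching version is returned.  foo_avx2 and foo_default are
   hypothetical version decls; the real code emits GIMPLE, not C.  */
#if 0
static void *
resolver_body_sketch (void)
{
  if (__builtin_cpu_is ("haswell") && __builtin_cpu_supports ("avx2"))
    return (void *) &foo_avx2;
  /* ...one such test per remaining version, in priority order...  */
  return (void *) &foo_default;
}
#endif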
31517
31518 /* This parses the attribute arguments to target in DECL and determines
31519 the right builtin to use to match the platform specification.
31520 It returns the priority value for this version decl. If PREDICATE_LIST
31521 is not NULL, it stores the list of cpu features that need to be checked
31522 before dispatching this function. */
31523
31524 static unsigned int
31525 get_builtin_code_for_version (tree decl, tree *predicate_list)
31526 {
31527 tree attrs;
31528 struct cl_target_option cur_target;
31529 tree target_node;
31530 struct cl_target_option *new_target;
31531 const char *arg_str = NULL;
31532 const char *attrs_str = NULL;
31533 char *tok_str = NULL;
31534 char *token;
31535
31536 /* Priority of i386 features; a greater value means a higher priority. This is
31537 used to decide the order in which function dispatch must happen. For
31538 instance, a version specialized for SSE4.2 should be checked for dispatch
31539 before a version for SSE3, as SSE4.2 implies SSE3. */
31540 enum feature_priority
31541 {
31542 P_ZERO = 0,
31543 P_MMX,
31544 P_SSE,
31545 P_SSE2,
31546 P_SSE3,
31547 P_SSSE3,
31548 P_PROC_SSSE3,
31549 P_SSE4_A,
31550 P_PROC_SSE4_A,
31551 P_SSE4_1,
31552 P_SSE4_2,
31553 P_PROC_SSE4_2,
31554 P_POPCNT,
31555 P_AVX,
31556 P_PROC_AVX,
31557 P_FMA4,
31558 P_XOP,
31559 P_PROC_XOP,
31560 P_FMA,
31561 P_PROC_FMA,
31562 P_AVX2,
31563 P_PROC_AVX2
31564 };
31565
31566 enum feature_priority priority = P_ZERO;
31567
31568 /* These are the target attribute strings for which a dispatcher is
31569 available, from fold_builtin_cpu. */
31570
31571 static struct _feature_list
31572 {
31573 const char *const name;
31574 const enum feature_priority priority;
31575 }
31576 const feature_list[] =
31577 {
31578 {"mmx", P_MMX},
31579 {"sse", P_SSE},
31580 {"sse2", P_SSE2},
31581 {"sse3", P_SSE3},
31582 {"sse4a", P_SSE4_A},
31583 {"ssse3", P_SSSE3},
31584 {"sse4.1", P_SSE4_1},
31585 {"sse4.2", P_SSE4_2},
31586 {"popcnt", P_POPCNT},
31587 {"avx", P_AVX},
31588 {"fma4", P_FMA4},
31589 {"xop", P_XOP},
31590 {"fma", P_FMA},
31591 {"avx2", P_AVX2}
31592 };
31593
31594
31595 static unsigned int NUM_FEATURES
31596 = sizeof (feature_list) / sizeof (struct _feature_list);
31597
31598 unsigned int i;
31599
31600 tree predicate_chain = NULL_TREE;
31601 tree predicate_decl, predicate_arg;
31602
31603 attrs = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
31604 gcc_assert (attrs != NULL);
31605
31606 attrs = TREE_VALUE (TREE_VALUE (attrs));
31607
31608 gcc_assert (TREE_CODE (attrs) == STRING_CST);
31609 attrs_str = TREE_STRING_POINTER (attrs);
31610
31611 /* Return priority zero for default function. */
31612 if (strcmp (attrs_str, "default") == 0)
31613 return 0;
31614
31615 /* Handle arch= if specified. For priority, set it to be 1 more than
31616 the best instruction set the processor can handle. For instance, if
31617 there is a version for atom and a version for ssse3 (the highest ISA
31618 priority for atom), the atom version must be checked for dispatch
31619 before the ssse3 version. */
31620 if (strstr (attrs_str, "arch=") != NULL)
31621 {
31622 cl_target_option_save (&cur_target, &global_options);
31623 target_node = ix86_valid_target_attribute_tree (attrs, &global_options,
31624 &global_options_set);
31625
31626 gcc_assert (target_node);
31627 new_target = TREE_TARGET_OPTION (target_node);
31628 gcc_assert (new_target);
31629
31630 if (new_target->arch_specified && new_target->arch > 0)
31631 {
31632 switch (new_target->arch)
31633 {
31634 case PROCESSOR_CORE2:
31635 arg_str = "core2";
31636 priority = P_PROC_SSSE3;
31637 break;
31638 case PROCESSOR_NEHALEM:
31639 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_AES)
31640 arg_str = "westmere";
31641 else
31642 /* We translate "arch=corei7" and "arch=nehalem" to
31643 "corei7" so that it will be mapped to M_INTEL_COREI7
31644 as cpu type to cover all M_INTEL_COREI7_XXXs. */
31645 arg_str = "corei7";
31646 priority = P_PROC_SSE4_2;
31647 break;
31648 case PROCESSOR_SANDYBRIDGE:
31649 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_F16C)
31650 arg_str = "ivybridge";
31651 else
31652 arg_str = "sandybridge";
31653 priority = P_PROC_AVX;
31654 break;
31655 case PROCESSOR_HASWELL:
31656 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_ADX)
31657 arg_str = "broadwell";
31658 else
31659 arg_str = "haswell";
31660 priority = P_PROC_AVX2;
31661 break;
31662 case PROCESSOR_BONNELL:
31663 arg_str = "bonnell";
31664 priority = P_PROC_SSSE3;
31665 break;
31666 case PROCESSOR_SILVERMONT:
31667 arg_str = "silvermont";
31668 priority = P_PROC_SSE4_2;
31669 break;
31670 case PROCESSOR_AMDFAM10:
31671 arg_str = "amdfam10h";
31672 priority = P_PROC_SSE4_A;
31673 break;
31674 case PROCESSOR_BTVER1:
31675 arg_str = "btver1";
31676 priority = P_PROC_SSE4_A;
31677 break;
31678 case PROCESSOR_BTVER2:
31679 arg_str = "btver2";
31680 priority = P_PROC_AVX;
31681 break;
31682 case PROCESSOR_BDVER1:
31683 arg_str = "bdver1";
31684 priority = P_PROC_XOP;
31685 break;
31686 case PROCESSOR_BDVER2:
31687 arg_str = "bdver2";
31688 priority = P_PROC_FMA;
31689 break;
31690 case PROCESSOR_BDVER3:
31691 arg_str = "bdver3";
31692 priority = P_PROC_FMA;
31693 break;
31694 case PROCESSOR_BDVER4:
31695 arg_str = "bdver4";
31696 priority = P_PROC_AVX2;
31697 break;
31698 }
31699 }
31700
31701 cl_target_option_restore (&global_options, &cur_target);
31702
31703 if (predicate_list && arg_str == NULL)
31704 {
31705 error_at (DECL_SOURCE_LOCATION (decl),
31706 "No dispatcher found for the versioning attributes");
31707 return 0;
31708 }
31709
31710 if (predicate_list)
31711 {
31712 predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_IS];
31713 /* For a C string literal the length includes the trailing NULL. */
31714 predicate_arg = build_string_literal (strlen (arg_str) + 1, arg_str);
31715 predicate_chain = tree_cons (predicate_decl, predicate_arg,
31716 predicate_chain);
31717 }
31718 }
31719
31720 /* Process feature name. */
31721 tok_str = (char *) xmalloc (strlen (attrs_str) + 1);
31722 strcpy (tok_str, attrs_str);
31723 token = strtok (tok_str, ",");
31724 predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_SUPPORTS];
31725
31726 while (token != NULL)
31727 {
31728 /* Do not process "arch=" */
31729 if (strncmp (token, "arch=", 5) == 0)
31730 {
31731 token = strtok (NULL, ",");
31732 continue;
31733 }
31734 for (i = 0; i < NUM_FEATURES; ++i)
31735 {
31736 if (strcmp (token, feature_list[i].name) == 0)
31737 {
31738 if (predicate_list)
31739 {
31740 predicate_arg = build_string_literal (
31741 strlen (feature_list[i].name) + 1,
31742 feature_list[i].name);
31743 predicate_chain = tree_cons (predicate_decl, predicate_arg,
31744 predicate_chain);
31745 }
31746 /* Find the maximum priority feature. */
31747 if (feature_list[i].priority > priority)
31748 priority = feature_list[i].priority;
31749
31750 break;
31751 }
31752 }
31753 if (predicate_list && i == NUM_FEATURES)
31754 {
31755 error_at (DECL_SOURCE_LOCATION (decl),
31756 "No dispatcher found for %s", token);
31757 return 0;
31758 }
31759 token = strtok (NULL, ",");
31760 }
31761 free (tok_str);
31762
31763 if (predicate_list && predicate_chain == NULL_TREE)
31764 {
31765 error_at (DECL_SOURCE_LOCATION (decl),
31766 "No dispatcher found for the versioning attributes : %s",
31767 attrs_str);
31768 return 0;
31769 }
31770 else if (predicate_list)
31771 {
31772 predicate_chain = nreverse (predicate_chain);
31773 *predicate_list = predicate_chain;
31774 }
31775
31776 return priority;
31777 }
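/* Illustrative worked example of the priority computation above: for a
   version declared with __attribute__ ((target ("arch=core2,popcnt"))),
   the arch= clause yields P_PROC_SSSE3 and the "popcnt" feature yields
   P_POPCNT; P_POPCNT is the larger enumerator, so it is the priority
   returned.  */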
31778
31779 /* This compares the priority of target features in function DECL1
31780 and DECL2. It returns positive value if DECL1 is higher priority,
31781 negative value if DECL2 is higher priority and 0 if they are the
31782 same. */
31783
31784 static int
31785 ix86_compare_version_priority (tree decl1, tree decl2)
31786 {
31787 unsigned int priority1 = get_builtin_code_for_version (decl1, NULL);
31788 unsigned int priority2 = get_builtin_code_for_version (decl2, NULL);
31789
31790 return (int)priority1 - (int)priority2;
31791 }
31792
31793 /* V1 and V2 point to function versions with different priorities
31794 based on the target ISA. This function compares their priorities. */
31795
31796 static int
31797 feature_compare (const void *v1, const void *v2)
31798 {
31799 typedef struct _function_version_info
31800 {
31801 tree version_decl;
31802 tree predicate_chain;
31803 unsigned int dispatch_priority;
31804 } function_version_info;
31805
31806 const function_version_info c1 = *(const function_version_info *)v1;
31807 const function_version_info c2 = *(const function_version_info *)v2;
31808 return (c2.dispatch_priority - c1.dispatch_priority);
31809 }
31810
31811 /* This function generates the dispatch function for
31812 multi-versioned functions. DISPATCH_DECL is the function which will
31813 contain the dispatch logic. FNDECLS holds the function choices for
31814 dispatch; it is actually a vector of decls. EMPTY_BB is the basic block pointer
31815 in DISPATCH_DECL in which the dispatch code is generated. */
31816
31817 static int
31818 dispatch_function_versions (tree dispatch_decl,
31819 void *fndecls_p,
31820 basic_block *empty_bb)
31821 {
31822 tree default_decl;
31823 gimple ifunc_cpu_init_stmt;
31824 gimple_seq gseq;
31825 int ix;
31826 tree ele;
31827 vec<tree> *fndecls;
31828 unsigned int num_versions = 0;
31829 unsigned int actual_versions = 0;
31830 unsigned int i;
31831
31832 struct _function_version_info
31833 {
31834 tree version_decl;
31835 tree predicate_chain;
31836 unsigned int dispatch_priority;
31837 }*function_version_info;
31838
31839 gcc_assert (dispatch_decl != NULL
31840 && fndecls_p != NULL
31841 && empty_bb != NULL);
31842
31843 /* fndecls_p is actually a vector.  */
31844 fndecls = static_cast<vec<tree> *> (fndecls_p);
31845
31846 /* At least one more version other than the default. */
31847 num_versions = fndecls->length ();
31848 gcc_assert (num_versions >= 2);
31849
31850 function_version_info = (struct _function_version_info *)
31851 XNEWVEC (struct _function_version_info, (num_versions - 1));
31852
31853 /* The first version in the vector is the default decl. */
31854 default_decl = (*fndecls)[0];
31855
31856 push_cfun (DECL_STRUCT_FUNCTION (dispatch_decl));
31857
31858 gseq = bb_seq (*empty_bb);
31859 /* Function version dispatch is via IFUNC. IFUNC resolvers fire before
31860 constructors, so explicitly call __builtin_cpu_init here. */
31861 ifunc_cpu_init_stmt = gimple_build_call_vec (
31862 ix86_builtins [(int) IX86_BUILTIN_CPU_INIT], vNULL);
31863 gimple_seq_add_stmt (&gseq, ifunc_cpu_init_stmt);
31864 gimple_set_bb (ifunc_cpu_init_stmt, *empty_bb);
31865 set_bb_seq (*empty_bb, gseq);
31866
31867 pop_cfun ();
31868
31869
31870 for (ix = 1; fndecls->iterate (ix, &ele); ++ix)
31871 {
31872 tree version_decl = ele;
31873 tree predicate_chain = NULL_TREE;
31874 unsigned int priority;
31875 /* Get attribute string, parse it and find the right predicate decl.
31876 The predicate function could be a lengthy combination of many
31877 features, like arch-type and various isa-variants. */
31878 priority = get_builtin_code_for_version (version_decl,
31879 &predicate_chain);
31880
31881 if (predicate_chain == NULL_TREE)
31882 continue;
31883
31884 function_version_info [actual_versions].version_decl = version_decl;
31885 function_version_info [actual_versions].predicate_chain
31886 = predicate_chain;
31887 function_version_info [actual_versions].dispatch_priority = priority;
31888 actual_versions++;
31889 }
31890
31891 /* Sort the versions according to descending order of dispatch priority. The
31892 priority is based on the ISA. This is not a perfect solution. There
31893 could still be ambiguity. If more than one function version is suitable
31894 to execute, which one should be dispatched? In future, allow the user
31895 to specify a dispatch priority next to the version. */
31896 qsort (function_version_info, actual_versions,
31897 sizeof (struct _function_version_info), feature_compare);
31898
31899 for (i = 0; i < actual_versions; ++i)
31900 *empty_bb = add_condition_to_bb (dispatch_decl,
31901 function_version_info[i].version_decl,
31902 function_version_info[i].predicate_chain,
31903 *empty_bb);
31904
31905 /* Dispatch the default version at the end. */
31906 *empty_bb = add_condition_to_bb (dispatch_decl, default_decl,
31907 NULL, *empty_bb);
31908
31909 free (function_version_info);
31910 return 0;
31911 }
31912
31913 /* Comparator function to be used in qsort routine to sort attribute
31914 specification strings to "target". */
31915
31916 static int
31917 attr_strcmp (const void *v1, const void *v2)
31918 {
31919 const char *c1 = *(char *const*)v1;
31920 const char *c2 = *(char *const*)v2;
31921 return strcmp (c1, c2);
31922 }
31923
31924 /* ARGLIST is the argument to target attribute. This function tokenizes
31925 the comma separated arguments, sorts them and returns a string which
31926 is a unique identifier for the comma separated arguments. It also
31927 replaces non-identifier characters "=,-" with "_". */
31928
31929 static char *
31930 sorted_attr_string (tree arglist)
31931 {
31932 tree arg;
31933 size_t str_len_sum = 0;
31934 char **args = NULL;
31935 char *attr_str, *ret_str;
31936 char *attr = NULL;
31937 unsigned int argnum = 1;
31938 unsigned int i;
31939
31940 for (arg = arglist; arg; arg = TREE_CHAIN (arg))
31941 {
31942 const char *str = TREE_STRING_POINTER (TREE_VALUE (arg));
31943 size_t len = strlen (str);
31944 str_len_sum += len + 1;
31945 if (arg != arglist)
31946 argnum++;
31947 for (i = 0; i < strlen (str); i++)
31948 if (str[i] == ',')
31949 argnum++;
31950 }
31951
31952 attr_str = XNEWVEC (char, str_len_sum);
31953 str_len_sum = 0;
31954 for (arg = arglist; arg; arg = TREE_CHAIN (arg))
31955 {
31956 const char *str = TREE_STRING_POINTER (TREE_VALUE (arg));
31957 size_t len = strlen (str);
31958 memcpy (attr_str + str_len_sum, str, len);
31959 attr_str[str_len_sum + len] = TREE_CHAIN (arg) ? ',' : '\0';
31960 str_len_sum += len + 1;
31961 }
31962
31963 /* Replace "=,-" with "_". */
31964 for (i = 0; i < strlen (attr_str); i++)
31965 if (attr_str[i] == '=' || attr_str[i]== '-')
31966 attr_str[i] = '_';
31967
31968 if (argnum == 1)
31969 return attr_str;
31970
31971 args = XNEWVEC (char *, argnum);
31972
31973 i = 0;
31974 attr = strtok (attr_str, ",");
31975 while (attr != NULL)
31976 {
31977 args[i] = attr;
31978 i++;
31979 attr = strtok (NULL, ",");
31980 }
31981
31982 qsort (args, argnum, sizeof (char *), attr_strcmp);
31983
31984 ret_str = XNEWVEC (char, str_len_sum);
31985 str_len_sum = 0;
31986 for (i = 0; i < argnum; i++)
31987 {
31988 size_t len = strlen (args[i]);
31989 memcpy (ret_str + str_len_sum, args[i], len);
31990 ret_str[str_len_sum + len] = i < argnum - 1 ? '_' : '\0';
31991 str_len_sum += len + 1;
31992 }
31993
31994 XDELETEVEC (args);
31995 XDELETEVEC (attr_str);
31996 return ret_str;
31997 }
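/* Illustrative worked example of the transformation above: the argument
   lists "sse4.2,arch=core2" and "arch=core2,sse4.2" both tokenize to
   { "sse4.2", "arch_core2" } after the '=' replacement, sort to
   { "arch_core2", "sse4.2" }, and join to the same identifier string
   "arch_core2_sse4.2".  */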
31998
31999 /* This function changes the assembler name for functions that are
32000 versions. If DECL is a function version and has a "target"
32001 attribute, it appends the attribute string to its assembler name. */
32002
32003 static tree
32004 ix86_mangle_function_version_assembler_name (tree decl, tree id)
32005 {
32006 tree version_attr;
32007 const char *orig_name, *version_string;
32008 char *attr_str, *assembler_name;
32009
32010 if (DECL_DECLARED_INLINE_P (decl)
32011 && lookup_attribute ("gnu_inline",
32012 DECL_ATTRIBUTES (decl)))
32013 error_at (DECL_SOURCE_LOCATION (decl),
32014 "Function versions cannot be marked as gnu_inline,"
32015 " bodies have to be generated");
32016
32017 if (DECL_VIRTUAL_P (decl)
32018 || DECL_VINDEX (decl))
32019 sorry ("Virtual function multiversioning not supported");
32020
32021 version_attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
32022
32023 /* target attribute string cannot be NULL. */
32024 gcc_assert (version_attr != NULL_TREE);
32025
32026 orig_name = IDENTIFIER_POINTER (id);
32027 version_string
32028 = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (version_attr)));
32029
32030 if (strcmp (version_string, "default") == 0)
32031 return id;
32032
32033 attr_str = sorted_attr_string (TREE_VALUE (version_attr));
32034 assembler_name = XNEWVEC (char, strlen (orig_name) + strlen (attr_str) + 2);
32035
32036 sprintf (assembler_name, "%s.%s", orig_name, attr_str);
32037
32038 /* Allow assembler name to be modified if already set. */
32039 if (DECL_ASSEMBLER_NAME_SET_P (decl))
32040 SET_DECL_RTL (decl, NULL);
32041
32042 tree ret = get_identifier (assembler_name);
32043 XDELETEVEC (attr_str);
32044 XDELETEVEC (assembler_name);
32045 return ret;
32046 }
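/* Illustrative sketch only (disabled with #if 0): function multiversioning
   as written in user code, which in this release is accepted by the C++
   front end.  With the mangling above, the assembler name of the "avx2"
   version below gets ".avx2" appended, while the "default" version keeps
   the original name.  The function names are hypothetical.  */
#if 0
__attribute__ ((target ("default")))
int foo (void) { return 0; }

__attribute__ ((target ("avx2")))
int foo (void) { return 1; }

int use_foo (void) { return foo (); }	/* Calls resolve through the dispatcher.  */
#endif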
32047
32048 /* This function returns true if FN1 and FN2 are versions of the same function,
32049 that is, the target strings of the function decls are different. This assumes
32050 that FN1 and FN2 have the same signature. */
32051
32052 static bool
32053 ix86_function_versions (tree fn1, tree fn2)
32054 {
32055 tree attr1, attr2;
32056 char *target1, *target2;
32057 bool result;
32058
32059 if (TREE_CODE (fn1) != FUNCTION_DECL
32060 || TREE_CODE (fn2) != FUNCTION_DECL)
32061 return false;
32062
32063 attr1 = lookup_attribute ("target", DECL_ATTRIBUTES (fn1));
32064 attr2 = lookup_attribute ("target", DECL_ATTRIBUTES (fn2));
32065
32066 /* At least one function decl should have the target attribute specified. */
32067 if (attr1 == NULL_TREE && attr2 == NULL_TREE)
32068 return false;
32069
32070 /* Diagnose missing target attribute if one of the decls is already
32071 multi-versioned. */
32072 if (attr1 == NULL_TREE || attr2 == NULL_TREE)
32073 {
32074 if (DECL_FUNCTION_VERSIONED (fn1) || DECL_FUNCTION_VERSIONED (fn2))
32075 {
32076 if (attr2 != NULL_TREE)
32077 {
32078 tree tem = fn1;
32079 fn1 = fn2;
32080 fn2 = tem;
32081 attr1 = attr2;
32082 }
32083 error_at (DECL_SOURCE_LOCATION (fn2),
32084 "missing %<target%> attribute for multi-versioned %D",
32085 fn2);
32086 inform (DECL_SOURCE_LOCATION (fn1),
32087 "previous declaration of %D", fn1);
32088 /* Prevent diagnosing of the same error multiple times. */
32089 DECL_ATTRIBUTES (fn2)
32090 = tree_cons (get_identifier ("target"),
32091 copy_node (TREE_VALUE (attr1)),
32092 DECL_ATTRIBUTES (fn2));
32093 }
32094 return false;
32095 }
32096
32097 target1 = sorted_attr_string (TREE_VALUE (attr1));
32098 target2 = sorted_attr_string (TREE_VALUE (attr2));
32099
32100 /* The sorted target strings must be different for fn1 and fn2
32101 to be versions. */
32102 if (strcmp (target1, target2) == 0)
32103 result = false;
32104 else
32105 result = true;
32106
32107 XDELETEVEC (target1);
32108 XDELETEVEC (target2);
32109
32110 return result;
32111 }
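/* Illustrative example of the test above: two declarations of the same
   function tagged target ("avx") and target ("arch=core2") are treated as
   distinct versions, whereas tags target ("avx,sse4.2") and
   target ("sse4.2,avx") are not, because both sort to the identical string
   "avx_sse4.2".  */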
32112
32113 static tree
32114 ix86_mangle_decl_assembler_name (tree decl, tree id)
32115 {
32116 /* For function version, add the target suffix to the assembler name. */
32117 if (TREE_CODE (decl) == FUNCTION_DECL
32118 && DECL_FUNCTION_VERSIONED (decl))
32119 id = ix86_mangle_function_version_assembler_name (decl, id);
32120 #ifdef SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME
32121 id = SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME (decl, id);
32122 #endif
32123
32124 return id;
32125 }
32126
32127 /* Return a new name by appending SUFFIX to the DECL name. If make_unique
32128 is true, append the full path name of the source file. */
32129
32130 static char *
32131 make_name (tree decl, const char *suffix, bool make_unique)
32132 {
32133 char *global_var_name;
32134 int name_len;
32135 const char *name;
32136 const char *unique_name = NULL;
32137
32138 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
32139
32140 /* Get a unique name that can be used globally without any chances
32141 of collision at link time. */
32142 if (make_unique)
32143 unique_name = IDENTIFIER_POINTER (get_file_function_name ("\0"));
32144
32145 name_len = strlen (name) + strlen (suffix) + 2;
32146
32147 if (make_unique)
32148 name_len += strlen (unique_name) + 1;
32149 global_var_name = XNEWVEC (char, name_len);
32150
32151 /* Use '.' to concatenate names as it is demangler friendly. */
32152 if (make_unique)
32153 snprintf (global_var_name, name_len, "%s.%s.%s", name, unique_name,
32154 suffix);
32155 else
32156 snprintf (global_var_name, name_len, "%s.%s", name, suffix);
32157
32158 return global_var_name;
32159 }
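/* Illustrative example of the names produced above: for a public function
   "foo", make_name (decl, "resolver", false) yields "foo.resolver"; with
   MAKE_UNIQUE true a file-specific component from get_file_function_name is
   inserted between the two, giving "foo.<unique>.resolver".  */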
32160
32161 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
32162
32163 /* Make a dispatcher declaration for the multi-versioned function DECL.
32164 Calls to DECL function will be replaced with calls to the dispatcher
32165 by the front-end. Return the decl created. */
32166
32167 static tree
32168 make_dispatcher_decl (const tree decl)
32169 {
32170 tree func_decl;
32171 char *func_name;
32172 tree fn_type, func_type;
32173 bool is_uniq = false;
32174
32175 if (TREE_PUBLIC (decl) == 0)
32176 is_uniq = true;
32177
32178 func_name = make_name (decl, "ifunc", is_uniq);
32179
32180 fn_type = TREE_TYPE (decl);
32181 func_type = build_function_type (TREE_TYPE (fn_type),
32182 TYPE_ARG_TYPES (fn_type));
32183
32184 func_decl = build_fn_decl (func_name, func_type);
32185 XDELETEVEC (func_name);
32186 TREE_USED (func_decl) = 1;
32187 DECL_CONTEXT (func_decl) = NULL_TREE;
32188 DECL_INITIAL (func_decl) = error_mark_node;
32189 DECL_ARTIFICIAL (func_decl) = 1;
32190 /* Mark this func as external, the resolver will flip it again if
32191 it gets generated. */
32192 DECL_EXTERNAL (func_decl) = 1;
32193 /* This decl will become an IFUNC; IFUNCs have to be externally visible. */
32194 TREE_PUBLIC (func_decl) = 1;
32195
32196 return func_decl;
32197 }
32198
32199 #endif
32200
32201 /* Return true if DECL is multi-versioned and is the default function,
32202 that is, it is not tagged with a target-specific option. */
32203
32204 static bool
32205 is_function_default_version (const tree decl)
32206 {
32207 if (TREE_CODE (decl) != FUNCTION_DECL
32208 || !DECL_FUNCTION_VERSIONED (decl))
32209 return false;
32210 tree attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
32211 gcc_assert (attr);
32212 attr = TREE_VALUE (TREE_VALUE (attr));
32213 return (TREE_CODE (attr) == STRING_CST
32214 && strcmp (TREE_STRING_POINTER (attr), "default") == 0);
32215 }
32216
32217 /* Make a dispatcher declaration for the multi-versioned function DECL.
32218 Calls to DECL function will be replaced with calls to the dispatcher
32219 by the front-end. Returns the decl of the dispatcher function. */
32220
32221 static tree
32222 ix86_get_function_versions_dispatcher (void *decl)
32223 {
32224 tree fn = (tree) decl;
32225 struct cgraph_node *node = NULL;
32226 struct cgraph_node *default_node = NULL;
32227 struct cgraph_function_version_info *node_v = NULL;
32228 struct cgraph_function_version_info *first_v = NULL;
32229
32230 tree dispatch_decl = NULL;
32231
32232 struct cgraph_function_version_info *default_version_info = NULL;
32233
32234 gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn));
32235
32236 node = cgraph_node::get (fn);
32237 gcc_assert (node != NULL);
32238
32239 node_v = node->function_version ();
32240 gcc_assert (node_v != NULL);
32241
32242 if (node_v->dispatcher_resolver != NULL)
32243 return node_v->dispatcher_resolver;
32244
32245 /* Find the default version and make it the first node. */
32246 first_v = node_v;
32247 /* Go to the beginning of the chain. */
32248 while (first_v->prev != NULL)
32249 first_v = first_v->prev;
32250 default_version_info = first_v;
32251 while (default_version_info != NULL)
32252 {
32253 if (is_function_default_version
32254 (default_version_info->this_node->decl))
32255 break;
32256 default_version_info = default_version_info->next;
32257 }
32258
32259 /* If there is no default node, just return NULL. */
32260 if (default_version_info == NULL)
32261 return NULL;
32262
32263 /* Make default info the first node. */
32264 if (first_v != default_version_info)
32265 {
32266 default_version_info->prev->next = default_version_info->next;
32267 if (default_version_info->next)
32268 default_version_info->next->prev = default_version_info->prev;
32269 first_v->prev = default_version_info;
32270 default_version_info->next = first_v;
32271 default_version_info->prev = NULL;
32272 }
32273
32274 default_node = default_version_info->this_node;
32275
32276 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
32277 if (targetm.has_ifunc_p ())
32278 {
32279 struct cgraph_function_version_info *it_v = NULL;
32280 struct cgraph_node *dispatcher_node = NULL;
32281 struct cgraph_function_version_info *dispatcher_version_info = NULL;
32282
32283 /* Right now, the dispatching is done via ifunc. */
32284 dispatch_decl = make_dispatcher_decl (default_node->decl);
32285
32286 dispatcher_node = cgraph_node::get_create (dispatch_decl);
32287 gcc_assert (dispatcher_node != NULL);
32288 dispatcher_node->dispatcher_function = 1;
32289 dispatcher_version_info
32290 = dispatcher_node->insert_new_function_version ();
32291 dispatcher_version_info->next = default_version_info;
32292 dispatcher_node->definition = 1;
32293
32294 /* Set the dispatcher for all the versions. */
32295 it_v = default_version_info;
32296 while (it_v != NULL)
32297 {
32298 it_v->dispatcher_resolver = dispatch_decl;
32299 it_v = it_v->next;
32300 }
32301 }
32302 else
32303 #endif
32304 {
32305 error_at (DECL_SOURCE_LOCATION (default_node->decl),
32306 "multiversioning needs ifunc which is not supported "
32307 "on this target");
32308 }
32309
32310 return dispatch_decl;
32311 }
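
/* An illustrative sketch (not part of the implementation) of the kind of
source that reaches this hook via C++ function multi-versioning:

     __attribute__ ((target ("default"))) int foo (void) { return 0; }
     __attribute__ ((target ("avx2")))    int foo (void) { return 1; }

Every call to foo is redirected by the front end to the dispatcher decl
returned above, and the dispatcher picks a body at run time through the
ifunc resolver set up below. */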
32312
32313 /* Makes a function attribute of the form NAME(ARG_NAME) and chains
32314 it to CHAIN. */
32315
32316 static tree
32317 make_attribute (const char *name, const char *arg_name, tree chain)
32318 {
32319 tree attr_name;
32320 tree attr_arg_name;
32321 tree attr_args;
32322 tree attr;
32323
32324 attr_name = get_identifier (name);
32325 attr_arg_name = build_string (strlen (arg_name), arg_name);
32326 attr_args = tree_cons (NULL_TREE, attr_arg_name, NULL_TREE);
32327 attr = tree_cons (attr_name, attr_args, chain);
32328 return attr;
32329 }
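
/* For example, with a hypothetical resolver name,
make_attribute ("ifunc", "foo.resolver", NULL_TREE) builds a list
equivalent to __attribute__ ((ifunc ("foo.resolver"))), suitable for
assignment to DECL_ATTRIBUTES as done in make_resolver_func below. */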
32330
32331 /* Make the resolver function decl to dispatch the versions of
32332 a multi-versioned function, DEFAULT_DECL. Create an
32333 empty basic block in the resolver and store the pointer in
32334 EMPTY_BB. Return the decl of the resolver function. */
32335
32336 static tree
32337 make_resolver_func (const tree default_decl,
32338 const tree dispatch_decl,
32339 basic_block *empty_bb)
32340 {
32341 char *resolver_name;
32342 tree decl, type, decl_name, t;
32343 bool is_uniq = false;
32344
32345 /* IFUNCs have to be globally visible, so if the default_decl is
32346 not, the name of the IFUNC should be made unique. */
32347 if (TREE_PUBLIC (default_decl) == 0)
32348 is_uniq = true;
32349
32350 /* Append the filename to the resolver function if the versions are
32351 not externally visible. This is because the resolver function has
32352 to be externally visible for the loader to find it. So, appending
32353 the filename will prevent conflicts with a resolver function from
32354 another module which is based on the same version name. */
32355 resolver_name = make_name (default_decl, "resolver", is_uniq);
32356
32357 /* The resolver function should return a (void *). */
32358 type = build_function_type_list (ptr_type_node, NULL_TREE);
32359
32360 decl = build_fn_decl (resolver_name, type);
32361 decl_name = get_identifier (resolver_name);
32362 SET_DECL_ASSEMBLER_NAME (decl, decl_name);
32363
32364 DECL_NAME (decl) = decl_name;
32365 TREE_USED (decl) = 1;
32366 DECL_ARTIFICIAL (decl) = 1;
32367 DECL_IGNORED_P (decl) = 0;
32368 /* IFUNC resolvers have to be externally visible. */
32369 TREE_PUBLIC (decl) = 1;
32370 DECL_UNINLINABLE (decl) = 1;
32371
32372 /* Resolver is not external, body is generated. */
32373 DECL_EXTERNAL (decl) = 0;
32374 DECL_EXTERNAL (dispatch_decl) = 0;
32375
32376 DECL_CONTEXT (decl) = NULL_TREE;
32377 DECL_INITIAL (decl) = make_node (BLOCK);
32378 DECL_STATIC_CONSTRUCTOR (decl) = 0;
32379
32380 if (DECL_COMDAT_GROUP (default_decl)
32381 || TREE_PUBLIC (default_decl))
32382 {
32383 /* In this case, each translation unit with a call to this
32384 versioned function will put out a resolver. Ensure it
32385 is comdat to keep just one copy. */
32386 DECL_COMDAT (decl) = 1;
32387 make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
32388 }
32389 /* Build result decl and add to function_decl. */
32390 t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, ptr_type_node);
32391 DECL_ARTIFICIAL (t) = 1;
32392 DECL_IGNORED_P (t) = 1;
32393 DECL_RESULT (decl) = t;
32394
32395 gimplify_function_tree (decl);
32396 push_cfun (DECL_STRUCT_FUNCTION (decl));
32397 *empty_bb = init_lowered_empty_function (decl, false);
32398
32399 cgraph_node::add_new_function (decl, true);
32400 symtab->call_cgraph_insertion_hooks (cgraph_node::get_create (decl));
32401
32402 pop_cfun ();
32403
32404 gcc_assert (dispatch_decl != NULL);
32405 /* Mark dispatch_decl as "ifunc" with resolver as resolver_name. */
32406 DECL_ATTRIBUTES (dispatch_decl)
32407 = make_attribute ("ifunc", resolver_name, DECL_ATTRIBUTES (dispatch_decl));
32408
32409 /* Create the alias for dispatch to resolver here. */
32410 /*cgraph_create_function_alias (dispatch_decl, decl);*/
32411 cgraph_node::create_same_body_alias (dispatch_decl, decl);
32412 XDELETEVEC (resolver_name);
32413 return decl;
32414 }
32415
32416 /* Generate the dispatching code body to dispatch multi-versioned function
32417 DECL. The target hook is called to process the "target" attributes and
32418 provide the code to dispatch the right function at run-time. NODE points
32419 to the dispatcher decl whose body will be created. */
32420
32421 static tree
32422 ix86_generate_version_dispatcher_body (void *node_p)
32423 {
32424 tree resolver_decl;
32425 basic_block empty_bb;
32426 tree default_ver_decl;
32427 struct cgraph_node *versn;
32428 struct cgraph_node *node;
32429
32430 struct cgraph_function_version_info *node_version_info = NULL;
32431 struct cgraph_function_version_info *versn_info = NULL;
32432
32433 node = (cgraph_node *)node_p;
32434
32435 node_version_info = node->function_version ();
32436 gcc_assert (node->dispatcher_function
32437 && node_version_info != NULL);
32438
32439 if (node_version_info->dispatcher_resolver)
32440 return node_version_info->dispatcher_resolver;
32441
32442 /* The first version in the chain corresponds to the default version. */
32443 default_ver_decl = node_version_info->next->this_node->decl;
32444
32445 /* node is going to be an alias, so remove the finalized bit. */
32446 node->definition = false;
32447
32448 resolver_decl = make_resolver_func (default_ver_decl,
32449 node->decl, &empty_bb);
32450
32451 node_version_info->dispatcher_resolver = resolver_decl;
32452
32453 push_cfun (DECL_STRUCT_FUNCTION (resolver_decl));
32454
32455 auto_vec<tree, 2> fn_ver_vec;
32456
32457 for (versn_info = node_version_info->next; versn_info;
32458 versn_info = versn_info->next)
32459 {
32460 versn = versn_info->this_node;
32461 /* Check for virtual functions here again, as by this time it should
32462 have been determined if this function needs a vtable index or
32463 not. This happens for methods in derived classes that override
32464 virtual methods in base classes but are not explicitly marked as
32465 virtual. */
32466 if (DECL_VINDEX (versn->decl))
32467 sorry ("Virtual function multiversioning not supported");
32468
32469 fn_ver_vec.safe_push (versn->decl);
32470 }
32471
32472 dispatch_function_versions (resolver_decl, &fn_ver_vec, &empty_bb);
32473 cgraph_edge::rebuild_edges ();
32474 pop_cfun ();
32475 return resolver_decl;
32476 }
32477 /* This builds the __processor_model struct type defined in
32478 libgcc/config/i386/cpuinfo.c. */
32479
32480 static tree
32481 build_processor_model_struct (void)
32482 {
32483 const char *field_name[] = {"__cpu_vendor", "__cpu_type", "__cpu_subtype",
32484 "__cpu_features"};
32485 tree field = NULL_TREE, field_chain = NULL_TREE;
32486 int i;
32487 tree type = make_node (RECORD_TYPE);
32488
32489 /* The first 3 fields are unsigned int. */
32490 for (i = 0; i < 3; ++i)
32491 {
32492 field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
32493 get_identifier (field_name[i]), unsigned_type_node);
32494 if (field_chain != NULL_TREE)
32495 DECL_CHAIN (field) = field_chain;
32496 field_chain = field;
32497 }
32498
32499 /* The last field is an array of unsigned integers of size one. */
32500 field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
32501 get_identifier (field_name[3]),
32502 build_array_type (unsigned_type_node,
32503 build_index_type (size_one_node)));
32504 if (field_chain != NULL_TREE)
32505 DECL_CHAIN (field) = field_chain;
32506 field_chain = field;
32507
32508 finish_builtin_struct (type, "__processor_model", field_chain, NULL_TREE);
32509 return type;
32510 }
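
/* For reference, a sketch of the libgcc side this layout must match
(the authoritative definition lives in libgcc/config/i386/cpuinfo.c):

     struct __processor_model
     {
       unsigned int __cpu_vendor;
       unsigned int __cpu_type;
       unsigned int __cpu_subtype;
       unsigned int __cpu_features[1];
     } __cpu_model;
*/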
32511
32512 /* Returns an extern, comdat VAR_DECL of type TYPE and name NAME. */
32513
32514 static tree
32515 make_var_decl (tree type, const char *name)
32516 {
32517 tree new_decl;
32518
32519 new_decl = build_decl (UNKNOWN_LOCATION,
32520 VAR_DECL,
32521 get_identifier (name),
32522 type);
32523
32524 DECL_EXTERNAL (new_decl) = 1;
32525 TREE_STATIC (new_decl) = 1;
32526 TREE_PUBLIC (new_decl) = 1;
32527 DECL_INITIAL (new_decl) = 0;
32528 DECL_ARTIFICIAL (new_decl) = 0;
32529 DECL_PRESERVE_P (new_decl) = 1;
32530
32531 make_decl_one_only (new_decl, DECL_ASSEMBLER_NAME (new_decl));
32532 assemble_variable (new_decl, 0, 0, 0);
32533
32534 return new_decl;
32535 }
32536
32537 /* FNDECL is a __builtin_cpu_is or a __builtin_cpu_supports call that is folded
32538 into an integer test against the data defined in libgcc/config/i386/cpuinfo.c. */
32539
32540 static tree
32541 fold_builtin_cpu (tree fndecl, tree *args)
32542 {
32543 unsigned int i;
32544 enum ix86_builtins fn_code = (enum ix86_builtins)
32545 DECL_FUNCTION_CODE (fndecl);
32546 tree param_string_cst = NULL;
32547
32548 /* This is the order of bit-fields in __processor_features in cpuinfo.c */
32549 enum processor_features
32550 {
32551 F_CMOV = 0,
32552 F_MMX,
32553 F_POPCNT,
32554 F_SSE,
32555 F_SSE2,
32556 F_SSE3,
32557 F_SSSE3,
32558 F_SSE4_1,
32559 F_SSE4_2,
32560 F_AVX,
32561 F_AVX2,
32562 F_SSE4_A,
32563 F_FMA4,
32564 F_XOP,
32565 F_FMA,
32566 F_MAX
32567 };
32568
32569 /* These are the values for vendor types and cpu types and subtypes
32570 in cpuinfo.c. Cpu types and subtypes have the corresponding
32571 start value subtracted from them. */
32572 enum processor_model
32573 {
32574 M_INTEL = 1,
32575 M_AMD,
32576 M_CPU_TYPE_START,
32577 M_INTEL_BONNELL,
32578 M_INTEL_CORE2,
32579 M_INTEL_COREI7,
32580 M_AMDFAM10H,
32581 M_AMDFAM15H,
32582 M_INTEL_SILVERMONT,
32583 M_AMD_BTVER1,
32584 M_AMD_BTVER2,
32585 M_CPU_SUBTYPE_START,
32586 M_INTEL_COREI7_NEHALEM,
32587 M_INTEL_COREI7_WESTMERE,
32588 M_INTEL_COREI7_SANDYBRIDGE,
32589 M_AMDFAM10H_BARCELONA,
32590 M_AMDFAM10H_SHANGHAI,
32591 M_AMDFAM10H_ISTANBUL,
32592 M_AMDFAM15H_BDVER1,
32593 M_AMDFAM15H_BDVER2,
32594 M_AMDFAM15H_BDVER3,
32595 M_AMDFAM15H_BDVER4,
32596 M_INTEL_COREI7_IVYBRIDGE,
32597 M_INTEL_COREI7_HASWELL
32598 };
32599
32600 static struct _arch_names_table
32601 {
32602 const char *const name;
32603 const enum processor_model model;
32604 }
32605 const arch_names_table[] =
32606 {
32607 {"amd", M_AMD},
32608 {"intel", M_INTEL},
32609 {"atom", M_INTEL_BONNELL},
32610 {"slm", M_INTEL_SILVERMONT},
32611 {"core2", M_INTEL_CORE2},
32612 {"corei7", M_INTEL_COREI7},
32613 {"nehalem", M_INTEL_COREI7_NEHALEM},
32614 {"westmere", M_INTEL_COREI7_WESTMERE},
32615 {"sandybridge", M_INTEL_COREI7_SANDYBRIDGE},
32616 {"ivybridge", M_INTEL_COREI7_IVYBRIDGE},
32617 {"haswell", M_INTEL_COREI7_HASWELL},
32618 {"bonnell", M_INTEL_BONNELL},
32619 {"silvermont", M_INTEL_SILVERMONT},
32620 {"amdfam10h", M_AMDFAM10H},
32621 {"barcelona", M_AMDFAM10H_BARCELONA},
32622 {"shanghai", M_AMDFAM10H_SHANGHAI},
32623 {"istanbul", M_AMDFAM10H_ISTANBUL},
32624 {"btver1", M_AMD_BTVER1},
32625 {"amdfam15h", M_AMDFAM15H},
32626 {"bdver1", M_AMDFAM15H_BDVER1},
32627 {"bdver2", M_AMDFAM15H_BDVER2},
32628 {"bdver3", M_AMDFAM15H_BDVER3},
32629 {"bdver4", M_AMDFAM15H_BDVER4},
32630 {"btver2", M_AMD_BTVER2},
32631 };
32632
32633 static struct _isa_names_table
32634 {
32635 const char *const name;
32636 const enum processor_features feature;
32637 }
32638 const isa_names_table[] =
32639 {
32640 {"cmov", F_CMOV},
32641 {"mmx", F_MMX},
32642 {"popcnt", F_POPCNT},
32643 {"sse", F_SSE},
32644 {"sse2", F_SSE2},
32645 {"sse3", F_SSE3},
32646 {"ssse3", F_SSSE3},
32647 {"sse4a", F_SSE4_A},
32648 {"sse4.1", F_SSE4_1},
32649 {"sse4.2", F_SSE4_2},
32650 {"avx", F_AVX},
32651 {"fma4", F_FMA4},
32652 {"xop", F_XOP},
32653 {"fma", F_FMA},
32654 {"avx2", F_AVX2}
32655 };
32656
32657 tree __processor_model_type = build_processor_model_struct ();
32658 tree __cpu_model_var = make_var_decl (__processor_model_type,
32659 "__cpu_model");
32660
32661
32662 varpool_node::add (__cpu_model_var);
32663
32664 gcc_assert ((args != NULL) && (*args != NULL));
32665
32666 param_string_cst = *args;
32667 while (param_string_cst
32668 && TREE_CODE (param_string_cst) != STRING_CST)
32669 {
32670 /* *args must be an expr that can contain other EXPRs leading to a
32671 STRING_CST. */
32672 if (!EXPR_P (param_string_cst))
32673 {
32674 error ("Parameter to builtin must be a string constant or literal");
32675 return integer_zero_node;
32676 }
32677 param_string_cst = TREE_OPERAND (EXPR_CHECK (param_string_cst), 0);
32678 }
32679
32680 gcc_assert (param_string_cst);
32681
32682 if (fn_code == IX86_BUILTIN_CPU_IS)
32683 {
32684 tree ref;
32685 tree field;
32686 tree final;
32687
32688 unsigned int field_val = 0;
32689 unsigned int NUM_ARCH_NAMES
32690 = sizeof (arch_names_table) / sizeof (struct _arch_names_table);
32691
32692 for (i = 0; i < NUM_ARCH_NAMES; i++)
32693 if (strcmp (arch_names_table[i].name,
32694 TREE_STRING_POINTER (param_string_cst)) == 0)
32695 break;
32696
32697 if (i == NUM_ARCH_NAMES)
32698 {
32699 error ("Parameter to builtin not valid: %s",
32700 TREE_STRING_POINTER (param_string_cst));
32701 return integer_zero_node;
32702 }
32703
32704 field = TYPE_FIELDS (__processor_model_type);
32705 field_val = arch_names_table[i].model;
32706
32707 /* CPU types are stored in the next field. */
32708 if (field_val > M_CPU_TYPE_START
32709 && field_val < M_CPU_SUBTYPE_START)
32710 {
32711 field = DECL_CHAIN (field);
32712 field_val -= M_CPU_TYPE_START;
32713 }
32714
32715 /* CPU subtypes are stored two fields further on, in __cpu_subtype. */
32716 if (field_val > M_CPU_SUBTYPE_START)
32717 {
32718 field = DECL_CHAIN (DECL_CHAIN (field));
32719 field_val -= M_CPU_SUBTYPE_START;
32720 }
32721
32722 /* Get the appropriate field in __cpu_model. */
32723 ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
32724 field, NULL_TREE);
32725
32726 /* Check the value. */
32727 final = build2 (EQ_EXPR, unsigned_type_node, ref,
32728 build_int_cstu (unsigned_type_node, field_val));
32729 return build1 (CONVERT_EXPR, integer_type_node, final);
32730 }
32731 else if (fn_code == IX86_BUILTIN_CPU_SUPPORTS)
32732 {
32733 tree ref;
32734 tree array_elt;
32735 tree field;
32736 tree final;
32737
32738 unsigned int field_val = 0;
32739 unsigned int NUM_ISA_NAMES
32740 = sizeof (isa_names_table) / sizeof (struct _isa_names_table);
32741
32742 for (i = 0; i < NUM_ISA_NAMES; i++)
32743 if (strcmp (isa_names_table[i].name,
32744 TREE_STRING_POINTER (param_string_cst)) == 0)
32745 break;
32746
32747 if (i == NUM_ISA_NAMES)
32748 {
32749 error ("Parameter to builtin not valid: %s",
32750 TREE_STRING_POINTER (param_string_cst));
32751 return integer_zero_node;
32752 }
32753
32754 field = TYPE_FIELDS (__processor_model_type);
32755 /* Get the last field, which is __cpu_features. */
32756 while (DECL_CHAIN (field))
32757 field = DECL_CHAIN (field);
32758
32759 /* Get the appropriate field: __cpu_model.__cpu_features */
32760 ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
32761 field, NULL_TREE);
32762
32763 /* Access the 0th element of __cpu_features array. */
32764 array_elt = build4 (ARRAY_REF, unsigned_type_node, ref,
32765 integer_zero_node, NULL_TREE, NULL_TREE);
32766
32767 field_val = (1 << isa_names_table[i].feature);
32768 /* Return __cpu_model.__cpu_features[0] & field_val */
32769 final = build2 (BIT_AND_EXPR, unsigned_type_node, array_elt,
32770 build_int_cstu (unsigned_type_node, field_val));
32771 return build1 (CONVERT_EXPR, integer_type_node, final);
32772 }
32773 gcc_unreachable ();
32774 }
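
/* Illustrative result of the folding above (a sketch, not generated
verbatim): __builtin_cpu_supports ("avx") folds to roughly

     (int) (__cpu_model.__cpu_features[0] & (1 << F_AVX))

and __builtin_cpu_is ("intel") folds to roughly

     (int) (__cpu_model.__cpu_vendor == M_INTEL)

using the feature and model values defined in fold_builtin_cpu. */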
32775
32776 static tree
32777 ix86_fold_builtin (tree fndecl, int n_args,
32778 tree *args, bool ignore ATTRIBUTE_UNUSED)
32779 {
32780 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
32781 {
32782 enum ix86_builtins fn_code = (enum ix86_builtins)
32783 DECL_FUNCTION_CODE (fndecl);
32784 if (fn_code == IX86_BUILTIN_CPU_IS
32785 || fn_code == IX86_BUILTIN_CPU_SUPPORTS)
32786 {
32787 gcc_assert (n_args == 1);
32788 return fold_builtin_cpu (fndecl, args);
32789 }
32790 }
32791
32792 #ifdef SUBTARGET_FOLD_BUILTIN
32793 return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore);
32794 #endif
32795
32796 return NULL_TREE;
32797 }
32798
32799 /* Make builtins to detect cpu type and features supported. NAME is
32800 the builtin name, CODE is the builtin code, FTYPE is the function
32801 type of the builtin, and IS_CONST says whether the builtin is const. */
32802
32803 static void
32804 make_cpu_type_builtin (const char* name, int code,
32805 enum ix86_builtin_func_type ftype, bool is_const)
32806 {
32807 tree decl;
32808 tree type;
32809
32810 type = ix86_get_builtin_func_type (ftype);
32811 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
32812 NULL, NULL_TREE);
32813 gcc_assert (decl != NULL_TREE);
32814 ix86_builtins[(int) code] = decl;
32815 TREE_READONLY (decl) = is_const;
32816 }
32817
32818 /* Make builtins to get CPU type and features supported. The created
32819 builtins are:
32820
32821 __builtin_cpu_init (), to detect cpu type and features,
32822 __builtin_cpu_is ("<CPUNAME>"), to check if cpu is of type <CPUNAME>,
32823 __builtin_cpu_supports ("<FEATURE>"), to check if cpu supports <FEATURE>
32824 */
32825
32826 static void
32827 ix86_init_platform_type_builtins (void)
32828 {
32829 make_cpu_type_builtin ("__builtin_cpu_init", IX86_BUILTIN_CPU_INIT,
32830 INT_FTYPE_VOID, false);
32831 make_cpu_type_builtin ("__builtin_cpu_is", IX86_BUILTIN_CPU_IS,
32832 INT_FTYPE_PCCHAR, true);
32833 make_cpu_type_builtin ("__builtin_cpu_supports", IX86_BUILTIN_CPU_SUPPORTS,
32834 INT_FTYPE_PCCHAR, true);
32835 }
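
/* A minimal user-level sketch of the builtins registered above:

     int
     use_fast_path (void)
     {
       __builtin_cpu_init ();
       if (__builtin_cpu_is ("intel") && __builtin_cpu_supports ("avx2"))
         return 1;
       return 0;
     }

Per the GCC documentation, __builtin_cpu_init is only required when the
checks run before the libgcc constructor has initialized __cpu_model;
calling it unconditionally, as above, is harmless. */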
32836
32837 /* Internal method for ix86_init_builtins. */
32838
32839 static void
32840 ix86_init_builtins_va_builtins_abi (void)
32841 {
32842 tree ms_va_ref, sysv_va_ref;
32843 tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
32844 tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
32845 tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
32846 tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;
32847
32848 if (!TARGET_64BIT)
32849 return;
32850 fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
32851 fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
32852 ms_va_ref = build_reference_type (ms_va_list_type_node);
32853 sysv_va_ref =
32854 build_pointer_type (TREE_TYPE (sysv_va_list_type_node));
32855
32856 fnvoid_va_end_ms =
32857 build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
32858 fnvoid_va_start_ms =
32859 build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
32860 fnvoid_va_end_sysv =
32861 build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
32862 fnvoid_va_start_sysv =
32863 build_varargs_function_type_list (void_type_node, sysv_va_ref,
32864 NULL_TREE);
32865 fnvoid_va_copy_ms =
32866 build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
32867 NULL_TREE);
32868 fnvoid_va_copy_sysv =
32869 build_function_type_list (void_type_node, sysv_va_ref,
32870 sysv_va_ref, NULL_TREE);
32871
32872 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
32873 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
32874 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
32875 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
32876 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
32877 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
32878 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
32879 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
32880 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
32881 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
32882 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
32883 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
32884 }
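
/* A sketch of how the ABI-specific varargs builtins registered above are
reached from user code on a 64-bit target (usage illustrative only):

     int __attribute__ ((ms_abi))
     sum (int n, ...)
     {
       __builtin_ms_va_list ap;
       int i, s = 0;
       __builtin_ms_va_start (ap, n);
       for (i = 0; i < n; i++)
         s += __builtin_va_arg (ap, int);
       __builtin_ms_va_end (ap);
       return s;
     }
*/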
32885
32886 static void
32887 ix86_init_builtin_types (void)
32888 {
32889 tree float128_type_node, float80_type_node;
32890
32891 /* The __float80 type. */
32892 float80_type_node = long_double_type_node;
32893 if (TYPE_MODE (float80_type_node) != XFmode)
32894 {
32895 /* The __float80 type. */
32896 float80_type_node = make_node (REAL_TYPE);
32897
32898 TYPE_PRECISION (float80_type_node) = 80;
32899 layout_type (float80_type_node);
32900 }
32901 lang_hooks.types.register_builtin_type (float80_type_node, "__float80");
32902
32903 /* The __float128 type. */
32904 float128_type_node = make_node (REAL_TYPE);
32905 TYPE_PRECISION (float128_type_node) = 128;
32906 layout_type (float128_type_node);
32907 lang_hooks.types.register_builtin_type (float128_type_node, "__float128");
32908
32909 /* This macro is built by i386-builtin-types.awk. */
32910 DEFINE_BUILTIN_PRIMITIVE_TYPES;
32911 }
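
/* Both types registered above are available to user code as GNU
extensions; a minimal sketch (assumes an x86 target, as here):

     __float80  e = 1.0L;                    // 80-bit x87 extended precision
     __float128 q = __builtin_huge_valq ();  // builtin registered just below
*/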
32912
32913 static void
32914 ix86_init_builtins (void)
32915 {
32916 tree t;
32917
32918 ix86_init_builtin_types ();
32919
32920 /* Builtins to get CPU type and features. */
32921 ix86_init_platform_type_builtins ();
32922
32923 /* TFmode support builtins. */
32924 def_builtin_const (0, "__builtin_infq",
32925 FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ);
32926 def_builtin_const (0, "__builtin_huge_valq",
32927 FLOAT128_FTYPE_VOID, IX86_BUILTIN_HUGE_VALQ);
32928
32929 /* We will expand them to a normal call if SSE isn't available, since
32930 they are used by libgcc. */
32931 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128);
32932 t = add_builtin_function ("__builtin_fabsq", t, IX86_BUILTIN_FABSQ,
32933 BUILT_IN_MD, "__fabstf2", NULL_TREE);
32934 TREE_READONLY (t) = 1;
32935 ix86_builtins[(int) IX86_BUILTIN_FABSQ] = t;
32936
32937 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128);
32938 t = add_builtin_function ("__builtin_copysignq", t, IX86_BUILTIN_COPYSIGNQ,
32939 BUILT_IN_MD, "__copysigntf3", NULL_TREE);
32940 TREE_READONLY (t) = 1;
32941 ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = t;
32942
32943 ix86_init_tm_builtins ();
32944 ix86_init_mmx_sse_builtins ();
32945
32946 if (TARGET_LP64)
32947 ix86_init_builtins_va_builtins_abi ();
32948
32949 #ifdef SUBTARGET_INIT_BUILTINS
32950 SUBTARGET_INIT_BUILTINS;
32951 #endif
32952 }
32953
32954 /* Return the ix86 builtin for CODE. */
32955
32956 static tree
32957 ix86_builtin_decl (unsigned code, bool)
32958 {
32959 if (code >= IX86_BUILTIN_MAX)
32960 return error_mark_node;
32961
32962 return ix86_builtins[code];
32963 }
32964
32965 /* Errors in the source file can cause expand_expr to return const0_rtx
32966 where we expect a vector. To avoid crashing, use one of the vector
32967 clear instructions. */
32968 static rtx
32969 safe_vector_operand (rtx x, enum machine_mode mode)
32970 {
32971 if (x == const0_rtx)
32972 x = CONST0_RTX (mode);
32973 return x;
32974 }
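
/* The expanders that follow share a common shape: expand the CALL_EXPR
arguments to RTL, coerce each operand until it satisfies the insn
pattern's operand predicate (copying to a register where needed),
construct the insn with GEN_FCN, emit it, and hand back the target
register holding the result. */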
32975
32976 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
32977
32978 static rtx
32979 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
32980 {
32981 rtx pat;
32982 tree arg0 = CALL_EXPR_ARG (exp, 0);
32983 tree arg1 = CALL_EXPR_ARG (exp, 1);
32984 rtx op0 = expand_normal (arg0);
32985 rtx op1 = expand_normal (arg1);
32986 enum machine_mode tmode = insn_data[icode].operand[0].mode;
32987 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
32988 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
32989
32990 if (VECTOR_MODE_P (mode0))
32991 op0 = safe_vector_operand (op0, mode0);
32992 if (VECTOR_MODE_P (mode1))
32993 op1 = safe_vector_operand (op1, mode1);
32994
32995 if (optimize || !target
32996 || GET_MODE (target) != tmode
32997 || !insn_data[icode].operand[0].predicate (target, tmode))
32998 target = gen_reg_rtx (tmode);
32999
33000 if (GET_MODE (op1) == SImode && mode1 == TImode)
33001 {
33002 rtx x = gen_reg_rtx (V4SImode);
33003 emit_insn (gen_sse2_loadd (x, op1));
33004 op1 = gen_lowpart (TImode, x);
33005 }
33006
33007 if (!insn_data[icode].operand[1].predicate (op0, mode0))
33008 op0 = copy_to_mode_reg (mode0, op0);
33009 if (!insn_data[icode].operand[2].predicate (op1, mode1))
33010 op1 = copy_to_mode_reg (mode1, op1);
33011
33012 pat = GEN_FCN (icode) (target, op0, op1);
33013 if (! pat)
33014 return 0;
33015
33016 emit_insn (pat);
33017
33018 return target;
33019 }
33020
33021 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
33022
33023 static rtx
33024 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
33025 enum ix86_builtin_func_type m_type,
33026 enum rtx_code sub_code)
33027 {
33028 rtx pat;
33029 int i;
33030 int nargs;
33031 bool comparison_p = false;
33032 bool tf_p = false;
33033 bool last_arg_constant = false;
33034 int num_memory = 0;
33035 struct {
33036 rtx op;
33037 enum machine_mode mode;
33038 } args[4];
33039
33040 enum machine_mode tmode = insn_data[icode].operand[0].mode;
33041
33042 switch (m_type)
33043 {
33044 case MULTI_ARG_4_DF2_DI_I:
33045 case MULTI_ARG_4_DF2_DI_I1:
33046 case MULTI_ARG_4_SF2_SI_I:
33047 case MULTI_ARG_4_SF2_SI_I1:
33048 nargs = 4;
33049 last_arg_constant = true;
33050 break;
33051
33052 case MULTI_ARG_3_SF:
33053 case MULTI_ARG_3_DF:
33054 case MULTI_ARG_3_SF2:
33055 case MULTI_ARG_3_DF2:
33056 case MULTI_ARG_3_DI:
33057 case MULTI_ARG_3_SI:
33058 case MULTI_ARG_3_SI_DI:
33059 case MULTI_ARG_3_HI:
33060 case MULTI_ARG_3_HI_SI:
33061 case MULTI_ARG_3_QI:
33062 case MULTI_ARG_3_DI2:
33063 case MULTI_ARG_3_SI2:
33064 case MULTI_ARG_3_HI2:
33065 case MULTI_ARG_3_QI2:
33066 nargs = 3;
33067 break;
33068
33069 case MULTI_ARG_2_SF:
33070 case MULTI_ARG_2_DF:
33071 case MULTI_ARG_2_DI:
33072 case MULTI_ARG_2_SI:
33073 case MULTI_ARG_2_HI:
33074 case MULTI_ARG_2_QI:
33075 nargs = 2;
33076 break;
33077
33078 case MULTI_ARG_2_DI_IMM:
33079 case MULTI_ARG_2_SI_IMM:
33080 case MULTI_ARG_2_HI_IMM:
33081 case MULTI_ARG_2_QI_IMM:
33082 nargs = 2;
33083 last_arg_constant = true;
33084 break;
33085
33086 case MULTI_ARG_1_SF:
33087 case MULTI_ARG_1_DF:
33088 case MULTI_ARG_1_SF2:
33089 case MULTI_ARG_1_DF2:
33090 case MULTI_ARG_1_DI:
33091 case MULTI_ARG_1_SI:
33092 case MULTI_ARG_1_HI:
33093 case MULTI_ARG_1_QI:
33094 case MULTI_ARG_1_SI_DI:
33095 case MULTI_ARG_1_HI_DI:
33096 case MULTI_ARG_1_HI_SI:
33097 case MULTI_ARG_1_QI_DI:
33098 case MULTI_ARG_1_QI_SI:
33099 case MULTI_ARG_1_QI_HI:
33100 nargs = 1;
33101 break;
33102
33103 case MULTI_ARG_2_DI_CMP:
33104 case MULTI_ARG_2_SI_CMP:
33105 case MULTI_ARG_2_HI_CMP:
33106 case MULTI_ARG_2_QI_CMP:
33107 nargs = 2;
33108 comparison_p = true;
33109 break;
33110
33111 case MULTI_ARG_2_SF_TF:
33112 case MULTI_ARG_2_DF_TF:
33113 case MULTI_ARG_2_DI_TF:
33114 case MULTI_ARG_2_SI_TF:
33115 case MULTI_ARG_2_HI_TF:
33116 case MULTI_ARG_2_QI_TF:
33117 nargs = 2;
33118 tf_p = true;
33119 break;
33120
33121 default:
33122 gcc_unreachable ();
33123 }
33124
33125 if (optimize || !target
33126 || GET_MODE (target) != tmode
33127 || !insn_data[icode].operand[0].predicate (target, tmode))
33128 target = gen_reg_rtx (tmode);
33129
33130 gcc_assert (nargs <= 4);
33131
33132 for (i = 0; i < nargs; i++)
33133 {
33134 tree arg = CALL_EXPR_ARG (exp, i);
33135 rtx op = expand_normal (arg);
33136 int adjust = (comparison_p) ? 1 : 0;
33137 enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
33138
33139 if (last_arg_constant && i == nargs - 1)
33140 {
33141 if (!insn_data[icode].operand[i + 1].predicate (op, mode))
33142 {
33143 enum insn_code new_icode = icode;
33144 switch (icode)
33145 {
33146 case CODE_FOR_xop_vpermil2v2df3:
33147 case CODE_FOR_xop_vpermil2v4sf3:
33148 case CODE_FOR_xop_vpermil2v4df3:
33149 case CODE_FOR_xop_vpermil2v8sf3:
33150 error ("the last argument must be a 2-bit immediate");
33151 return gen_reg_rtx (tmode);
33152 case CODE_FOR_xop_rotlv2di3:
33153 new_icode = CODE_FOR_rotlv2di3;
33154 goto xop_rotl;
33155 case CODE_FOR_xop_rotlv4si3:
33156 new_icode = CODE_FOR_rotlv4si3;
33157 goto xop_rotl;
33158 case CODE_FOR_xop_rotlv8hi3:
33159 new_icode = CODE_FOR_rotlv8hi3;
33160 goto xop_rotl;
33161 case CODE_FOR_xop_rotlv16qi3:
33162 new_icode = CODE_FOR_rotlv16qi3;
33163 xop_rotl:
33164 if (CONST_INT_P (op))
33165 {
33166 int mask = GET_MODE_BITSIZE (GET_MODE_INNER (tmode)) - 1;
33167 op = GEN_INT (INTVAL (op) & mask);
33168 gcc_checking_assert
33169 (insn_data[icode].operand[i + 1].predicate (op, mode));
33170 }
33171 else
33172 {
33173 gcc_checking_assert
33174 (nargs == 2
33175 && insn_data[new_icode].operand[0].mode == tmode
33176 && insn_data[new_icode].operand[1].mode == tmode
33177 && insn_data[new_icode].operand[2].mode == mode
33178 && insn_data[new_icode].operand[0].predicate
33179 == insn_data[icode].operand[0].predicate
33180 && insn_data[new_icode].operand[1].predicate
33181 == insn_data[icode].operand[1].predicate);
33182 icode = new_icode;
33183 goto non_constant;
33184 }
33185 break;
33186 default:
33187 gcc_unreachable ();
33188 }
33189 }
33190 }
33191 else
33192 {
33193 non_constant:
33194 if (VECTOR_MODE_P (mode))
33195 op = safe_vector_operand (op, mode);
33196
33197 /* If we aren't optimizing, only allow one memory operand to be
33198 generated. */
33199 if (memory_operand (op, mode))
33200 num_memory++;
33201
33202 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
33203
33204 if (optimize
33205 || !insn_data[icode].operand[i+adjust+1].predicate (op, mode)
33206 || num_memory > 1)
33207 op = force_reg (mode, op);
33208 }
33209
33210 args[i].op = op;
33211 args[i].mode = mode;
33212 }
33213
33214 switch (nargs)
33215 {
33216 case 1:
33217 pat = GEN_FCN (icode) (target, args[0].op);
33218 break;
33219
33220 case 2:
33221 if (tf_p)
33222 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
33223 GEN_INT ((int)sub_code));
33224 else if (! comparison_p)
33225 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
33226 else
33227 {
33228 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
33229 args[0].op,
33230 args[1].op);
33231
33232 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
33233 }
33234 break;
33235
33236 case 3:
33237 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
33238 break;
33239
33240 case 4:
33241 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op, args[3].op);
33242 break;
33243
33244 default:
33245 gcc_unreachable ();
33246 }
33247
33248 if (! pat)
33249 return 0;
33250
33251 emit_insn (pat);
33252 return target;
33253 }
33254
33255 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
33256 insns with vec_merge. */
33257
33258 static rtx
33259 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
33260 rtx target)
33261 {
33262 rtx pat;
33263 tree arg0 = CALL_EXPR_ARG (exp, 0);
33264 rtx op1, op0 = expand_normal (arg0);
33265 enum machine_mode tmode = insn_data[icode].operand[0].mode;
33266 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
33267
33268 if (optimize || !target
33269 || GET_MODE (target) != tmode
33270 || !insn_data[icode].operand[0].predicate (target, tmode))
33271 target = gen_reg_rtx (tmode);
33272
33273 if (VECTOR_MODE_P (mode0))
33274 op0 = safe_vector_operand (op0, mode0);
33275
33276 if ((optimize && !register_operand (op0, mode0))
33277 || !insn_data[icode].operand[1].predicate (op0, mode0))
33278 op0 = copy_to_mode_reg (mode0, op0);
33279
33280 op1 = op0;
33281 if (!insn_data[icode].operand[2].predicate (op1, mode0))
33282 op1 = copy_to_mode_reg (mode0, op1);
33283
33284 pat = GEN_FCN (icode) (target, op0, op1);
33285 if (! pat)
33286 return 0;
33287 emit_insn (pat);
33288 return target;
33289 }
33290
33291 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
33292
33293 static rtx
33294 ix86_expand_sse_compare (const struct builtin_description *d,
33295 tree exp, rtx target, bool swap)
33296 {
33297 rtx pat;
33298 tree arg0 = CALL_EXPR_ARG (exp, 0);
33299 tree arg1 = CALL_EXPR_ARG (exp, 1);
33300 rtx op0 = expand_normal (arg0);
33301 rtx op1 = expand_normal (arg1);
33302 rtx op2;
33303 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
33304 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
33305 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
33306 enum rtx_code comparison = d->comparison;
33307
33308 if (VECTOR_MODE_P (mode0))
33309 op0 = safe_vector_operand (op0, mode0);
33310 if (VECTOR_MODE_P (mode1))
33311 op1 = safe_vector_operand (op1, mode1);
33312
33313 /* Swap operands if we have a comparison that isn't available in
33314 hardware. */
33315 if (swap)
33316 {
33317 rtx tmp = gen_reg_rtx (mode1);
33318 emit_move_insn (tmp, op1);
33319 op1 = op0;
33320 op0 = tmp;
33321 }
33322
33323 if (optimize || !target
33324 || GET_MODE (target) != tmode
33325 || !insn_data[d->icode].operand[0].predicate (target, tmode))
33326 target = gen_reg_rtx (tmode);
33327
33328 if ((optimize && !register_operand (op0, mode0))
33329 || !insn_data[d->icode].operand[1].predicate (op0, mode0))
33330 op0 = copy_to_mode_reg (mode0, op0);
33331 if ((optimize && !register_operand (op1, mode1))
33332 || !insn_data[d->icode].operand[2].predicate (op1, mode1))
33333 op1 = copy_to_mode_reg (mode1, op1);
33334
33335 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
33336 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
33337 if (! pat)
33338 return 0;
33339 emit_insn (pat);
33340 return target;
33341 }
33342
33343 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
33344
33345 static rtx
33346 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
33347 rtx target)
33348 {
33349 rtx pat;
33350 tree arg0 = CALL_EXPR_ARG (exp, 0);
33351 tree arg1 = CALL_EXPR_ARG (exp, 1);
33352 rtx op0 = expand_normal (arg0);
33353 rtx op1 = expand_normal (arg1);
33354 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
33355 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
33356 enum rtx_code comparison = d->comparison;
33357
33358 if (VECTOR_MODE_P (mode0))
33359 op0 = safe_vector_operand (op0, mode0);
33360 if (VECTOR_MODE_P (mode1))
33361 op1 = safe_vector_operand (op1, mode1);
33362
33363 /* Swap operands if we have a comparison that isn't available in
33364 hardware. */
33365 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
33366 {
33367 rtx tmp = op1;
33368 op1 = op0;
33369 op0 = tmp;
33370 }
33371
33372 target = gen_reg_rtx (SImode);
33373 emit_move_insn (target, const0_rtx);
33374 target = gen_rtx_SUBREG (QImode, target, 0);
33375
33376 if ((optimize && !register_operand (op0, mode0))
33377 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
33378 op0 = copy_to_mode_reg (mode0, op0);
33379 if ((optimize && !register_operand (op1, mode1))
33380 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
33381 op1 = copy_to_mode_reg (mode1, op1);
33382
33383 pat = GEN_FCN (d->icode) (op0, op1);
33384 if (! pat)
33385 return 0;
33386 emit_insn (pat);
33387 emit_insn (gen_rtx_SET (VOIDmode,
33388 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
33389 gen_rtx_fmt_ee (comparison, QImode,
33390 SET_DEST (pat),
33391 const0_rtx)));
33392
33393 return SUBREG_REG (target);
33394 }
33395
33396 /* Subroutines of ix86_expand_args_builtin to take care of round insns. */
33397
33398 static rtx
33399 ix86_expand_sse_round (const struct builtin_description *d, tree exp,
33400 rtx target)
33401 {
33402 rtx pat;
33403 tree arg0 = CALL_EXPR_ARG (exp, 0);
33404 rtx op1, op0 = expand_normal (arg0);
33405 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
33406 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
33407
33408 if (optimize || target == 0
33409 || GET_MODE (target) != tmode
33410 || !insn_data[d->icode].operand[0].predicate (target, tmode))
33411 target = gen_reg_rtx (tmode);
33412
33413 if (VECTOR_MODE_P (mode0))
33414 op0 = safe_vector_operand (op0, mode0);
33415
33416 if ((optimize && !register_operand (op0, mode0))
33417 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
33418 op0 = copy_to_mode_reg (mode0, op0);
33419
33420 op1 = GEN_INT (d->comparison);
33421
33422 pat = GEN_FCN (d->icode) (target, op0, op1);
33423 if (! pat)
33424 return 0;
33425 emit_insn (pat);
33426 return target;
33427 }
33428
33429 static rtx
33430 ix86_expand_sse_round_vec_pack_sfix (const struct builtin_description *d,
33431 tree exp, rtx target)
33432 {
33433 rtx pat;
33434 tree arg0 = CALL_EXPR_ARG (exp, 0);
33435 tree arg1 = CALL_EXPR_ARG (exp, 1);
33436 rtx op0 = expand_normal (arg0);
33437 rtx op1 = expand_normal (arg1);
33438 rtx op2;
33439 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
33440 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
33441 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
33442
33443 if (optimize || target == 0
33444 || GET_MODE (target) != tmode
33445 || !insn_data[d->icode].operand[0].predicate (target, tmode))
33446 target = gen_reg_rtx (tmode);
33447
33448 op0 = safe_vector_operand (op0, mode0);
33449 op1 = safe_vector_operand (op1, mode1);
33450
33451 if ((optimize && !register_operand (op0, mode0))
33452 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
33453 op0 = copy_to_mode_reg (mode0, op0);
33454 if ((optimize && !register_operand (op1, mode1))
33455 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
33456 op1 = copy_to_mode_reg (mode1, op1);
33457
33458 op2 = GEN_INT (d->comparison);
33459
33460 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
33461 if (! pat)
33462 return 0;
33463 emit_insn (pat);
33464 return target;
33465 }
33466
33467 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
33468
33469 static rtx
33470 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
33471 rtx target)
33472 {
33473 rtx pat;
33474 tree arg0 = CALL_EXPR_ARG (exp, 0);
33475 tree arg1 = CALL_EXPR_ARG (exp, 1);
33476 rtx op0 = expand_normal (arg0);
33477 rtx op1 = expand_normal (arg1);
33478 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
33479 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
33480 enum rtx_code comparison = d->comparison;
33481
33482 if (VECTOR_MODE_P (mode0))
33483 op0 = safe_vector_operand (op0, mode0);
33484 if (VECTOR_MODE_P (mode1))
33485 op1 = safe_vector_operand (op1, mode1);
33486
33487 target = gen_reg_rtx (SImode);
33488 emit_move_insn (target, const0_rtx);
33489 target = gen_rtx_SUBREG (QImode, target, 0);
33490
33491 if ((optimize && !register_operand (op0, mode0))
33492 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
33493 op0 = copy_to_mode_reg (mode0, op0);
33494 if ((optimize && !register_operand (op1, mode1))
33495 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
33496 op1 = copy_to_mode_reg (mode1, op1);
33497
33498 pat = GEN_FCN (d->icode) (op0, op1);
33499 if (! pat)
33500 return 0;
33501 emit_insn (pat);
33502 emit_insn (gen_rtx_SET (VOIDmode,
33503 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
33504 gen_rtx_fmt_ee (comparison, QImode,
33505 SET_DEST (pat),
33506 const0_rtx)));
33507
33508 return SUBREG_REG (target);
33509 }
33510
33511 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
33512
33513 static rtx
33514 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
33515 tree exp, rtx target)
33516 {
33517 rtx pat;
33518 tree arg0 = CALL_EXPR_ARG (exp, 0);
33519 tree arg1 = CALL_EXPR_ARG (exp, 1);
33520 tree arg2 = CALL_EXPR_ARG (exp, 2);
33521 tree arg3 = CALL_EXPR_ARG (exp, 3);
33522 tree arg4 = CALL_EXPR_ARG (exp, 4);
33523 rtx scratch0, scratch1;
33524 rtx op0 = expand_normal (arg0);
33525 rtx op1 = expand_normal (arg1);
33526 rtx op2 = expand_normal (arg2);
33527 rtx op3 = expand_normal (arg3);
33528 rtx op4 = expand_normal (arg4);
33529 enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
33530
33531 tmode0 = insn_data[d->icode].operand[0].mode;
33532 tmode1 = insn_data[d->icode].operand[1].mode;
33533 modev2 = insn_data[d->icode].operand[2].mode;
33534 modei3 = insn_data[d->icode].operand[3].mode;
33535 modev4 = insn_data[d->icode].operand[4].mode;
33536 modei5 = insn_data[d->icode].operand[5].mode;
33537 modeimm = insn_data[d->icode].operand[6].mode;
33538
33539 if (VECTOR_MODE_P (modev2))
33540 op0 = safe_vector_operand (op0, modev2);
33541 if (VECTOR_MODE_P (modev4))
33542 op2 = safe_vector_operand (op2, modev4);
33543
33544 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
33545 op0 = copy_to_mode_reg (modev2, op0);
33546 if (!insn_data[d->icode].operand[3].predicate (op1, modei3))
33547 op1 = copy_to_mode_reg (modei3, op1);
33548 if ((optimize && !register_operand (op2, modev4))
33549 || !insn_data[d->icode].operand[4].predicate (op2, modev4))
33550 op2 = copy_to_mode_reg (modev4, op2);
33551 if (!insn_data[d->icode].operand[5].predicate (op3, modei5))
33552 op3 = copy_to_mode_reg (modei5, op3);
33553
33554 if (!insn_data[d->icode].operand[6].predicate (op4, modeimm))
33555 {
33556 error ("the fifth argument must be an 8-bit immediate");
33557 return const0_rtx;
33558 }
33559
33560 if (d->code == IX86_BUILTIN_PCMPESTRI128)
33561 {
33562 if (optimize || !target
33563 || GET_MODE (target) != tmode0
33564 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
33565 target = gen_reg_rtx (tmode0);
33566
33567 scratch1 = gen_reg_rtx (tmode1);
33568
33569 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
33570 }
33571 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
33572 {
33573 if (optimize || !target
33574 || GET_MODE (target) != tmode1
33575 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
33576 target = gen_reg_rtx (tmode1);
33577
33578 scratch0 = gen_reg_rtx (tmode0);
33579
33580 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
33581 }
33582 else
33583 {
33584 gcc_assert (d->flag);
33585
33586 scratch0 = gen_reg_rtx (tmode0);
33587 scratch1 = gen_reg_rtx (tmode1);
33588
33589 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
33590 }
33591
33592 if (! pat)
33593 return 0;
33594
33595 emit_insn (pat);
33596
33597 if (d->flag)
33598 {
33599 target = gen_reg_rtx (SImode);
33600 emit_move_insn (target, const0_rtx);
33601 target = gen_rtx_SUBREG (QImode, target, 0);
33602
33603 emit_insn
33604 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
33605 gen_rtx_fmt_ee (EQ, QImode,
33606 gen_rtx_REG ((enum machine_mode) d->flag,
33607 FLAGS_REG),
33608 const0_rtx)));
33609 return SUBREG_REG (target);
33610 }
33611 else
33612 return target;
33613 }
33614
33615
33616 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
33617
33618 static rtx
33619 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
33620 tree exp, rtx target)
33621 {
33622 rtx pat;
33623 tree arg0 = CALL_EXPR_ARG (exp, 0);
33624 tree arg1 = CALL_EXPR_ARG (exp, 1);
33625 tree arg2 = CALL_EXPR_ARG (exp, 2);
33626 rtx scratch0, scratch1;
33627 rtx op0 = expand_normal (arg0);
33628 rtx op1 = expand_normal (arg1);
33629 rtx op2 = expand_normal (arg2);
33630 enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;
33631
33632 tmode0 = insn_data[d->icode].operand[0].mode;
33633 tmode1 = insn_data[d->icode].operand[1].mode;
33634 modev2 = insn_data[d->icode].operand[2].mode;
33635 modev3 = insn_data[d->icode].operand[3].mode;
33636 modeimm = insn_data[d->icode].operand[4].mode;
33637
33638 if (VECTOR_MODE_P (modev2))
33639 op0 = safe_vector_operand (op0, modev2);
33640 if (VECTOR_MODE_P (modev3))
33641 op1 = safe_vector_operand (op1, modev3);
33642
33643 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
33644 op0 = copy_to_mode_reg (modev2, op0);
33645 if ((optimize && !register_operand (op1, modev3))
33646 || !insn_data[d->icode].operand[3].predicate (op1, modev3))
33647 op1 = copy_to_mode_reg (modev3, op1);
33648
33649 if (!insn_data[d->icode].operand[4].predicate (op2, modeimm))
33650 {
33651 error ("the third argument must be an 8-bit immediate");
33652 return const0_rtx;
33653 }
33654
33655 if (d->code == IX86_BUILTIN_PCMPISTRI128)
33656 {
33657 if (optimize || !target
33658 || GET_MODE (target) != tmode0
33659 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
33660 target = gen_reg_rtx (tmode0);
33661
33662 scratch1 = gen_reg_rtx (tmode1);
33663
33664 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
33665 }
33666 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
33667 {
33668 if (optimize || !target
33669 || GET_MODE (target) != tmode1
33670 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
33671 target = gen_reg_rtx (tmode1);
33672
33673 scratch0 = gen_reg_rtx (tmode0);
33674
33675 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
33676 }
33677 else
33678 {
33679 gcc_assert (d->flag);
33680
33681 scratch0 = gen_reg_rtx (tmode0);
33682 scratch1 = gen_reg_rtx (tmode1);
33683
33684 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
33685 }
33686
33687 if (! pat)
33688 return 0;
33689
33690 emit_insn (pat);
33691
33692 if (d->flag)
33693 {
33694 target = gen_reg_rtx (SImode);
33695 emit_move_insn (target, const0_rtx);
33696 target = gen_rtx_SUBREG (QImode, target, 0);
33697
33698 emit_insn
33699 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
33700 gen_rtx_fmt_ee (EQ, QImode,
33701 gen_rtx_REG ((enum machine_mode) d->flag,
33702 FLAGS_REG),
33703 const0_rtx)));
33704 return SUBREG_REG (target);
33705 }
33706 else
33707 return target;
33708 }
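
/* Illustrative mapping (a sketch): the SSE4.2 intrinsic
_mm_cmpistri (a, b, imm) expands through __builtin_ia32_pcmpistri128,
which is handled by the IX86_BUILTIN_PCMPISTRI128 case above, while the
flag-returning variants such as _mm_cmpistrz take the d->flag path and
read the requested condition out of FLAGS_REG into a QImode result. */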
33709
33710 /* Subroutine of ix86_expand_builtin to take care of insns with
33711 variable number of operands. */
33712
33713 static rtx
33714 ix86_expand_args_builtin (const struct builtin_description *d,
33715 tree exp, rtx target)
33716 {
33717 rtx pat, real_target;
33718 unsigned int i, nargs;
33719 unsigned int nargs_constant = 0;
33720 unsigned int mask_pos = 0;
33721 int num_memory = 0;
33722 struct
33723 {
33724 rtx op;
33725 enum machine_mode mode;
33726 } args[6];
33727 bool last_arg_count = false;
33728 enum insn_code icode = d->icode;
33729 const struct insn_data_d *insn_p = &insn_data[icode];
33730 enum machine_mode tmode = insn_p->operand[0].mode;
33731 enum machine_mode rmode = VOIDmode;
33732 bool swap = false;
33733 enum rtx_code comparison = d->comparison;
33734
33735 switch ((enum ix86_builtin_func_type) d->flag)
33736 {
33737 case V2DF_FTYPE_V2DF_ROUND:
33738 case V4DF_FTYPE_V4DF_ROUND:
33739 case V4SF_FTYPE_V4SF_ROUND:
33740 case V8SF_FTYPE_V8SF_ROUND:
33741 case V4SI_FTYPE_V4SF_ROUND:
33742 case V8SI_FTYPE_V8SF_ROUND:
33743 return ix86_expand_sse_round (d, exp, target);
33744 case V4SI_FTYPE_V2DF_V2DF_ROUND:
33745 case V8SI_FTYPE_V4DF_V4DF_ROUND:
33746 case V16SI_FTYPE_V8DF_V8DF_ROUND:
33747 return ix86_expand_sse_round_vec_pack_sfix (d, exp, target);
33748 case INT_FTYPE_V8SF_V8SF_PTEST:
33749 case INT_FTYPE_V4DI_V4DI_PTEST:
33750 case INT_FTYPE_V4DF_V4DF_PTEST:
33751 case INT_FTYPE_V4SF_V4SF_PTEST:
33752 case INT_FTYPE_V2DI_V2DI_PTEST:
33753 case INT_FTYPE_V2DF_V2DF_PTEST:
33754 return ix86_expand_sse_ptest (d, exp, target);
33755 case FLOAT128_FTYPE_FLOAT128:
33756 case FLOAT_FTYPE_FLOAT:
33757 case INT_FTYPE_INT:
33758 case UINT64_FTYPE_INT:
33759 case UINT16_FTYPE_UINT16:
33760 case INT64_FTYPE_INT64:
33761 case INT64_FTYPE_V4SF:
33762 case INT64_FTYPE_V2DF:
33763 case INT_FTYPE_V16QI:
33764 case INT_FTYPE_V8QI:
33765 case INT_FTYPE_V8SF:
33766 case INT_FTYPE_V4DF:
33767 case INT_FTYPE_V4SF:
33768 case INT_FTYPE_V2DF:
33769 case INT_FTYPE_V32QI:
33770 case V16QI_FTYPE_V16QI:
33771 case V8SI_FTYPE_V8SF:
33772 case V8SI_FTYPE_V4SI:
33773 case V8HI_FTYPE_V8HI:
33774 case V8HI_FTYPE_V16QI:
33775 case V8QI_FTYPE_V8QI:
33776 case V8SF_FTYPE_V8SF:
33777 case V8SF_FTYPE_V8SI:
33778 case V8SF_FTYPE_V4SF:
33779 case V8SF_FTYPE_V8HI:
33780 case V4SI_FTYPE_V4SI:
33781 case V4SI_FTYPE_V16QI:
33782 case V4SI_FTYPE_V4SF:
33783 case V4SI_FTYPE_V8SI:
33784 case V4SI_FTYPE_V8HI:
33785 case V4SI_FTYPE_V4DF:
33786 case V4SI_FTYPE_V2DF:
33787 case V4HI_FTYPE_V4HI:
33788 case V4DF_FTYPE_V4DF:
33789 case V4DF_FTYPE_V4SI:
33790 case V4DF_FTYPE_V4SF:
33791 case V4DF_FTYPE_V2DF:
33792 case V4SF_FTYPE_V4SF:
33793 case V4SF_FTYPE_V4SI:
33794 case V4SF_FTYPE_V8SF:
33795 case V4SF_FTYPE_V4DF:
33796 case V4SF_FTYPE_V8HI:
33797 case V4SF_FTYPE_V2DF:
33798 case V2DI_FTYPE_V2DI:
33799 case V2DI_FTYPE_V16QI:
33800 case V2DI_FTYPE_V8HI:
33801 case V2DI_FTYPE_V4SI:
33802 case V2DF_FTYPE_V2DF:
33803 case V2DF_FTYPE_V4SI:
33804 case V2DF_FTYPE_V4DF:
33805 case V2DF_FTYPE_V4SF:
33806 case V2DF_FTYPE_V2SI:
33807 case V2SI_FTYPE_V2SI:
33808 case V2SI_FTYPE_V4SF:
33809 case V2SI_FTYPE_V2SF:
33810 case V2SI_FTYPE_V2DF:
33811 case V2SF_FTYPE_V2SF:
33812 case V2SF_FTYPE_V2SI:
33813 case V32QI_FTYPE_V32QI:
33814 case V32QI_FTYPE_V16QI:
33815 case V16HI_FTYPE_V16HI:
33816 case V16HI_FTYPE_V8HI:
33817 case V8SI_FTYPE_V8SI:
33818 case V16HI_FTYPE_V16QI:
33819 case V8SI_FTYPE_V16QI:
33820 case V4DI_FTYPE_V16QI:
33821 case V8SI_FTYPE_V8HI:
33822 case V4DI_FTYPE_V8HI:
33823 case V4DI_FTYPE_V4SI:
33824 case V4DI_FTYPE_V2DI:
33825 case HI_FTYPE_HI:
33826 case UINT_FTYPE_V2DF:
33827 case UINT_FTYPE_V4SF:
33828 case UINT64_FTYPE_V2DF:
33829 case UINT64_FTYPE_V4SF:
33830 case V16QI_FTYPE_V8DI:
33831 case V16HI_FTYPE_V16SI:
33832 case V16SI_FTYPE_HI:
33833 case V16SI_FTYPE_V16SI:
33834 case V16SI_FTYPE_INT:
33835 case V16SF_FTYPE_FLOAT:
33836 case V16SF_FTYPE_V8SF:
33837 case V16SI_FTYPE_V8SI:
33838 case V16SF_FTYPE_V4SF:
33839 case V16SI_FTYPE_V4SI:
33840 case V16SF_FTYPE_V16SF:
33841 case V8HI_FTYPE_V8DI:
33842 case V8UHI_FTYPE_V8UHI:
33843 case V8SI_FTYPE_V8DI:
33844 case V8USI_FTYPE_V8USI:
33845 case V8SF_FTYPE_V8DF:
33846 case V8DI_FTYPE_QI:
33847 case V8DI_FTYPE_INT64:
33848 case V8DI_FTYPE_V4DI:
33849 case V8DI_FTYPE_V8DI:
33850 case V8DF_FTYPE_DOUBLE:
33851 case V8DF_FTYPE_V4DF:
33852 case V8DF_FTYPE_V2DF:
33853 case V8DF_FTYPE_V8DF:
33854 case V8DF_FTYPE_V8SI:
33855 nargs = 1;
33856 break;
33857 case V4SF_FTYPE_V4SF_VEC_MERGE:
33858 case V2DF_FTYPE_V2DF_VEC_MERGE:
33859 return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
33860 case FLOAT128_FTYPE_FLOAT128_FLOAT128:
33861 case V16QI_FTYPE_V16QI_V16QI:
33862 case V16QI_FTYPE_V8HI_V8HI:
33863 case V16SI_FTYPE_V16SI_V16SI:
33864 case V16SF_FTYPE_V16SF_V16SF:
33865 case V16SF_FTYPE_V16SF_V16SI:
33866 case V8QI_FTYPE_V8QI_V8QI:
33867 case V8QI_FTYPE_V4HI_V4HI:
33868 case V8HI_FTYPE_V8HI_V8HI:
33869 case V8HI_FTYPE_V16QI_V16QI:
33870 case V8HI_FTYPE_V4SI_V4SI:
33871 case V8SF_FTYPE_V8SF_V8SF:
33872 case V8SF_FTYPE_V8SF_V8SI:
33873 case V8DI_FTYPE_V8DI_V8DI:
33874 case V8DF_FTYPE_V8DF_V8DF:
33875 case V8DF_FTYPE_V8DF_V8DI:
33876 case V4SI_FTYPE_V4SI_V4SI:
33877 case V4SI_FTYPE_V8HI_V8HI:
33878 case V4SI_FTYPE_V4SF_V4SF:
33879 case V4SI_FTYPE_V2DF_V2DF:
33880 case V4HI_FTYPE_V4HI_V4HI:
33881 case V4HI_FTYPE_V8QI_V8QI:
33882 case V4HI_FTYPE_V2SI_V2SI:
33883 case V4DF_FTYPE_V4DF_V4DF:
33884 case V4DF_FTYPE_V4DF_V4DI:
33885 case V4SF_FTYPE_V4SF_V4SF:
33886 case V4SF_FTYPE_V4SF_V4SI:
33887 case V4SF_FTYPE_V4SF_V2SI:
33888 case V4SF_FTYPE_V4SF_V2DF:
33889 case V4SF_FTYPE_V4SF_UINT:
33890 case V4SF_FTYPE_V4SF_UINT64:
33891 case V4SF_FTYPE_V4SF_DI:
33892 case V4SF_FTYPE_V4SF_SI:
33893 case V2DI_FTYPE_V2DI_V2DI:
33894 case V2DI_FTYPE_V16QI_V16QI:
33895 case V2DI_FTYPE_V4SI_V4SI:
33896 case V2UDI_FTYPE_V4USI_V4USI:
33897 case V2DI_FTYPE_V2DI_V16QI:
33898 case V2DI_FTYPE_V2DF_V2DF:
33899 case V2SI_FTYPE_V2SI_V2SI:
33900 case V2SI_FTYPE_V4HI_V4HI:
33901 case V2SI_FTYPE_V2SF_V2SF:
33902 case V2DF_FTYPE_V2DF_V2DF:
33903 case V2DF_FTYPE_V2DF_V4SF:
33904 case V2DF_FTYPE_V2DF_V2DI:
33905 case V2DF_FTYPE_V2DF_DI:
33906 case V2DF_FTYPE_V2DF_SI:
33907 case V2DF_FTYPE_V2DF_UINT:
33908 case V2DF_FTYPE_V2DF_UINT64:
33909 case V2SF_FTYPE_V2SF_V2SF:
33910 case V1DI_FTYPE_V1DI_V1DI:
33911 case V1DI_FTYPE_V8QI_V8QI:
33912 case V1DI_FTYPE_V2SI_V2SI:
33913 case V32QI_FTYPE_V16HI_V16HI:
33914 case V16HI_FTYPE_V8SI_V8SI:
33915 case V32QI_FTYPE_V32QI_V32QI:
33916 case V16HI_FTYPE_V32QI_V32QI:
33917 case V16HI_FTYPE_V16HI_V16HI:
33918 case V8SI_FTYPE_V4DF_V4DF:
33919 case V8SI_FTYPE_V8SI_V8SI:
33920 case V8SI_FTYPE_V16HI_V16HI:
33921 case V4DI_FTYPE_V4DI_V4DI:
33922 case V4DI_FTYPE_V8SI_V8SI:
33923 case V4UDI_FTYPE_V8USI_V8USI:
33924 case QI_FTYPE_V8DI_V8DI:
33925 case HI_FTYPE_V16SI_V16SI:
33926 if (comparison == UNKNOWN)
33927 return ix86_expand_binop_builtin (icode, exp, target);
33928 nargs = 2;
33929 break;
33930 case V4SF_FTYPE_V4SF_V4SF_SWAP:
33931 case V2DF_FTYPE_V2DF_V2DF_SWAP:
33932 gcc_assert (comparison != UNKNOWN);
33933 nargs = 2;
33934 swap = true;
33935 break;
33936 case V16HI_FTYPE_V16HI_V8HI_COUNT:
33937 case V16HI_FTYPE_V16HI_SI_COUNT:
33938 case V8SI_FTYPE_V8SI_V4SI_COUNT:
33939 case V8SI_FTYPE_V8SI_SI_COUNT:
33940 case V4DI_FTYPE_V4DI_V2DI_COUNT:
33941 case V4DI_FTYPE_V4DI_INT_COUNT:
33942 case V8HI_FTYPE_V8HI_V8HI_COUNT:
33943 case V8HI_FTYPE_V8HI_SI_COUNT:
33944 case V4SI_FTYPE_V4SI_V4SI_COUNT:
33945 case V4SI_FTYPE_V4SI_SI_COUNT:
33946 case V4HI_FTYPE_V4HI_V4HI_COUNT:
33947 case V4HI_FTYPE_V4HI_SI_COUNT:
33948 case V2DI_FTYPE_V2DI_V2DI_COUNT:
33949 case V2DI_FTYPE_V2DI_SI_COUNT:
33950 case V2SI_FTYPE_V2SI_V2SI_COUNT:
33951 case V2SI_FTYPE_V2SI_SI_COUNT:
33952 case V1DI_FTYPE_V1DI_V1DI_COUNT:
33953 case V1DI_FTYPE_V1DI_SI_COUNT:
33954 nargs = 2;
33955 last_arg_count = true;
33956 break;
33957 case UINT64_FTYPE_UINT64_UINT64:
33958 case UINT_FTYPE_UINT_UINT:
33959 case UINT_FTYPE_UINT_USHORT:
33960 case UINT_FTYPE_UINT_UCHAR:
33961 case UINT16_FTYPE_UINT16_INT:
33962 case UINT8_FTYPE_UINT8_INT:
33963 case HI_FTYPE_HI_HI:
33964 case V16SI_FTYPE_V8DF_V8DF:
33965 nargs = 2;
33966 break;
33967 case V2DI_FTYPE_V2DI_INT_CONVERT:
33968 nargs = 2;
33969 rmode = V1TImode;
33970 nargs_constant = 1;
33971 break;
33972 case V4DI_FTYPE_V4DI_INT_CONVERT:
33973 nargs = 2;
33974 rmode = V2TImode;
33975 nargs_constant = 1;
33976 break;
33977 case V8HI_FTYPE_V8HI_INT:
33978 case V8HI_FTYPE_V8SF_INT:
33979 case V16HI_FTYPE_V16SF_INT:
33980 case V8HI_FTYPE_V4SF_INT:
33981 case V8SF_FTYPE_V8SF_INT:
33982 case V4SF_FTYPE_V16SF_INT:
33983 case V16SF_FTYPE_V16SF_INT:
33984 case V4SI_FTYPE_V4SI_INT:
33985 case V4SI_FTYPE_V8SI_INT:
33986 case V4HI_FTYPE_V4HI_INT:
33987 case V4DF_FTYPE_V4DF_INT:
33988 case V4DF_FTYPE_V8DF_INT:
33989 case V4SF_FTYPE_V4SF_INT:
33990 case V4SF_FTYPE_V8SF_INT:
33991 case V2DI_FTYPE_V2DI_INT:
33992 case V2DF_FTYPE_V2DF_INT:
33993 case V2DF_FTYPE_V4DF_INT:
33994 case V16HI_FTYPE_V16HI_INT:
33995 case V8SI_FTYPE_V8SI_INT:
33996 case V16SI_FTYPE_V16SI_INT:
33997 case V4SI_FTYPE_V16SI_INT:
33998 case V4DI_FTYPE_V4DI_INT:
33999 case V2DI_FTYPE_V4DI_INT:
34000 case V4DI_FTYPE_V8DI_INT:
34001 case HI_FTYPE_HI_INT:
34002 nargs = 2;
34003 nargs_constant = 1;
34004 break;
34005 case V16QI_FTYPE_V16QI_V16QI_V16QI:
34006 case V8SF_FTYPE_V8SF_V8SF_V8SF:
34007 case V4DF_FTYPE_V4DF_V4DF_V4DF:
34008 case V4SF_FTYPE_V4SF_V4SF_V4SF:
34009 case V2DF_FTYPE_V2DF_V2DF_V2DF:
34010 case V32QI_FTYPE_V32QI_V32QI_V32QI:
34011 case HI_FTYPE_V16SI_V16SI_HI:
34012 case QI_FTYPE_V8DI_V8DI_QI:
34013 case V16HI_FTYPE_V16SI_V16HI_HI:
34014 case V16QI_FTYPE_V16SI_V16QI_HI:
34015 case V16QI_FTYPE_V8DI_V16QI_QI:
34016 case V16SF_FTYPE_V16SF_V16SF_HI:
34017 case V16SF_FTYPE_V16SF_V16SF_V16SF:
34018 case V16SF_FTYPE_V16SF_V16SI_V16SF:
34019 case V16SF_FTYPE_V16SI_V16SF_HI:
34020 case V16SF_FTYPE_V16SI_V16SF_V16SF:
34021 case V16SF_FTYPE_V4SF_V16SF_HI:
34022 case V16SI_FTYPE_SI_V16SI_HI:
34023 case V16SI_FTYPE_V16HI_V16SI_HI:
34024 case V16SI_FTYPE_V16QI_V16SI_HI:
34025 case V16SI_FTYPE_V16SF_V16SI_HI:
34026 case V16SI_FTYPE_V16SI_V16SI_HI:
34027 case V16SI_FTYPE_V16SI_V16SI_V16SI:
34028 case V16SI_FTYPE_V4SI_V16SI_HI:
34029 case V2DI_FTYPE_V2DI_V2DI_V2DI:
34030 case V4DI_FTYPE_V4DI_V4DI_V4DI:
34031 case V8DF_FTYPE_V2DF_V8DF_QI:
34032 case V8DF_FTYPE_V4DF_V8DF_QI:
34033 case V8DF_FTYPE_V8DF_V8DF_QI:
34034 case V8DF_FTYPE_V8DF_V8DF_V8DF:
34035 case V8DF_FTYPE_V8DF_V8DI_V8DF:
34036 case V8DF_FTYPE_V8DI_V8DF_V8DF:
34037 case V8DF_FTYPE_V8SF_V8DF_QI:
34038 case V8DF_FTYPE_V8SI_V8DF_QI:
34039 case V8DI_FTYPE_DI_V8DI_QI:
34040 case V8DI_FTYPE_V16QI_V8DI_QI:
34041 case V8DI_FTYPE_V2DI_V8DI_QI:
34042 case V8DI_FTYPE_V4DI_V8DI_QI:
34043 case V8DI_FTYPE_V8DI_V8DI_QI:
34044 case V8DI_FTYPE_V8DI_V8DI_V8DI:
34045 case V8DI_FTYPE_V8HI_V8DI_QI:
34046 case V8DI_FTYPE_V8SI_V8DI_QI:
34047 case V8HI_FTYPE_V8DI_V8HI_QI:
34048 case V8SF_FTYPE_V8DF_V8SF_QI:
34049 case V8SI_FTYPE_V8DF_V8SI_QI:
34050 case V8SI_FTYPE_V8DI_V8SI_QI:
34051 case V4SI_FTYPE_V4SI_V4SI_V4SI:
34052 nargs = 3;
34053 break;
34054 case V32QI_FTYPE_V32QI_V32QI_INT:
34055 case V16HI_FTYPE_V16HI_V16HI_INT:
34056 case V16QI_FTYPE_V16QI_V16QI_INT:
34057 case V4DI_FTYPE_V4DI_V4DI_INT:
34058 case V8HI_FTYPE_V8HI_V8HI_INT:
34059 case V8SI_FTYPE_V8SI_V8SI_INT:
34060 case V8SI_FTYPE_V8SI_V4SI_INT:
34061 case V8SF_FTYPE_V8SF_V8SF_INT:
34062 case V8SF_FTYPE_V8SF_V4SF_INT:
34063 case V4SI_FTYPE_V4SI_V4SI_INT:
34064 case V4DF_FTYPE_V4DF_V4DF_INT:
34065 case V16SF_FTYPE_V16SF_V16SF_INT:
34066 case V16SF_FTYPE_V16SF_V4SF_INT:
34067 case V16SI_FTYPE_V16SI_V4SI_INT:
34068 case V4DF_FTYPE_V4DF_V2DF_INT:
34069 case V4SF_FTYPE_V4SF_V4SF_INT:
34070 case V2DI_FTYPE_V2DI_V2DI_INT:
34071 case V4DI_FTYPE_V4DI_V2DI_INT:
34072 case V2DF_FTYPE_V2DF_V2DF_INT:
34073 case QI_FTYPE_V8DI_V8DI_INT:
34074 case QI_FTYPE_V8DF_V8DF_INT:
34075 case QI_FTYPE_V2DF_V2DF_INT:
34076 case QI_FTYPE_V4SF_V4SF_INT:
34077 case HI_FTYPE_V16SI_V16SI_INT:
34078 case HI_FTYPE_V16SF_V16SF_INT:
34079 nargs = 3;
34080 nargs_constant = 1;
34081 break;
34082 case V4DI_FTYPE_V4DI_V4DI_INT_CONVERT:
34083 nargs = 3;
34084 rmode = V4DImode;
34085 nargs_constant = 1;
34086 break;
34087 case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT:
34088 nargs = 3;
34089 rmode = V2DImode;
34090 nargs_constant = 1;
34091 break;
34092 case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT:
34093 nargs = 3;
34094 rmode = DImode;
34095 nargs_constant = 1;
34096 break;
34097 case V2DI_FTYPE_V2DI_UINT_UINT:
34098 nargs = 3;
34099 nargs_constant = 2;
34100 break;
34101 case V16SF_FTYPE_V16SF_V16SF_V16SF_HI:
34102 case V16SF_FTYPE_V16SF_V16SI_V16SF_HI:
34103 case V16SF_FTYPE_V16SI_V16SF_V16SF_HI:
34104 case V16SI_FTYPE_V16SI_V16SI_V16SI_HI:
34105 case V16SI_FTYPE_V16SI_V4SI_V16SI_HI:
34106 case V2DF_FTYPE_V2DF_V2DF_V2DF_QI:
34107 case V2DF_FTYPE_V2DF_V4SF_V2DF_QI:
34108 case V4SF_FTYPE_V4SF_V2DF_V4SF_QI:
34109 case V4SF_FTYPE_V4SF_V4SF_V4SF_QI:
34110 case V8DF_FTYPE_V8DF_V8DF_V8DF_QI:
34111 case V8DF_FTYPE_V8DF_V8DI_V8DF_QI:
34112 case V8DF_FTYPE_V8DI_V8DF_V8DF_QI:
34113 case V8DI_FTYPE_V16SI_V16SI_V8DI_QI:
34114 case V8DI_FTYPE_V8DI_SI_V8DI_V8DI:
34115 case V8DI_FTYPE_V8DI_V2DI_V8DI_QI:
34116 case V8DI_FTYPE_V8DI_V8DI_V8DI_QI:
34117 nargs = 4;
34118 break;
34119 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
34120 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
34121 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
34122 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
34123 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT:
34124 nargs = 4;
34125 nargs_constant = 1;
34126 break;
34127 case QI_FTYPE_V2DF_V2DF_INT_QI:
34128 case QI_FTYPE_V4SF_V4SF_INT_QI:
34129 nargs = 4;
34130 mask_pos = 1;
34131 nargs_constant = 1;
34132 break;
34133 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
34134 nargs = 4;
34135 nargs_constant = 2;
34136 break;
34137 case UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED:
34138 case UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG:
34139 nargs = 4;
34140 break;
34141 case QI_FTYPE_V8DI_V8DI_INT_QI:
34142 case HI_FTYPE_V16SI_V16SI_INT_HI:
34143 case QI_FTYPE_V8DF_V8DF_INT_QI:
34144 case HI_FTYPE_V16SF_V16SF_INT_HI:
34145 mask_pos = 1;
34146 nargs = 4;
34147 nargs_constant = 1;
34148 break;
34149 case V8DF_FTYPE_V8DF_INT_V8DF_QI:
34150 case V16SF_FTYPE_V16SF_INT_V16SF_HI:
34151 case V16HI_FTYPE_V16SF_INT_V16HI_HI:
34152 case V16SI_FTYPE_V16SI_INT_V16SI_HI:
34153 case V4SI_FTYPE_V16SI_INT_V4SI_QI:
34154 case V4DI_FTYPE_V8DI_INT_V4DI_QI:
34155 case V4DF_FTYPE_V8DF_INT_V4DF_QI:
34156 case V4SF_FTYPE_V16SF_INT_V4SF_QI:
34157 case V8DI_FTYPE_V8DI_INT_V8DI_QI:
34158 nargs = 4;
34159 mask_pos = 2;
34160 nargs_constant = 1;
34161 break;
34162 case V16SF_FTYPE_V16SF_V4SF_INT_V16SF_HI:
34163 case V16SI_FTYPE_V16SI_V4SI_INT_V16SI_HI:
34164 case V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI:
34165 case V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI:
34166 case V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI:
34167 case V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI:
34168 case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI:
34169 case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI:
34170 case V8DF_FTYPE_V8DF_V4DF_INT_V8DF_QI:
34171 case V8DI_FTYPE_V8DI_V4DI_INT_V8DI_QI:
34172 nargs = 5;
34173 mask_pos = 2;
34174 nargs_constant = 1;
34175 break;
34176 case V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI:
34177 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI:
34178 case V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI:
34179 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI:
34180 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI:
34181 nargs = 5;
34182 mask_pos = 1;
34183 nargs_constant = 1;
34184 break;
34185
34186 default:
34187 gcc_unreachable ();
34188 }
34189
34190 gcc_assert (nargs <= ARRAY_SIZE (args));
34191
34192 if (comparison != UNKNOWN)
34193 {
34194 gcc_assert (nargs == 2);
34195 return ix86_expand_sse_compare (d, exp, target, swap);
34196 }
34197
34198 if (rmode == VOIDmode || rmode == tmode)
34199 {
34200 if (optimize
34201 || target == 0
34202 || GET_MODE (target) != tmode
34203 || !insn_p->operand[0].predicate (target, tmode))
34204 target = gen_reg_rtx (tmode);
34205 real_target = target;
34206 }
34207 else
34208 {
34209 real_target = gen_reg_rtx (tmode);
34210 target = simplify_gen_subreg (rmode, real_target, tmode, 0);
34211 }
34212
34213 for (i = 0; i < nargs; i++)
34214 {
34215 tree arg = CALL_EXPR_ARG (exp, i);
34216 rtx op = expand_normal (arg);
34217 enum machine_mode mode = insn_p->operand[i + 1].mode;
34218 bool match = insn_p->operand[i + 1].predicate (op, mode);
34219
34220 if (last_arg_count && (i + 1) == nargs)
34221 {
34222 /* SIMD shift insns take either an 8-bit immediate or
34223 register as count. But builtin functions take int as
34224 count. If count doesn't match, we put it in register. */
34225 if (!match)
34226 {
34227 op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
34228 if (!insn_p->operand[i + 1].predicate (op, mode))
34229 op = copy_to_reg (op);
34230 }
34231 }
34232 else if ((mask_pos && (nargs - i - mask_pos) == nargs_constant)
34233 || (!mask_pos && (nargs - i) <= nargs_constant))
34234 {
34235 if (!match)
34236 switch (icode)
34237 {
34238 case CODE_FOR_avx_vinsertf128v4di:
34239 case CODE_FOR_avx_vextractf128v4di:
34240 error ("the last argument must be a 1-bit immediate");
34241 return const0_rtx;
34242
34243 case CODE_FOR_avx512f_cmpv8di3_mask:
34244 case CODE_FOR_avx512f_cmpv16si3_mask:
34245 case CODE_FOR_avx512f_ucmpv8di3_mask:
34246 case CODE_FOR_avx512f_ucmpv16si3_mask:
34247 case CODE_FOR_avx512vl_cmpv4di3_mask:
34248 case CODE_FOR_avx512vl_cmpv8si3_mask:
34249 case CODE_FOR_avx512vl_ucmpv4di3_mask:
34250 case CODE_FOR_avx512vl_ucmpv8si3_mask:
34251 case CODE_FOR_avx512vl_cmpv2di3_mask:
34252 case CODE_FOR_avx512vl_cmpv4si3_mask:
34253 case CODE_FOR_avx512vl_ucmpv2di3_mask:
34254 case CODE_FOR_avx512vl_ucmpv4si3_mask:
34255 error ("the last argument must be a 3-bit immediate");
34256 return const0_rtx;
34257
34258 case CODE_FOR_sse4_1_roundsd:
34259 case CODE_FOR_sse4_1_roundss:
34260
34261 case CODE_FOR_sse4_1_roundpd:
34262 case CODE_FOR_sse4_1_roundps:
34263 case CODE_FOR_avx_roundpd256:
34264 case CODE_FOR_avx_roundps256:
34265
34266 case CODE_FOR_sse4_1_roundpd_vec_pack_sfix:
34267 case CODE_FOR_sse4_1_roundps_sfix:
34268 case CODE_FOR_avx_roundpd_vec_pack_sfix256:
34269 case CODE_FOR_avx_roundps_sfix256:
34270
34271 case CODE_FOR_sse4_1_blendps:
34272 case CODE_FOR_avx_blendpd256:
34273 case CODE_FOR_avx_vpermilv4df:
34274 case CODE_FOR_avx512f_getmantv8df_mask:
34275 case CODE_FOR_avx512f_getmantv16sf_mask:
34276 case CODE_FOR_avx512vl_getmantv8sf_mask:
34277 case CODE_FOR_avx512vl_getmantv4df_mask:
34278 case CODE_FOR_avx512vl_getmantv4sf_mask:
34279 case CODE_FOR_avx512vl_getmantv2df_mask:
34280 case CODE_FOR_avx512dq_rangepv8df_mask_round:
34281 case CODE_FOR_avx512dq_rangepv16sf_mask_round:
34282 case CODE_FOR_avx512dq_rangepv4df_mask:
34283 case CODE_FOR_avx512dq_rangepv8sf_mask:
34284 case CODE_FOR_avx512dq_rangepv2df_mask:
34285 case CODE_FOR_avx512dq_rangepv4sf_mask:
34286 error ("the last argument must be a 4-bit immediate");
34287 return const0_rtx;
34288
34289 case CODE_FOR_sha1rnds4:
34290 case CODE_FOR_sse4_1_blendpd:
34291 case CODE_FOR_avx_vpermilv2df:
34292 case CODE_FOR_xop_vpermil2v2df3:
34293 case CODE_FOR_xop_vpermil2v4sf3:
34294 case CODE_FOR_xop_vpermil2v4df3:
34295 case CODE_FOR_xop_vpermil2v8sf3:
34296 case CODE_FOR_avx512f_vinsertf32x4_mask:
34297 case CODE_FOR_avx512f_vinserti32x4_mask:
34298 case CODE_FOR_avx512f_vextractf32x4_mask:
34299 case CODE_FOR_avx512f_vextracti32x4_mask:
34300 case CODE_FOR_sse2_shufpd:
34301 case CODE_FOR_sse2_shufpd_mask:
34302 case CODE_FOR_avx512dq_shuf_f64x2_mask:
34303 case CODE_FOR_avx512dq_shuf_i64x2_mask:
34304 case CODE_FOR_avx512vl_shuf_i32x4_mask:
34305 case CODE_FOR_avx512vl_shuf_f32x4_mask:
34306 error ("the last argument must be a 2-bit immediate");
34307 return const0_rtx;
34308
34309 case CODE_FOR_avx_vextractf128v4df:
34310 case CODE_FOR_avx_vextractf128v8sf:
34311 case CODE_FOR_avx_vextractf128v8si:
34312 case CODE_FOR_avx_vinsertf128v4df:
34313 case CODE_FOR_avx_vinsertf128v8sf:
34314 case CODE_FOR_avx_vinsertf128v8si:
34315 case CODE_FOR_avx512f_vinsertf64x4_mask:
34316 case CODE_FOR_avx512f_vinserti64x4_mask:
34317 case CODE_FOR_avx512f_vextractf64x4_mask:
34318 case CODE_FOR_avx512f_vextracti64x4_mask:
34319 case CODE_FOR_avx512dq_vinsertf32x8_mask:
34320 case CODE_FOR_avx512dq_vinserti32x8_mask:
34321 case CODE_FOR_avx512vl_vinsertv4df:
34322 case CODE_FOR_avx512vl_vinsertv4di:
34323 case CODE_FOR_avx512vl_vinsertv8sf:
34324 case CODE_FOR_avx512vl_vinsertv8si:
34325 error ("the last argument must be a 1-bit immediate");
34326 return const0_rtx;
34327
34328 case CODE_FOR_avx_vmcmpv2df3:
34329 case CODE_FOR_avx_vmcmpv4sf3:
34330 case CODE_FOR_avx_cmpv2df3:
34331 case CODE_FOR_avx_cmpv4sf3:
34332 case CODE_FOR_avx_cmpv4df3:
34333 case CODE_FOR_avx_cmpv8sf3:
34334 case CODE_FOR_avx512f_cmpv8df3_mask:
34335 case CODE_FOR_avx512f_cmpv16sf3_mask:
34336 case CODE_FOR_avx512f_vmcmpv2df3_mask:
34337 case CODE_FOR_avx512f_vmcmpv4sf3_mask:
34338 error ("the last argument must be a 5-bit immediate");
34339 return const0_rtx;
34340
34341 default:
34342 switch (nargs_constant)
34343 {
34344 case 2:
34345 if ((mask_pos && (nargs - i - mask_pos) == nargs_constant)
34346 || (!mask_pos && (nargs - i) == nargs_constant))
34347 {
34348 error ("the next to last argument must be an 8-bit immediate");
34349 break;
34350 }
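/* FALLTHRU */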
34351 case 1:
34352 error ("the last argument must be an 8-bit immediate");
34353 break;
34354 default:
34355 gcc_unreachable ();
34356 }
34357 return const0_rtx;
34358 }
34359 }
34360 else
34361 {
34362 if (VECTOR_MODE_P (mode))
34363 op = safe_vector_operand (op, mode);
34364
34365 /* If we aren't optimizing, only allow one memory operand to
34366 be generated. */
34367 if (memory_operand (op, mode))
34368 num_memory++;
34369
34370 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
34371 {
34372 if (optimize || !match || num_memory > 1)
34373 op = copy_to_mode_reg (mode, op);
34374 }
34375 else
34376 {
34377 op = copy_to_reg (op);
34378 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
34379 }
34380 }
34381
34382 args[i].op = op;
34383 args[i].mode = mode;
34384 }
34385
34386 switch (nargs)
34387 {
34388 case 1:
34389 pat = GEN_FCN (icode) (real_target, args[0].op);
34390 break;
34391 case 2:
34392 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
34393 break;
34394 case 3:
34395 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
34396 args[2].op);
34397 break;
34398 case 4:
34399 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
34400 args[2].op, args[3].op);
34401 break;
34402 case 5:
34403 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
34404 args[2].op, args[3].op, args[4].op);
break;
34405 case 6:
34406 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
34407 args[2].op, args[3].op, args[4].op,
34408 args[5].op);
34409 break;
34410 default:
34411 gcc_unreachable ();
34412 }
34413
34414 if (! pat)
34415 return 0;
34416
34417 emit_insn (pat);
34418 return target;
34419 }
34420
34421 /* Transform a pattern of the following layout:
34422 (parallel [
34423 (set (A B))
34424 (unspec [C] UNSPEC_EMBEDDED_ROUNDING)
34425 ])
34426 into:
34427 (set (A B))
34428
34429 Or:
34430 (parallel [ A B
34431 ...
34432 (unspec [C] UNSPEC_EMBEDDED_ROUNDING)
34433 ...
34434 ])
34435 into:
34436 (parallel [ A B ... ]) */
34437
34438 static rtx
34439 ix86_erase_embedded_rounding (rtx pat)
34440 {
34441 if (GET_CODE (pat) == INSN)
34442 pat = PATTERN (pat);
34443
34444 gcc_assert (GET_CODE (pat) == PARALLEL);
34445
34446 if (XVECLEN (pat, 0) == 2)
34447 {
34448 rtx p0 = XVECEXP (pat, 0, 0);
34449 rtx p1 = XVECEXP (pat, 0, 1);
34450
34451 gcc_assert (GET_CODE (p0) == SET
34452 && GET_CODE (p1) == UNSPEC
34453 && XINT (p1, 1) == UNSPEC_EMBEDDED_ROUNDING);
34454
34455 return p0;
34456 }
34457 else
34458 {
34459 rtx *res = XALLOCAVEC (rtx, XVECLEN (pat, 0));
34460 int i = 0;
34461 int j = 0;
34462
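/* Copy every element except the UNSPEC_EMBEDDED_ROUNDING marker.  */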
34463 for (; i < XVECLEN (pat, 0); ++i)
34464 {
34465 rtx elem = XVECEXP (pat, 0, i);
34466 if (GET_CODE (elem) != UNSPEC
34467 || XINT (elem, 1) != UNSPEC_EMBEDDED_ROUNDING)
34468 res [j++] = elem;
34469 }
34470
34471 /* No more than one occurrence was removed.  */
34472 gcc_assert (j >= XVECLEN (pat, 0) - 1);
34473
34474 return gen_rtx_PARALLEL (GET_MODE (pat), gen_rtvec_v (j, res));
34475 }
34476 }
34477
34478 /* Subroutine of ix86_expand_round_builtin to take care of comi insns
34479 with rounding. */
34480 static rtx
34481 ix86_expand_sse_comi_round (const struct builtin_description *d,
34482 tree exp, rtx target)
34483 {
34484 rtx pat, set_dst;
34485 tree arg0 = CALL_EXPR_ARG (exp, 0);
34486 tree arg1 = CALL_EXPR_ARG (exp, 1);
34487 tree arg2 = CALL_EXPR_ARG (exp, 2);
34488 tree arg3 = CALL_EXPR_ARG (exp, 3);
34489 rtx op0 = expand_normal (arg0);
34490 rtx op1 = expand_normal (arg1);
34491 rtx op2 = expand_normal (arg2);
34492 rtx op3 = expand_normal (arg3);
34493 enum insn_code icode = d->icode;
34494 const struct insn_data_d *insn_p = &insn_data[icode];
34495 enum machine_mode mode0 = insn_p->operand[0].mode;
34496 enum machine_mode mode1 = insn_p->operand[1].mode;
34497 enum rtx_code comparison = UNEQ;
34498 bool need_ucomi = false;
34499
34500 /* See avxintrin.h for values. */
34501 enum rtx_code comi_comparisons[32] =
34502 {
34503 UNEQ, GT, GE, UNORDERED, LTGT, UNLE, UNLT, ORDERED, UNEQ, UNLT,
34504 UNLE, LT, LTGT, GE, GT, LT, UNEQ, GT, GE, UNORDERED, LTGT, UNLE,
34505 UNLT, ORDERED, UNEQ, UNLT, UNLE, LT, LTGT, GE, GT, LT
34506 };
34507 bool need_ucomi_values[32] =
34508 {
34509 true, false, false, true, true, false, false, true,
34510 true, false, false, true, true, false, false, true,
34511 false, true, true, false, false, true, true, false,
34512 false, true, true, false, false, true, true, false
34513 };
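/* The table indices are the _CMP_* predicate immediates from
   avxintrin.h: NEED_UCOMI selects the non-signaling (UCOMI) form
   for the quiet predicates and the signaling (COMI) form otherwise.  */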
34514
34515 if (!CONST_INT_P (op2))
34516 {
34517 error ("the third argument must be a comparison constant");
34518 return const0_rtx;
34519 }
34520 if (INTVAL (op2) < 0 || INTVAL (op2) >= 32)
34521 {
34522 error ("incorrect comparison mode");
34523 return const0_rtx;
34524 }
34525
34526 if (!insn_p->operand[2].predicate (op3, SImode))
34527 {
34528 error ("incorrect rounding operand");
34529 return const0_rtx;
34530 }
34531
34532 comparison = comi_comparisons[INTVAL (op2)];
34533 need_ucomi = need_ucomi_values[INTVAL (op2)];
34534
34535 if (VECTOR_MODE_P (mode0))
34536 op0 = safe_vector_operand (op0, mode0);
34537 if (VECTOR_MODE_P (mode1))
34538 op1 = safe_vector_operand (op1, mode1);
34539
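/* Build the comparison result in the low byte of a zeroed SImode
   register so that the value we return is already zero-extended.  */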
34540 target = gen_reg_rtx (SImode);
34541 emit_move_insn (target, const0_rtx);
34542 target = gen_rtx_SUBREG (QImode, target, 0);
34543
34544 if ((optimize && !register_operand (op0, mode0))
34545 || !insn_p->operand[0].predicate (op0, mode0))
34546 op0 = copy_to_mode_reg (mode0, op0);
34547 if ((optimize && !register_operand (op1, mode1))
34548 || !insn_p->operand[1].predicate (op1, mode1))
34549 op1 = copy_to_mode_reg (mode1, op1);
34550
34551 if (need_ucomi)
34552 icode = icode == CODE_FOR_sse_comi_round
34553 ? CODE_FOR_sse_ucomi_round
34554 : CODE_FOR_sse2_ucomi_round;
34555
34556 pat = GEN_FCN (icode) (op0, op1, op3);
34557 if (! pat)
34558 return 0;
34559
34560 /* Rounding operand can be either NO_ROUND or ROUND_SAE at this point. */
34561 if (INTVAL (op3) == NO_ROUND)
34562 {
34563 pat = ix86_erase_embedded_rounding (pat);
34564 if (! pat)
34565 return 0;
34566
34567 set_dst = SET_DEST (pat);
34568 }
34569 else
34570 {
34571 gcc_assert (GET_CODE (XVECEXP (pat, 0, 0)) == SET);
34572 set_dst = SET_DEST (XVECEXP (pat, 0, 0));
34573 }
34574
34575 emit_insn (pat);
34576 emit_insn (gen_rtx_SET (VOIDmode,
34577 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
34578 gen_rtx_fmt_ee (comparison, QImode,
34579 set_dst,
34580 const0_rtx)));
34581
34582 return SUBREG_REG (target);
34583 }
34584
34585 static rtx
34586 ix86_expand_round_builtin (const struct builtin_description *d,
34587 tree exp, rtx target)
34588 {
34589 rtx pat;
34590 unsigned int i, nargs;
34591 struct
34592 {
34593 rtx op;
34594 enum machine_mode mode;
34595 } args[6];
34596 enum insn_code icode = d->icode;
34597 const struct insn_data_d *insn_p = &insn_data[icode];
34598 enum machine_mode tmode = insn_p->operand[0].mode;
34599 unsigned int nargs_constant = 0;
34600 unsigned int redundant_embed_rnd = 0;
34601
34602 switch ((enum ix86_builtin_func_type) d->flag)
34603 {
34604 case UINT64_FTYPE_V2DF_INT:
34605 case UINT64_FTYPE_V4SF_INT:
34606 case UINT_FTYPE_V2DF_INT:
34607 case UINT_FTYPE_V4SF_INT:
34608 case INT64_FTYPE_V2DF_INT:
34609 case INT64_FTYPE_V4SF_INT:
34610 case INT_FTYPE_V2DF_INT:
34611 case INT_FTYPE_V4SF_INT:
34612 nargs = 2;
34613 break;
34614 case V4SF_FTYPE_V4SF_UINT_INT:
34615 case V4SF_FTYPE_V4SF_UINT64_INT:
34616 case V2DF_FTYPE_V2DF_UINT64_INT:
34617 case V4SF_FTYPE_V4SF_INT_INT:
34618 case V4SF_FTYPE_V4SF_INT64_INT:
34619 case V2DF_FTYPE_V2DF_INT64_INT:
34620 case V4SF_FTYPE_V4SF_V4SF_INT:
34621 case V2DF_FTYPE_V2DF_V2DF_INT:
34622 case V4SF_FTYPE_V4SF_V2DF_INT:
34623 case V2DF_FTYPE_V2DF_V4SF_INT:
34624 nargs = 3;
34625 break;
34626 case V8SF_FTYPE_V8DF_V8SF_QI_INT:
34627 case V8DF_FTYPE_V8DF_V8DF_QI_INT:
34628 case V8SI_FTYPE_V8DF_V8SI_QI_INT:
34629 case V16SF_FTYPE_V16SF_V16SF_HI_INT:
34630 case V16SF_FTYPE_V16SI_V16SF_HI_INT:
34631 case V16SI_FTYPE_V16SF_V16SI_HI_INT:
34632 case V8DF_FTYPE_V8SF_V8DF_QI_INT:
34633 case V16SF_FTYPE_V16HI_V16SF_HI_INT:
34634 case V2DF_FTYPE_V2DF_V2DF_V2DF_INT:
34635 case V4SF_FTYPE_V4SF_V4SF_V4SF_INT:
34636 nargs = 4;
34637 break;
34638 case V4SF_FTYPE_V4SF_V4SF_INT_INT:
34639 case V2DF_FTYPE_V2DF_V2DF_INT_INT:
34640 nargs_constant = 2;
34641 nargs = 4;
34642 break;
34643 case INT_FTYPE_V4SF_V4SF_INT_INT:
34644 case INT_FTYPE_V2DF_V2DF_INT_INT:
34645 return ix86_expand_sse_comi_round (d, exp, target);
34646 case V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT:
34647 case V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT:
34648 case V2DF_FTYPE_V2DF_V2DF_V2DF_QI_INT:
34649 case V2DF_FTYPE_V2DF_V4SF_V2DF_QI_INT:
34650 case V4SF_FTYPE_V4SF_V4SF_V4SF_QI_INT:
34651 case V4SF_FTYPE_V4SF_V2DF_V4SF_QI_INT:
34652 nargs = 5;
34653 break;
34654 case V16SF_FTYPE_V16SF_INT_V16SF_HI_INT:
34655 case V8DF_FTYPE_V8DF_INT_V8DF_QI_INT:
34656 nargs_constant = 4;
34657 nargs = 5;
34658 break;
34659 case QI_FTYPE_V8DF_V8DF_INT_QI_INT:
34660 case QI_FTYPE_V2DF_V2DF_INT_QI_INT:
34661 case HI_FTYPE_V16SF_V16SF_INT_HI_INT:
34662 case QI_FTYPE_V4SF_V4SF_INT_QI_INT:
34663 nargs_constant = 3;
34664 nargs = 5;
34665 break;
34666 case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI_INT:
34667 case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI_INT:
34668 nargs = 6;
34669 nargs_constant = 4;
34670 break;
34671 case V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT:
34672 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT:
34673 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT:
34674 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT:
34675 nargs = 6;
34676 nargs_constant = 3;
34677 break;
34678 default:
34679 gcc_unreachable ();
34680 }
34681 gcc_assert (nargs <= ARRAY_SIZE (args));
34682
34683 if (optimize
34684 || target == 0
34685 || GET_MODE (target) != tmode
34686 || !insn_p->operand[0].predicate (target, tmode))
34687 target = gen_reg_rtx (tmode);
34688
34689 for (i = 0; i < nargs; i++)
34690 {
34691 tree arg = CALL_EXPR_ARG (exp, i);
34692 rtx op = expand_normal (arg);
34693 enum machine_mode mode = insn_p->operand[i + 1].mode;
34694 bool match = insn_p->operand[i + 1].predicate (op, mode);
34695
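/* For the *_round builtins the value operands come first, the single
   immediate (if any) sits NARGS_CONSTANT positions from the end, and
   the rounding-mode operand is always last.  */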
34696 if (i == nargs - nargs_constant)
34697 {
34698 if (!match)
34699 {
34700 switch (icode)
34701 {
34702 case CODE_FOR_avx512f_getmantv8df_mask_round:
34703 case CODE_FOR_avx512f_getmantv16sf_mask_round:
34704 case CODE_FOR_avx512f_vgetmantv2df_round:
34705 case CODE_FOR_avx512f_vgetmantv4sf_round:
34706 error ("the immediate argument must be a 4-bit immediate");
34707 return const0_rtx;
34708 case CODE_FOR_avx512f_cmpv8df3_mask_round:
34709 case CODE_FOR_avx512f_cmpv16sf3_mask_round:
34710 case CODE_FOR_avx512f_vmcmpv2df3_mask_round:
34711 case CODE_FOR_avx512f_vmcmpv4sf3_mask_round:
34712 error ("the immediate argument must be a 5-bit immediate");
34713 return const0_rtx;
34714 default:
34715 error ("the immediate argument must be an 8-bit immediate");
34716 return const0_rtx;
34717 }
34718 }
34719 }
34720 else if (i == nargs - 1)
34721 {
34722 if (!insn_p->operand[nargs].predicate (op, SImode))
34723 {
34724 error ("incorrect rounding operand");
34725 return const0_rtx;
34726 }
34727
34728 /* If there is no rounding, use the normal version of the pattern.  */
34729 if (INTVAL (op) == NO_ROUND)
34730 redundant_embed_rnd = 1;
34731 }
34732 else
34733 {
34734 if (VECTOR_MODE_P (mode))
34735 op = safe_vector_operand (op, mode);
34736
34737 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
34738 {
34739 if (optimize || !match)
34740 op = copy_to_mode_reg (mode, op);
34741 }
34742 else
34743 {
34744 op = copy_to_reg (op);
34745 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
34746 }
34747 }
34748
34749 args[i].op = op;
34750 args[i].mode = mode;
34751 }
34752
34753 switch (nargs)
34754 {
34755 case 1:
34756 pat = GEN_FCN (icode) (target, args[0].op);
34757 break;
34758 case 2:
34759 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
34760 break;
34761 case 3:
34762 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
34763 args[2].op);
34764 break;
34765 case 4:
34766 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
34767 args[2].op, args[3].op);
34768 break;
34769 case 5:
34770 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
34771 args[2].op, args[3].op, args[4].op);
break;
34772 case 6:
34773 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
34774 args[2].op, args[3].op, args[4].op,
34775 args[5].op);
34776 break;
34777 default:
34778 gcc_unreachable ();
34779 }
34780
34781 if (!pat)
34782 return 0;
34783
34784 if (redundant_embed_rnd)
34785 pat = ix86_erase_embedded_rounding (pat);
34786
34787 emit_insn (pat);
34788 return target;
34789 }
34790
34791 /* Subroutine of ix86_expand_builtin to take care of special insns
34792 with variable number of operands. */
34793
34794 static rtx
34795 ix86_expand_special_args_builtin (const struct builtin_description *d,
34796 tree exp, rtx target)
34797 {
34798 tree arg;
34799 rtx pat, op;
34800 unsigned int i, nargs, arg_adjust, memory;
34801 bool aligned_mem = false;
34802 struct
34803 {
34804 rtx op;
34805 enum machine_mode mode;
34806 } args[3];
34807 enum insn_code icode = d->icode;
34808 bool last_arg_constant = false;
34809 const struct insn_data_d *insn_p = &insn_data[icode];
34810 enum machine_mode tmode = insn_p->operand[0].mode;
34811 enum { load, store } klass;
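/* KLASS says whether the builtin loads from or stores to memory;
   MEMORY is the index of the argument to be treated as the memory
   operand, or ARRAY_SIZE (args) when none of the arguments is one
   (e.g. when the store destination itself is the memory operand).  */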
34812
34813 switch ((enum ix86_builtin_func_type) d->flag)
34814 {
34815 case VOID_FTYPE_VOID:
34816 emit_insn (GEN_FCN (icode) (target));
34817 return 0;
34818 case VOID_FTYPE_UINT64:
34819 case VOID_FTYPE_UNSIGNED:
34820 nargs = 0;
34821 klass = store;
34822 memory = 0;
34823 break;
34824
34825 case INT_FTYPE_VOID:
34826 case USHORT_FTYPE_VOID:
34827 case UINT64_FTYPE_VOID:
34828 case UNSIGNED_FTYPE_VOID:
34829 nargs = 0;
34830 klass = load;
34831 memory = 0;
34832 break;
34833 case UINT64_FTYPE_PUNSIGNED:
34834 case V2DI_FTYPE_PV2DI:
34835 case V4DI_FTYPE_PV4DI:
34836 case V32QI_FTYPE_PCCHAR:
34837 case V16QI_FTYPE_PCCHAR:
34838 case V8SF_FTYPE_PCV4SF:
34839 case V8SF_FTYPE_PCFLOAT:
34840 case V4SF_FTYPE_PCFLOAT:
34841 case V4DF_FTYPE_PCV2DF:
34842 case V4DF_FTYPE_PCDOUBLE:
34843 case V2DF_FTYPE_PCDOUBLE:
34844 case VOID_FTYPE_PVOID:
34845 case V16SI_FTYPE_PV4SI:
34846 case V16SF_FTYPE_PV4SF:
34847 case V8DI_FTYPE_PV4DI:
34848 case V8DI_FTYPE_PV8DI:
34849 case V8DF_FTYPE_PV4DF:
34850 nargs = 1;
34851 klass = load;
34852 memory = 0;
34853 switch (icode)
34854 {
34855 case CODE_FOR_sse4_1_movntdqa:
34856 case CODE_FOR_avx2_movntdqa:
34857 case CODE_FOR_avx512f_movntdqa:
34858 aligned_mem = true;
34859 break;
34860 default:
34861 break;
34862 }
34863 break;
34864 case VOID_FTYPE_PV2SF_V4SF:
34865 case VOID_FTYPE_PV8DI_V8DI:
34866 case VOID_FTYPE_PV4DI_V4DI:
34867 case VOID_FTYPE_PV2DI_V2DI:
34868 case VOID_FTYPE_PCHAR_V32QI:
34869 case VOID_FTYPE_PCHAR_V16QI:
34870 case VOID_FTYPE_PFLOAT_V16SF:
34871 case VOID_FTYPE_PFLOAT_V8SF:
34872 case VOID_FTYPE_PFLOAT_V4SF:
34873 case VOID_FTYPE_PDOUBLE_V8DF:
34874 case VOID_FTYPE_PDOUBLE_V4DF:
34875 case VOID_FTYPE_PDOUBLE_V2DF:
34876 case VOID_FTYPE_PLONGLONG_LONGLONG:
34877 case VOID_FTYPE_PULONGLONG_ULONGLONG:
34878 case VOID_FTYPE_PINT_INT:
34879 nargs = 1;
34880 klass = store;
34881 /* Reserve memory operand for target. */
34882 memory = ARRAY_SIZE (args);
34883 switch (icode)
34884 {
34885 /* These builtins and instructions require the memory
34886 to be properly aligned. */
34887 case CODE_FOR_avx_movntv4di:
34888 case CODE_FOR_sse2_movntv2di:
34889 case CODE_FOR_avx_movntv8sf:
34890 case CODE_FOR_sse_movntv4sf:
34891 case CODE_FOR_sse4a_vmmovntv4sf:
34892 case CODE_FOR_avx_movntv4df:
34893 case CODE_FOR_sse2_movntv2df:
34894 case CODE_FOR_sse4a_vmmovntv2df:
34895 case CODE_FOR_sse2_movntidi:
34896 case CODE_FOR_sse_movntq:
34897 case CODE_FOR_sse2_movntisi:
34898 case CODE_FOR_avx512f_movntv16sf:
34899 case CODE_FOR_avx512f_movntv8df:
34900 case CODE_FOR_avx512f_movntv8di:
34901 aligned_mem = true;
34902 break;
34903 default:
34904 break;
34905 }
34906 break;
34907 case V4SF_FTYPE_V4SF_PCV2SF:
34908 case V2DF_FTYPE_V2DF_PCDOUBLE:
34909 nargs = 2;
34910 klass = load;
34911 memory = 1;
34912 break;
34913 case V8SF_FTYPE_PCV8SF_V8SI:
34914 case V4DF_FTYPE_PCV4DF_V4DI:
34915 case V4SF_FTYPE_PCV4SF_V4SI:
34916 case V2DF_FTYPE_PCV2DF_V2DI:
34917 case V8SI_FTYPE_PCV8SI_V8SI:
34918 case V4DI_FTYPE_PCV4DI_V4DI:
34919 case V4SI_FTYPE_PCV4SI_V4SI:
34920 case V2DI_FTYPE_PCV2DI_V2DI:
34921 nargs = 2;
34922 klass = load;
34923 memory = 0;
34924 break;
34925 case VOID_FTYPE_PV8DF_V8DF_QI:
34926 case VOID_FTYPE_PV16SF_V16SF_HI:
34927 case VOID_FTYPE_PV8DI_V8DI_QI:
34928 case VOID_FTYPE_PV16SI_V16SI_HI:
34929 switch (icode)
34930 {
34931 /* These builtins and instructions require the memory
34932 to be properly aligned. */
34933 case CODE_FOR_avx512f_storev16sf_mask:
34934 case CODE_FOR_avx512f_storev16si_mask:
34935 case CODE_FOR_avx512f_storev8df_mask:
34936 case CODE_FOR_avx512f_storev8di_mask:
34937 case CODE_FOR_avx512vl_storev8sf_mask:
34938 case CODE_FOR_avx512vl_storev8si_mask:
34939 case CODE_FOR_avx512vl_storev4df_mask:
34940 case CODE_FOR_avx512vl_storev4di_mask:
34941 case CODE_FOR_avx512vl_storev4sf_mask:
34942 case CODE_FOR_avx512vl_storev4si_mask:
34943 case CODE_FOR_avx512vl_storev2df_mask:
34944 case CODE_FOR_avx512vl_storev2di_mask:
34945 aligned_mem = true;
34946 break;
34947 default:
34948 break;
34949 }
34950 /* FALLTHRU */
34951 case VOID_FTYPE_PV8SF_V8SI_V8SF:
34952 case VOID_FTYPE_PV4DF_V4DI_V4DF:
34953 case VOID_FTYPE_PV4SF_V4SI_V4SF:
34954 case VOID_FTYPE_PV2DF_V2DI_V2DF:
34955 case VOID_FTYPE_PV8SI_V8SI_V8SI:
34956 case VOID_FTYPE_PV4DI_V4DI_V4DI:
34957 case VOID_FTYPE_PV4SI_V4SI_V4SI:
34958 case VOID_FTYPE_PV2DI_V2DI_V2DI:
34959 case VOID_FTYPE_PDOUBLE_V2DF_QI:
34960 case VOID_FTYPE_PFLOAT_V4SF_QI:
34961 case VOID_FTYPE_PV8SI_V8DI_QI:
34962 case VOID_FTYPE_PV8HI_V8DI_QI:
34963 case VOID_FTYPE_PV16HI_V16SI_HI:
34964 case VOID_FTYPE_PV16QI_V8DI_QI:
34965 case VOID_FTYPE_PV16QI_V16SI_HI:
34966 nargs = 2;
34967 klass = store;
34968 /* Reserve memory operand for target. */
34969 memory = ARRAY_SIZE (args);
34970 break;
34971 case V16SF_FTYPE_PCV16SF_V16SF_HI:
34972 case V16SI_FTYPE_PCV16SI_V16SI_HI:
34973 case V8DF_FTYPE_PCV8DF_V8DF_QI:
34974 case V8DI_FTYPE_PCV8DI_V8DI_QI:
34975 case V2DF_FTYPE_PCDOUBLE_V2DF_QI:
34976 case V4SF_FTYPE_PCFLOAT_V4SF_QI:
34977 nargs = 3;
34978 klass = load;
34979 memory = 0;
34980 switch (icode)
34981 {
34982 /* These builtins and instructions require the memory
34983 to be properly aligned. */
34984 case CODE_FOR_avx512f_loadv16sf_mask:
34985 case CODE_FOR_avx512f_loadv16si_mask:
34986 case CODE_FOR_avx512f_loadv8df_mask:
34987 case CODE_FOR_avx512f_loadv8di_mask:
34988 case CODE_FOR_avx512vl_loadv8sf_mask:
34989 case CODE_FOR_avx512vl_loadv8si_mask:
34990 case CODE_FOR_avx512vl_loadv4df_mask:
34991 case CODE_FOR_avx512vl_loadv4di_mask:
34992 case CODE_FOR_avx512vl_loadv4sf_mask:
34993 case CODE_FOR_avx512vl_loadv4si_mask:
34994 case CODE_FOR_avx512vl_loadv2df_mask:
34995 case CODE_FOR_avx512vl_loadv2di_mask:
34996 case CODE_FOR_avx512bw_loadv64qi_mask:
34997 case CODE_FOR_avx512vl_loadv32qi_mask:
34998 case CODE_FOR_avx512vl_loadv16qi_mask:
34999 case CODE_FOR_avx512bw_loadv32hi_mask:
35000 case CODE_FOR_avx512vl_loadv16hi_mask:
35001 case CODE_FOR_avx512vl_loadv8hi_mask:
35002 aligned_mem = true;
35003 break;
35004 default:
35005 break;
35006 }
35007 break;
35008 case VOID_FTYPE_UINT_UINT_UINT:
35009 case VOID_FTYPE_UINT64_UINT_UINT:
35010 case UCHAR_FTYPE_UINT_UINT_UINT:
35011 case UCHAR_FTYPE_UINT64_UINT_UINT:
35012 nargs = 3;
35013 klass = load;
35014 memory = ARRAY_SIZE (args);
35015 last_arg_constant = true;
35016 break;
35017 default:
35018 gcc_unreachable ();
35019 }
35020
35021 gcc_assert (nargs <= ARRAY_SIZE (args));
35022
35023 if (klass == store)
35024 {
35025 arg = CALL_EXPR_ARG (exp, 0);
35026 op = expand_normal (arg);
35027 gcc_assert (target == 0);
35028 if (memory)
35029 {
35030 op = ix86_zero_extend_to_Pmode (op);
35031 target = gen_rtx_MEM (tmode, op);
35032 /* target at this point has just BITS_PER_UNIT MEM_ALIGN
35033 on it. Try to improve it using get_pointer_alignment,
35034 and if the special builtin is one that requires strict
35035 mode alignment, also from its GET_MODE_ALIGNMENT.
35036 Failure to do so could lead to ix86_legitimate_combined_insn
35037 rejecting all changes to such insns. */
35038 unsigned int align = get_pointer_alignment (arg);
35039 if (aligned_mem && align < GET_MODE_ALIGNMENT (tmode))
35040 align = GET_MODE_ALIGNMENT (tmode);
35041 if (MEM_ALIGN (target) < align)
35042 set_mem_align (target, align);
35043 }
35044 else
35045 target = force_reg (tmode, op);
35046 arg_adjust = 1;
35047 }
35048 else
35049 {
35050 arg_adjust = 0;
35051 if (optimize
35052 || target == 0
35053 || !register_operand (target, tmode)
35054 || GET_MODE (target) != tmode)
35055 target = gen_reg_rtx (tmode);
35056 }
35057
35058 for (i = 0; i < nargs; i++)
35059 {
35060 enum machine_mode mode = insn_p->operand[i + 1].mode;
35061 bool match;
35062
35063 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
35064 op = expand_normal (arg);
35065 match = insn_p->operand[i + 1].predicate (op, mode);
35066
35067 if (last_arg_constant && (i + 1) == nargs)
35068 {
35069 if (!match)
35070 {
35071 if (icode == CODE_FOR_lwp_lwpvalsi3
35072 || icode == CODE_FOR_lwp_lwpinssi3
35073 || icode == CODE_FOR_lwp_lwpvaldi3
35074 || icode == CODE_FOR_lwp_lwpinsdi3)
35075 error ("the last argument must be a 32-bit immediate");
35076 else
35077 error ("the last argument must be an 8-bit immediate");
35078 return const0_rtx;
35079 }
35080 }
35081 else
35082 {
35083 if (i == memory)
35084 {
35085 /* This must be the memory operand. */
35086 op = ix86_zero_extend_to_Pmode (op);
35087 op = gen_rtx_MEM (mode, op);
35088 /* op at this point has just BITS_PER_UNIT MEM_ALIGN
35089 on it. Try to improve it using get_pointer_alignment,
35090 and if the special builtin is one that requires strict
35091 mode alignment, also from its GET_MODE_ALIGNMENT.
35092 Failure to do so could lead to ix86_legitimate_combined_insn
35093 rejecting all changes to such insns. */
35094 unsigned int align = get_pointer_alignment (arg);
35095 if (aligned_mem && align < GET_MODE_ALIGNMENT (mode))
35096 align = GET_MODE_ALIGNMENT (mode);
35097 if (MEM_ALIGN (op) < align)
35098 set_mem_align (op, align);
35099 }
35100 else
35101 {
35102 /* This must be a register.  */
35103 if (VECTOR_MODE_P (mode))
35104 op = safe_vector_operand (op, mode);
35105
35106 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
35107 op = copy_to_mode_reg (mode, op);
35108 else
35109 {
35110 op = copy_to_reg (op);
35111 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
35112 }
35113 }
35114 }
35115
35116 args[i].op = op;
35117 args[i].mode = mode;
35118 }
35119
35120 switch (nargs)
35121 {
35122 case 0:
35123 pat = GEN_FCN (icode) (target);
35124 break;
35125 case 1:
35126 pat = GEN_FCN (icode) (target, args[0].op);
35127 break;
35128 case 2:
35129 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
35130 break;
35131 case 3:
35132 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
35133 break;
35134 default:
35135 gcc_unreachable ();
35136 }
35137
35138 if (! pat)
35139 return 0;
35140 emit_insn (pat);
35141 return klass == store ? 0 : target;
35142 }
35143
35144 /* Return the integer constant in ARG. Constrain it to be in the range
35145 of the subparts of VEC_TYPE; issue an error if not. */
35146
35147 static int
35148 get_element_number (tree vec_type, tree arg)
35149 {
35150 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
35151
35152 if (!tree_fits_uhwi_p (arg)
35153 || (elt = tree_to_uhwi (arg), elt > max))
35154 {
35155 error ("selector must be an integer constant in the range 0..%wi", max);
35156 return 0;
35157 }
35158
35159 return elt;
35160 }
35161
35162 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
35163 ix86_expand_vector_init. We DO have language-level syntax for this, in
35164 the form of (type){ init-list }. Except that since we can't place emms
35165 instructions from inside the compiler, we can't allow the use of MMX
35166 registers unless the user explicitly asks for it. So we do *not* define
35167 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
35168 we have builtins invoked by mmintrin.h that give us license to emit
35169 these sorts of instructions. */
35170
35171 static rtx
35172 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
35173 {
35174 enum machine_mode tmode = TYPE_MODE (type);
35175 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
35176 int i, n_elt = GET_MODE_NUNITS (tmode);
35177 rtvec v = rtvec_alloc (n_elt);
35178
35179 gcc_assert (VECTOR_MODE_P (tmode));
35180 gcc_assert (call_expr_nargs (exp) == n_elt);
35181
35182 for (i = 0; i < n_elt; ++i)
35183 {
35184 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
35185 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
35186 }
35187
35188 if (!target || !register_operand (target, tmode))
35189 target = gen_reg_rtx (tmode);
35190
35191 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
35192 return target;
35193 }
35194
35195 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
35196 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
35197 had a language-level syntax for referencing vector elements. */
35198
35199 static rtx
35200 ix86_expand_vec_ext_builtin (tree exp, rtx target)
35201 {
35202 enum machine_mode tmode, mode0;
35203 tree arg0, arg1;
35204 int elt;
35205 rtx op0;
35206
35207 arg0 = CALL_EXPR_ARG (exp, 0);
35208 arg1 = CALL_EXPR_ARG (exp, 1);
35209
35210 op0 = expand_normal (arg0);
35211 elt = get_element_number (TREE_TYPE (arg0), arg1);
35212
35213 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
35214 mode0 = TYPE_MODE (TREE_TYPE (arg0));
35215 gcc_assert (VECTOR_MODE_P (mode0));
35216
35217 op0 = force_reg (mode0, op0);
35218
35219 if (optimize || !target || !register_operand (target, tmode))
35220 target = gen_reg_rtx (tmode);
35221
35222 ix86_expand_vector_extract (true, target, op0, elt);
35223
35224 return target;
35225 }
35226
35227 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
35228 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
35229 a language-level syntax for referencing vector elements. */
35230
35231 static rtx
35232 ix86_expand_vec_set_builtin (tree exp)
35233 {
35234 enum machine_mode tmode, mode1;
35235 tree arg0, arg1, arg2;
35236 int elt;
35237 rtx op0, op1, target;
35238
35239 arg0 = CALL_EXPR_ARG (exp, 0);
35240 arg1 = CALL_EXPR_ARG (exp, 1);
35241 arg2 = CALL_EXPR_ARG (exp, 2);
35242
35243 tmode = TYPE_MODE (TREE_TYPE (arg0));
35244 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
35245 gcc_assert (VECTOR_MODE_P (tmode));
35246
35247 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
35248 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
35249 elt = get_element_number (TREE_TYPE (arg0), arg2);
35250
35251 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
35252 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
35253
35254 op0 = force_reg (tmode, op0);
35255 op1 = force_reg (mode1, op1);
35256
35257 /* OP0 is the source of these builtin functions and shouldn't be
35258 modified. Create a copy, use it and return it as target. */
35259 target = gen_reg_rtx (tmode);
35260 emit_move_insn (target, op0);
35261 ix86_expand_vector_set (true, target, op1, elt);
35262
35263 return target;
35264 }
35265
35266 /* Expand an expression EXP that calls a built-in function,
35267 with result going to TARGET if that's convenient
35268 (and in mode MODE if that's convenient).
35269 SUBTARGET may be used as the target for computing one of EXP's operands.
35270 IGNORE is nonzero if the value is to be ignored. */
35271
35272 static rtx
35273 ix86_expand_builtin (tree exp, rtx target, rtx subtarget,
35274 enum machine_mode mode, int ignore)
35275 {
35276 const struct builtin_description *d;
35277 size_t i;
35278 enum insn_code icode;
35279 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
35280 tree arg0, arg1, arg2, arg3, arg4;
35281 rtx op0, op1, op2, op3, op4, pat, insn;
35282 enum machine_mode mode0, mode1, mode2, mode3, mode4;
35283 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
35284
35285 /* For CPU builtins that can be folded, fold first and expand the fold. */
35286 switch (fcode)
35287 {
35288 case IX86_BUILTIN_CPU_INIT:
35289 {
35290 /* Make it call __cpu_indicator_init in libgcc. */
35291 tree call_expr, fndecl, type;
35292 type = build_function_type_list (integer_type_node, NULL_TREE);
35293 fndecl = build_fn_decl ("__cpu_indicator_init", type);
35294 call_expr = build_call_expr (fndecl, 0);
35295 return expand_expr (call_expr, target, mode, EXPAND_NORMAL);
35296 }
35297 case IX86_BUILTIN_CPU_IS:
35298 case IX86_BUILTIN_CPU_SUPPORTS:
35299 {
35300 tree arg0 = CALL_EXPR_ARG (exp, 0);
35301 tree fold_expr = fold_builtin_cpu (fndecl, &arg0);
35302 gcc_assert (fold_expr != NULL_TREE);
35303 return expand_expr (fold_expr, target, mode, EXPAND_NORMAL);
35304 }
35305 }
35306
35307 /* Determine whether the builtin function is available under the current ISA.
35308 Originally the builtin was not created if it wasn't applicable to the
35309 current ISA based on the command line switches. With function specific
35310 options, we need to check in the context of the function making the call
35311 whether it is supported. */
35312 if (ix86_builtins_isa[fcode].isa
35313 && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
35314 {
35315 char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
35316 NULL, (enum fpmath_unit) 0, false);
35317
35318 if (!opts)
35319 error ("%qE needs unknown isa option", fndecl);
35320 else
35321 {
35322 gcc_assert (opts != NULL);
35323 error ("%qE needs isa option %s", fndecl, opts);
35324 free (opts);
35325 }
35326 return const0_rtx;
35327 }
35328
35329 switch (fcode)
35330 {
35331 case IX86_BUILTIN_MASKMOVQ:
35332 case IX86_BUILTIN_MASKMOVDQU:
35333 icode = (fcode == IX86_BUILTIN_MASKMOVQ
35334 ? CODE_FOR_mmx_maskmovq
35335 : CODE_FOR_sse2_maskmovdqu);
35336 /* Note the arg order is different from the operand order. */
35337 arg1 = CALL_EXPR_ARG (exp, 0);
35338 arg2 = CALL_EXPR_ARG (exp, 1);
35339 arg0 = CALL_EXPR_ARG (exp, 2);
35340 op0 = expand_normal (arg0);
35341 op1 = expand_normal (arg1);
35342 op2 = expand_normal (arg2);
35343 mode0 = insn_data[icode].operand[0].mode;
35344 mode1 = insn_data[icode].operand[1].mode;
35345 mode2 = insn_data[icode].operand[2].mode;
35346
35347 op0 = ix86_zero_extend_to_Pmode (op0);
35348 op0 = gen_rtx_MEM (mode1, op0);
35349
35350 if (!insn_data[icode].operand[0].predicate (op0, mode0))
35351 op0 = copy_to_mode_reg (mode0, op0);
35352 if (!insn_data[icode].operand[1].predicate (op1, mode1))
35353 op1 = copy_to_mode_reg (mode1, op1);
35354 if (!insn_data[icode].operand[2].predicate (op2, mode2))
35355 op2 = copy_to_mode_reg (mode2, op2);
35356 pat = GEN_FCN (icode) (op0, op1, op2);
35357 if (! pat)
35358 return 0;
35359 emit_insn (pat);
35360 return 0;
35361
35362 case IX86_BUILTIN_LDMXCSR:
35363 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
35364 target = assign_386_stack_local (SImode, SLOT_TEMP);
35365 emit_move_insn (target, op0);
35366 emit_insn (gen_sse_ldmxcsr (target));
35367 return 0;
35368
35369 case IX86_BUILTIN_STMXCSR:
35370 target = assign_386_stack_local (SImode, SLOT_TEMP);
35371 emit_insn (gen_sse_stmxcsr (target));
35372 return copy_to_mode_reg (SImode, target);
35373
35374 case IX86_BUILTIN_CLFLUSH:
35375 arg0 = CALL_EXPR_ARG (exp, 0);
35376 op0 = expand_normal (arg0);
35377 icode = CODE_FOR_sse2_clflush;
35378 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
35379 op0 = ix86_zero_extend_to_Pmode (op0);
35380
35381 emit_insn (gen_sse2_clflush (op0));
35382 return 0;
35383
35384 case IX86_BUILTIN_CLFLUSHOPT:
35385 arg0 = CALL_EXPR_ARG (exp, 0);
35386 op0 = expand_normal (arg0);
35387 icode = CODE_FOR_clflushopt;
35388 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
35389 op0 = ix86_zero_extend_to_Pmode (op0);
35390
35391 emit_insn (gen_clflushopt (op0));
35392 return 0;
35393
35394 case IX86_BUILTIN_MONITOR:
35395 arg0 = CALL_EXPR_ARG (exp, 0);
35396 arg1 = CALL_EXPR_ARG (exp, 1);
35397 arg2 = CALL_EXPR_ARG (exp, 2);
35398 op0 = expand_normal (arg0);
35399 op1 = expand_normal (arg1);
35400 op2 = expand_normal (arg2);
35401 if (!REG_P (op0))
35402 op0 = ix86_zero_extend_to_Pmode (op0);
35403 if (!REG_P (op1))
35404 op1 = copy_to_mode_reg (SImode, op1);
35405 if (!REG_P (op2))
35406 op2 = copy_to_mode_reg (SImode, op2);
35407 emit_insn (ix86_gen_monitor (op0, op1, op2));
35408 return 0;
35409
35410 case IX86_BUILTIN_MWAIT:
35411 arg0 = CALL_EXPR_ARG (exp, 0);
35412 arg1 = CALL_EXPR_ARG (exp, 1);
35413 op0 = expand_normal (arg0);
35414 op1 = expand_normal (arg1);
35415 if (!REG_P (op0))
35416 op0 = copy_to_mode_reg (SImode, op0);
35417 if (!REG_P (op1))
35418 op1 = copy_to_mode_reg (SImode, op1);
35419 emit_insn (gen_sse3_mwait (op0, op1));
35420 return 0;
35421
35422 case IX86_BUILTIN_VEC_INIT_V2SI:
35423 case IX86_BUILTIN_VEC_INIT_V4HI:
35424 case IX86_BUILTIN_VEC_INIT_V8QI:
35425 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
35426
35427 case IX86_BUILTIN_VEC_EXT_V2DF:
35428 case IX86_BUILTIN_VEC_EXT_V2DI:
35429 case IX86_BUILTIN_VEC_EXT_V4SF:
35430 case IX86_BUILTIN_VEC_EXT_V4SI:
35431 case IX86_BUILTIN_VEC_EXT_V8HI:
35432 case IX86_BUILTIN_VEC_EXT_V2SI:
35433 case IX86_BUILTIN_VEC_EXT_V4HI:
35434 case IX86_BUILTIN_VEC_EXT_V16QI:
35435 return ix86_expand_vec_ext_builtin (exp, target);
35436
35437 case IX86_BUILTIN_VEC_SET_V2DI:
35438 case IX86_BUILTIN_VEC_SET_V4SF:
35439 case IX86_BUILTIN_VEC_SET_V4SI:
35440 case IX86_BUILTIN_VEC_SET_V8HI:
35441 case IX86_BUILTIN_VEC_SET_V4HI:
35442 case IX86_BUILTIN_VEC_SET_V16QI:
35443 return ix86_expand_vec_set_builtin (exp);
35444
35445 case IX86_BUILTIN_INFQ:
35446 case IX86_BUILTIN_HUGE_VALQ:
35447 {
35448 REAL_VALUE_TYPE inf;
35449 rtx tmp;
35450
35451 real_inf (&inf);
35452 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
35453
35454 tmp = validize_mem (force_const_mem (mode, tmp));
35455
35456 if (target == 0)
35457 target = gen_reg_rtx (mode);
35458
35459 emit_move_insn (target, tmp);
35460 return target;
35461 }
35462
35463 case IX86_BUILTIN_RDPMC:
35464 case IX86_BUILTIN_RDTSC:
35465 case IX86_BUILTIN_RDTSCP:
35466
35467 op0 = gen_reg_rtx (DImode);
35468 op1 = gen_reg_rtx (DImode);
35469
35470 if (fcode == IX86_BUILTIN_RDPMC)
35471 {
35472 arg0 = CALL_EXPR_ARG (exp, 0);
35473 op2 = expand_normal (arg0);
35474 if (!register_operand (op2, SImode))
35475 op2 = copy_to_mode_reg (SImode, op2);
35476
35477 insn = (TARGET_64BIT
35478 ? gen_rdpmc_rex64 (op0, op1, op2)
35479 : gen_rdpmc (op0, op2));
35480 emit_insn (insn);
35481 }
35482 else if (fcode == IX86_BUILTIN_RDTSC)
35483 {
35484 insn = (TARGET_64BIT
35485 ? gen_rdtsc_rex64 (op0, op1)
35486 : gen_rdtsc (op0));
35487 emit_insn (insn);
35488 }
35489 else
35490 {
35491 op2 = gen_reg_rtx (SImode);
35492
35493 insn = (TARGET_64BIT
35494 ? gen_rdtscp_rex64 (op0, op1, op2)
35495 : gen_rdtscp (op0, op2));
35496 emit_insn (insn);
35497
35498 arg0 = CALL_EXPR_ARG (exp, 0);
35499 op4 = expand_normal (arg0);
35500 if (!address_operand (op4, VOIDmode))
35501 {
35502 op4 = convert_memory_address (Pmode, op4);
35503 op4 = copy_addr_to_reg (op4);
35504 }
35505 emit_move_insn (gen_rtx_MEM (SImode, op4), op2);
35506 }
35507
35508 if (target == 0)
35509 {
35510 /* mode is VOIDmode if __builtin_rd* has been called
35511 without lhs. */
35512 if (mode == VOIDmode)
35513 return target;
35514 target = gen_reg_rtx (mode);
35515 }
35516
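/* On 64-bit targets the counter comes back as two zero-extended
   32-bit halves, low part in OP0 and high part in OP1; combine
   them into a single DImode value.  */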
35517 if (TARGET_64BIT)
35518 {
35519 op1 = expand_simple_binop (DImode, ASHIFT, op1, GEN_INT (32),
35520 op1, 1, OPTAB_DIRECT);
35521 op0 = expand_simple_binop (DImode, IOR, op0, op1,
35522 op0, 1, OPTAB_DIRECT);
35523 }
35524
35525 emit_move_insn (target, op0);
35526 return target;
35527
35528 case IX86_BUILTIN_FXSAVE:
35529 case IX86_BUILTIN_FXRSTOR:
35530 case IX86_BUILTIN_FXSAVE64:
35531 case IX86_BUILTIN_FXRSTOR64:
35532 case IX86_BUILTIN_FNSTENV:
35533 case IX86_BUILTIN_FLDENV:
35534 mode0 = BLKmode;
35535 switch (fcode)
35536 {
35537 case IX86_BUILTIN_FXSAVE:
35538 icode = CODE_FOR_fxsave;
35539 break;
35540 case IX86_BUILTIN_FXRSTOR:
35541 icode = CODE_FOR_fxrstor;
35542 break;
35543 case IX86_BUILTIN_FXSAVE64:
35544 icode = CODE_FOR_fxsave64;
35545 break;
35546 case IX86_BUILTIN_FXRSTOR64:
35547 icode = CODE_FOR_fxrstor64;
35548 break;
35549 case IX86_BUILTIN_FNSTENV:
35550 icode = CODE_FOR_fnstenv;
35551 break;
35552 case IX86_BUILTIN_FLDENV:
35553 icode = CODE_FOR_fldenv;
35554 break;
35555 default:
35556 gcc_unreachable ();
35557 }
35558
35559 arg0 = CALL_EXPR_ARG (exp, 0);
35560 op0 = expand_normal (arg0);
35561
35562 if (!address_operand (op0, VOIDmode))
35563 {
35564 op0 = convert_memory_address (Pmode, op0);
35565 op0 = copy_addr_to_reg (op0);
35566 }
35567 op0 = gen_rtx_MEM (mode0, op0);
35568
35569 pat = GEN_FCN (icode) (op0);
35570 if (pat)
35571 emit_insn (pat);
35572 return 0;
35573
35574 case IX86_BUILTIN_XSAVE:
35575 case IX86_BUILTIN_XRSTOR:
35576 case IX86_BUILTIN_XSAVE64:
35577 case IX86_BUILTIN_XRSTOR64:
35578 case IX86_BUILTIN_XSAVEOPT:
35579 case IX86_BUILTIN_XSAVEOPT64:
35580 case IX86_BUILTIN_XSAVES:
35581 case IX86_BUILTIN_XRSTORS:
35582 case IX86_BUILTIN_XSAVES64:
35583 case IX86_BUILTIN_XRSTORS64:
35584 case IX86_BUILTIN_XSAVEC:
35585 case IX86_BUILTIN_XSAVEC64:
35586 arg0 = CALL_EXPR_ARG (exp, 0);
35587 arg1 = CALL_EXPR_ARG (exp, 1);
35588 op0 = expand_normal (arg0);
35589 op1 = expand_normal (arg1);
35590
35591 if (!address_operand (op0, VOIDmode))
35592 {
35593 op0 = convert_memory_address (Pmode, op0);
35594 op0 = copy_addr_to_reg (op0);
35595 }
35596 op0 = gen_rtx_MEM (BLKmode, op0);
35597
35598 op1 = force_reg (DImode, op1);
35599
35600 if (TARGET_64BIT)
35601 {
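/* The XSAVE family takes its feature mask in EDX:EAX, so split
   the DImode mask into two 32-bit halves.  */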
35602 op2 = expand_simple_binop (DImode, LSHIFTRT, op1, GEN_INT (32),
35603 NULL, 1, OPTAB_DIRECT);
35604 switch (fcode)
35605 {
35606 case IX86_BUILTIN_XSAVE:
35607 icode = CODE_FOR_xsave_rex64;
35608 break;
35609 case IX86_BUILTIN_XRSTOR:
35610 icode = CODE_FOR_xrstor_rex64;
35611 break;
35612 case IX86_BUILTIN_XSAVE64:
35613 icode = CODE_FOR_xsave64;
35614 break;
35615 case IX86_BUILTIN_XRSTOR64:
35616 icode = CODE_FOR_xrstor64;
35617 break;
35618 case IX86_BUILTIN_XSAVEOPT:
35619 icode = CODE_FOR_xsaveopt_rex64;
35620 break;
35621 case IX86_BUILTIN_XSAVEOPT64:
35622 icode = CODE_FOR_xsaveopt64;
35623 break;
35624 case IX86_BUILTIN_XSAVES:
35625 icode = CODE_FOR_xsaves_rex64;
35626 break;
35627 case IX86_BUILTIN_XRSTORS:
35628 icode = CODE_FOR_xrstors_rex64;
35629 break;
35630 case IX86_BUILTIN_XSAVES64:
35631 icode = CODE_FOR_xsaves64;
35632 break;
35633 case IX86_BUILTIN_XRSTORS64:
35634 icode = CODE_FOR_xrstors64;
35635 break;
35636 case IX86_BUILTIN_XSAVEC:
35637 icode = CODE_FOR_xsavec_rex64;
35638 break;
35639 case IX86_BUILTIN_XSAVEC64:
35640 icode = CODE_FOR_xsavec64;
35641 break;
35642 default:
35643 gcc_unreachable ();
35644 }
35645
35646 op2 = gen_lowpart (SImode, op2);
35647 op1 = gen_lowpart (SImode, op1);
35648 pat = GEN_FCN (icode) (op0, op1, op2);
35649 }
35650 else
35651 {
35652 switch (fcode)
35653 {
35654 case IX86_BUILTIN_XSAVE:
35655 icode = CODE_FOR_xsave;
35656 break;
35657 case IX86_BUILTIN_XRSTOR:
35658 icode = CODE_FOR_xrstor;
35659 break;
35660 case IX86_BUILTIN_XSAVEOPT:
35661 icode = CODE_FOR_xsaveopt;
35662 break;
35663 case IX86_BUILTIN_XSAVES:
35664 icode = CODE_FOR_xsaves;
35665 break;
35666 case IX86_BUILTIN_XRSTORS:
35667 icode = CODE_FOR_xrstors;
35668 break;
35669 case IX86_BUILTIN_XSAVEC:
35670 icode = CODE_FOR_xsavec;
35671 break;
35672 default:
35673 gcc_unreachable ();
35674 }
35675 pat = GEN_FCN (icode) (op0, op1);
35676 }
35677
35678 if (pat)
35679 emit_insn (pat);
35680 return 0;
35681
35682 case IX86_BUILTIN_LLWPCB:
35683 arg0 = CALL_EXPR_ARG (exp, 0);
35684 op0 = expand_normal (arg0);
35685 icode = CODE_FOR_lwp_llwpcb;
35686 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
35687 op0 = ix86_zero_extend_to_Pmode (op0);
35688 emit_insn (gen_lwp_llwpcb (op0));
35689 return 0;
35690
35691 case IX86_BUILTIN_SLWPCB:
35692 icode = CODE_FOR_lwp_slwpcb;
35693 if (!target
35694 || !insn_data[icode].operand[0].predicate (target, Pmode))
35695 target = gen_reg_rtx (Pmode);
35696 emit_insn (gen_lwp_slwpcb (target));
35697 return target;
35698
35699 case IX86_BUILTIN_BEXTRI32:
35700 case IX86_BUILTIN_BEXTRI64:
35701 arg0 = CALL_EXPR_ARG (exp, 0);
35702 arg1 = CALL_EXPR_ARG (exp, 1);
35703 op0 = expand_normal (arg0);
35704 op1 = expand_normal (arg1);
35705 icode = (fcode == IX86_BUILTIN_BEXTRI32
35706 ? CODE_FOR_tbm_bextri_si
35707 : CODE_FOR_tbm_bextri_di);
35708 if (!CONST_INT_P (op1))
35709 {
35710 error ("last argument must be an immediate");
35711 return const0_rtx;
35712 }
35713 else
35714 {
35715 unsigned char length = (INTVAL (op1) >> 8) & 0xFF;
35716 unsigned char lsb_index = INTVAL (op1) & 0xFF;
35717 op1 = GEN_INT (length);
35718 op2 = GEN_INT (lsb_index);
35719 pat = GEN_FCN (icode) (target, op0, op1, op2);
35720 if (pat)
35721 emit_insn (pat);
35722 return target;
35723 }
35724
35725 case IX86_BUILTIN_RDRAND16_STEP:
35726 icode = CODE_FOR_rdrandhi_1;
35727 mode0 = HImode;
35728 goto rdrand_step;
35729
35730 case IX86_BUILTIN_RDRAND32_STEP:
35731 icode = CODE_FOR_rdrandsi_1;
35732 mode0 = SImode;
35733 goto rdrand_step;
35734
35735 case IX86_BUILTIN_RDRAND64_STEP:
35736 icode = CODE_FOR_rdranddi_1;
35737 mode0 = DImode;
35738
35739 rdrand_step:
35740 op0 = gen_reg_rtx (mode0);
35741 emit_insn (GEN_FCN (icode) (op0));
35742
35743 arg0 = CALL_EXPR_ARG (exp, 0);
35744 op1 = expand_normal (arg0);
35745 if (!address_operand (op1, VOIDmode))
35746 {
35747 op1 = convert_memory_address (Pmode, op1);
35748 op1 = copy_addr_to_reg (op1);
35749 }
35750 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
35751
35752 op1 = gen_reg_rtx (SImode);
35753 emit_move_insn (op1, CONST1_RTX (SImode));
35754
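/* The *_step intrinsics return 1 on success and 0 on failure.  RDRAND
   is documented to clear the destination register when it fails, so
   the conditional move below yields that zero when CF is clear and
   the constant 1 when CF is set.  */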
35755 /* Emit SImode conditional move. */
35756 if (mode0 == HImode)
35757 {
35758 op2 = gen_reg_rtx (SImode);
35759 emit_insn (gen_zero_extendhisi2 (op2, op0));
35760 }
35761 else if (mode0 == SImode)
35762 op2 = op0;
35763 else
35764 op2 = gen_rtx_SUBREG (SImode, op0, 0);
35765
35766 if (target == 0
35767 || !register_operand (target, SImode))
35768 target = gen_reg_rtx (SImode);
35769
35770 pat = gen_rtx_GEU (VOIDmode, gen_rtx_REG (CCCmode, FLAGS_REG),
35771 const0_rtx);
35772 emit_insn (gen_rtx_SET (VOIDmode, target,
35773 gen_rtx_IF_THEN_ELSE (SImode, pat, op2, op1)));
35774 return target;
35775
35776 case IX86_BUILTIN_RDSEED16_STEP:
35777 icode = CODE_FOR_rdseedhi_1;
35778 mode0 = HImode;
35779 goto rdseed_step;
35780
35781 case IX86_BUILTIN_RDSEED32_STEP:
35782 icode = CODE_FOR_rdseedsi_1;
35783 mode0 = SImode;
35784 goto rdseed_step;
35785
35786 case IX86_BUILTIN_RDSEED64_STEP:
35787 icode = CODE_FOR_rdseeddi_1;
35788 mode0 = DImode;
35789
35790 rdseed_step:
35791 op0 = gen_reg_rtx (mode0);
35792 emit_insn (GEN_FCN (icode) (op0));
35793
35794 arg0 = CALL_EXPR_ARG (exp, 0);
35795 op1 = expand_normal (arg0);
35796 if (!address_operand (op1, VOIDmode))
35797 {
35798 op1 = convert_memory_address (Pmode, op1);
35799 op1 = copy_addr_to_reg (op1);
35800 }
35801 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
35802
35803 op2 = gen_reg_rtx (QImode);
35804
35805 pat = gen_rtx_LTU (QImode, gen_rtx_REG (CCCmode, FLAGS_REG),
35806 const0_rtx);
35807 emit_insn (gen_rtx_SET (VOIDmode, op2, pat));
35808
35809 if (target == 0
35810 || !register_operand (target, SImode))
35811 target = gen_reg_rtx (SImode);
35812
35813 emit_insn (gen_zero_extendqisi2 (target, op2));
35814 return target;
35815
35816 case IX86_BUILTIN_SBB32:
35817 icode = CODE_FOR_subsi3_carry;
35818 mode0 = SImode;
35819 goto addcarryx;
35820
35821 case IX86_BUILTIN_SBB64:
35822 icode = CODE_FOR_subdi3_carry;
35823 mode0 = DImode;
35824 goto addcarryx;
35825
35826 case IX86_BUILTIN_ADDCARRYX32:
35827 icode = TARGET_ADX ? CODE_FOR_adcxsi3 : CODE_FOR_addsi3_carry;
35828 mode0 = SImode;
35829 goto addcarryx;
35830
35831 case IX86_BUILTIN_ADDCARRYX64:
35832 icode = TARGET_ADX ? CODE_FOR_adcxdi3 : CODE_FOR_adddi3_carry;
35833 mode0 = DImode;
35834
35835 addcarryx:
35836 arg0 = CALL_EXPR_ARG (exp, 0); /* unsigned char c_in. */
35837 arg1 = CALL_EXPR_ARG (exp, 1); /* unsigned int src1. */
35838 arg2 = CALL_EXPR_ARG (exp, 2); /* unsigned int src2. */
35839 arg3 = CALL_EXPR_ARG (exp, 3); /* unsigned int *sum_out. */
35840
35841 op0 = gen_reg_rtx (QImode);
35842
35843 /* Generate CF from input operand. */
35844 op1 = expand_normal (arg0);
35845 op1 = copy_to_mode_reg (QImode, convert_to_mode (QImode, op1, 1));
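/* Adding 0xff (constm1 in QImode) to the zero-extended carry-in
   produces a carry exactly when the input is nonzero, loading CF
   for the carry-using instruction below.  */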
35846 emit_insn (gen_addqi3_cc (op0, op1, constm1_rtx));
35847
35848 /* Generate the ADCX/ADC (or SBB) instruction to compute X + Y + CF (or X - Y - CF).  */
35849 op2 = expand_normal (arg1);
35850 op3 = expand_normal (arg2);
35851
35852 if (!REG_P (op2))
35853 op2 = copy_to_mode_reg (mode0, op2);
35854 if (!REG_P (op3))
35855 op3 = copy_to_mode_reg (mode0, op3);
35856
35857 op0 = gen_reg_rtx (mode0);
35858
35859 op4 = gen_rtx_REG (CCCmode, FLAGS_REG);
35860 pat = gen_rtx_LTU (VOIDmode, op4, const0_rtx);
35861 emit_insn (GEN_FCN (icode) (op0, op2, op3, op4, pat));
35862
35863 /* Store the result. */
35864 op4 = expand_normal (arg3);
35865 if (!address_operand (op4, VOIDmode))
35866 {
35867 op4 = convert_memory_address (Pmode, op4);
35868 op4 = copy_addr_to_reg (op4);
35869 }
35870 emit_move_insn (gen_rtx_MEM (mode0, op4), op0);
35871
35872 /* Return current CF value. */
35873 if (target == 0)
35874 target = gen_reg_rtx (QImode);
35875
35876 PUT_MODE (pat, QImode);
35877 emit_insn (gen_rtx_SET (VOIDmode, target, pat));
35878 return target;
35879
35880 case IX86_BUILTIN_READ_FLAGS:
35881 emit_insn (gen_push (gen_rtx_REG (word_mode, FLAGS_REG)));
35882
35883 if (optimize
35884 || target == NULL_RTX
35885 || !nonimmediate_operand (target, word_mode)
35886 || GET_MODE (target) != word_mode)
35887 target = gen_reg_rtx (word_mode);
35888
35889 emit_insn (gen_pop (target));
35890 return target;
35891
35892 case IX86_BUILTIN_WRITE_FLAGS:
35893
35894 arg0 = CALL_EXPR_ARG (exp, 0);
35895 op0 = expand_normal (arg0);
35896 if (!general_no_elim_operand (op0, word_mode))
35897 op0 = copy_to_mode_reg (word_mode, op0);
35898
35899 emit_insn (gen_push (op0));
35900 emit_insn (gen_pop (gen_rtx_REG (word_mode, FLAGS_REG)));
35901 return 0;
35902
35903 case IX86_BUILTIN_KORTESTC16:
35904 icode = CODE_FOR_kortestchi;
35905 mode0 = HImode;
35906 mode1 = CCCmode;
35907 goto kortest;
35908
35909 case IX86_BUILTIN_KORTESTZ16:
35910 icode = CODE_FOR_kortestzhi;
35911 mode0 = HImode;
35912 mode1 = CCZmode;
35913
35914 kortest:
35915 arg0 = CALL_EXPR_ARG (exp, 0); /* Mask reg src1. */
35916 arg1 = CALL_EXPR_ARG (exp, 1); /* Mask reg src2. */
35917 op0 = expand_normal (arg0);
35918 op1 = expand_normal (arg1);
35919
35920 op0 = copy_to_reg (op0);
35921 op0 = simplify_gen_subreg (mode0, op0, GET_MODE (op0), 0);
35922 op1 = copy_to_reg (op1);
35923 op1 = simplify_gen_subreg (mode0, op1, GET_MODE (op1), 0);
35924
35925 target = gen_reg_rtx (QImode);
35926 emit_insn (gen_rtx_SET (VOIDmode, target, const0_rtx));
35927
35928 /* Emit kortest. */
35929 emit_insn (GEN_FCN (icode) (op0, op1));
35930 /* And use setcc to return result from flags. */
35931 ix86_expand_setcc (target, EQ,
35932 gen_rtx_REG (mode1, FLAGS_REG), const0_rtx);
35933 return target;
35934
35935 case IX86_BUILTIN_GATHERSIV2DF:
35936 icode = CODE_FOR_avx2_gathersiv2df;
35937 goto gather_gen;
35938 case IX86_BUILTIN_GATHERSIV4DF:
35939 icode = CODE_FOR_avx2_gathersiv4df;
35940 goto gather_gen;
35941 case IX86_BUILTIN_GATHERDIV2DF:
35942 icode = CODE_FOR_avx2_gatherdiv2df;
35943 goto gather_gen;
35944 case IX86_BUILTIN_GATHERDIV4DF:
35945 icode = CODE_FOR_avx2_gatherdiv4df;
35946 goto gather_gen;
35947 case IX86_BUILTIN_GATHERSIV4SF:
35948 icode = CODE_FOR_avx2_gathersiv4sf;
35949 goto gather_gen;
35950 case IX86_BUILTIN_GATHERSIV8SF:
35951 icode = CODE_FOR_avx2_gathersiv8sf;
35952 goto gather_gen;
35953 case IX86_BUILTIN_GATHERDIV4SF:
35954 icode = CODE_FOR_avx2_gatherdiv4sf;
35955 goto gather_gen;
35956 case IX86_BUILTIN_GATHERDIV8SF:
35957 icode = CODE_FOR_avx2_gatherdiv8sf;
35958 goto gather_gen;
35959 case IX86_BUILTIN_GATHERSIV2DI:
35960 icode = CODE_FOR_avx2_gathersiv2di;
35961 goto gather_gen;
35962 case IX86_BUILTIN_GATHERSIV4DI:
35963 icode = CODE_FOR_avx2_gathersiv4di;
35964 goto gather_gen;
35965 case IX86_BUILTIN_GATHERDIV2DI:
35966 icode = CODE_FOR_avx2_gatherdiv2di;
35967 goto gather_gen;
35968 case IX86_BUILTIN_GATHERDIV4DI:
35969 icode = CODE_FOR_avx2_gatherdiv4di;
35970 goto gather_gen;
35971 case IX86_BUILTIN_GATHERSIV4SI:
35972 icode = CODE_FOR_avx2_gathersiv4si;
35973 goto gather_gen;
35974 case IX86_BUILTIN_GATHERSIV8SI:
35975 icode = CODE_FOR_avx2_gathersiv8si;
35976 goto gather_gen;
35977 case IX86_BUILTIN_GATHERDIV4SI:
35978 icode = CODE_FOR_avx2_gatherdiv4si;
35979 goto gather_gen;
35980 case IX86_BUILTIN_GATHERDIV8SI:
35981 icode = CODE_FOR_avx2_gatherdiv8si;
35982 goto gather_gen;
35983 case IX86_BUILTIN_GATHERALTSIV4DF:
35984 icode = CODE_FOR_avx2_gathersiv4df;
35985 goto gather_gen;
35986 case IX86_BUILTIN_GATHERALTDIV8SF:
35987 icode = CODE_FOR_avx2_gatherdiv8sf;
35988 goto gather_gen;
35989 case IX86_BUILTIN_GATHERALTSIV4DI:
35990 icode = CODE_FOR_avx2_gathersiv4di;
35991 goto gather_gen;
35992 case IX86_BUILTIN_GATHERALTDIV8SI:
35993 icode = CODE_FOR_avx2_gatherdiv8si;
35994 goto gather_gen;
35995 case IX86_BUILTIN_GATHER3SIV16SF:
35996 icode = CODE_FOR_avx512f_gathersiv16sf;
35997 goto gather_gen;
35998 case IX86_BUILTIN_GATHER3SIV8DF:
35999 icode = CODE_FOR_avx512f_gathersiv8df;
36000 goto gather_gen;
36001 case IX86_BUILTIN_GATHER3DIV16SF:
36002 icode = CODE_FOR_avx512f_gatherdiv16sf;
36003 goto gather_gen;
36004 case IX86_BUILTIN_GATHER3DIV8DF:
36005 icode = CODE_FOR_avx512f_gatherdiv8df;
36006 goto gather_gen;
36007 case IX86_BUILTIN_GATHER3SIV16SI:
36008 icode = CODE_FOR_avx512f_gathersiv16si;
36009 goto gather_gen;
36010 case IX86_BUILTIN_GATHER3SIV8DI:
36011 icode = CODE_FOR_avx512f_gathersiv8di;
36012 goto gather_gen;
36013 case IX86_BUILTIN_GATHER3DIV16SI:
36014 icode = CODE_FOR_avx512f_gatherdiv16si;
36015 goto gather_gen;
36016 case IX86_BUILTIN_GATHER3DIV8DI:
36017 icode = CODE_FOR_avx512f_gatherdiv8di;
36018 goto gather_gen;
36019 case IX86_BUILTIN_GATHER3ALTSIV8DF:
36020 icode = CODE_FOR_avx512f_gathersiv8df;
36021 goto gather_gen;
36022 case IX86_BUILTIN_GATHER3ALTDIV16SF:
36023 icode = CODE_FOR_avx512f_gatherdiv16sf;
36024 goto gather_gen;
36025 case IX86_BUILTIN_GATHER3ALTSIV8DI:
36026 icode = CODE_FOR_avx512f_gathersiv8di;
36027 goto gather_gen;
36028 case IX86_BUILTIN_GATHER3ALTDIV16SI:
36029 icode = CODE_FOR_avx512f_gatherdiv16si;
36030 goto gather_gen;
36031 case IX86_BUILTIN_SCATTERSIV16SF:
36032 icode = CODE_FOR_avx512f_scattersiv16sf;
36033 goto scatter_gen;
36034 case IX86_BUILTIN_SCATTERSIV8DF:
36035 icode = CODE_FOR_avx512f_scattersiv8df;
36036 goto scatter_gen;
36037 case IX86_BUILTIN_SCATTERDIV16SF:
36038 icode = CODE_FOR_avx512f_scatterdiv16sf;
36039 goto scatter_gen;
36040 case IX86_BUILTIN_SCATTERDIV8DF:
36041 icode = CODE_FOR_avx512f_scatterdiv8df;
36042 goto scatter_gen;
36043 case IX86_BUILTIN_SCATTERSIV16SI:
36044 icode = CODE_FOR_avx512f_scattersiv16si;
36045 goto scatter_gen;
36046 case IX86_BUILTIN_SCATTERSIV8DI:
36047 icode = CODE_FOR_avx512f_scattersiv8di;
36048 goto scatter_gen;
36049 case IX86_BUILTIN_SCATTERDIV16SI:
36050 icode = CODE_FOR_avx512f_scatterdiv16si;
36051 goto scatter_gen;
36052 case IX86_BUILTIN_SCATTERDIV8DI:
36053 icode = CODE_FOR_avx512f_scatterdiv8di;
36054 goto scatter_gen;
36055
36056 case IX86_BUILTIN_GATHERPFDPD:
36057 icode = CODE_FOR_avx512pf_gatherpfv8sidf;
36058 goto vec_prefetch_gen;
36059 case IX86_BUILTIN_GATHERPFDPS:
36060 icode = CODE_FOR_avx512pf_gatherpfv16sisf;
36061 goto vec_prefetch_gen;
36062 case IX86_BUILTIN_GATHERPFQPD:
36063 icode = CODE_FOR_avx512pf_gatherpfv8didf;
36064 goto vec_prefetch_gen;
36065 case IX86_BUILTIN_GATHERPFQPS:
36066 icode = CODE_FOR_avx512pf_gatherpfv8disf;
36067 goto vec_prefetch_gen;
36068 case IX86_BUILTIN_SCATTERPFDPD:
36069 icode = CODE_FOR_avx512pf_scatterpfv8sidf;
36070 goto vec_prefetch_gen;
36071 case IX86_BUILTIN_SCATTERPFDPS:
36072 icode = CODE_FOR_avx512pf_scatterpfv16sisf;
36073 goto vec_prefetch_gen;
36074 case IX86_BUILTIN_SCATTERPFQPD:
36075 icode = CODE_FOR_avx512pf_scatterpfv8didf;
36076 goto vec_prefetch_gen;
36077 case IX86_BUILTIN_SCATTERPFQPS:
36078 icode = CODE_FOR_avx512pf_scatterpfv8disf;
36079 goto vec_prefetch_gen;
36080
36081 gather_gen:
36082 rtx half;
36083 rtx (*gen) (rtx, rtx);
36084
36085 arg0 = CALL_EXPR_ARG (exp, 0);
36086 arg1 = CALL_EXPR_ARG (exp, 1);
36087 arg2 = CALL_EXPR_ARG (exp, 2);
36088 arg3 = CALL_EXPR_ARG (exp, 3);
36089 arg4 = CALL_EXPR_ARG (exp, 4);
36090 op0 = expand_normal (arg0);
36091 op1 = expand_normal (arg1);
36092 op2 = expand_normal (arg2);
36093 op3 = expand_normal (arg3);
36094 op4 = expand_normal (arg4);
36095 /* Note the arg order is different from the operand order. */
36096 mode0 = insn_data[icode].operand[1].mode;
36097 mode2 = insn_data[icode].operand[3].mode;
36098 mode3 = insn_data[icode].operand[4].mode;
36099 mode4 = insn_data[icode].operand[5].mode;
36100
36101 if (target == NULL_RTX
36102 || GET_MODE (target) != insn_data[icode].operand[0].mode
36103 || !insn_data[icode].operand[0].predicate (target,
36104 GET_MODE (target)))
36105 subtarget = gen_reg_rtx (insn_data[icode].operand[0].mode);
36106 else
36107 subtarget = target;
36108
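/* The *ALT* gather builtins pair operands of mismatched element counts:
   for the ...ALTSIV... forms the SImode index vector is twice as wide as
   needed, while for the ...ALTDIV... forms the data and mask vectors are.
   Only the low half of the wider operand is used, so extract it first.  */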
36109 switch (fcode)
36110 {
36111 case IX86_BUILTIN_GATHER3ALTSIV8DF:
36112 case IX86_BUILTIN_GATHER3ALTSIV8DI:
36113 half = gen_reg_rtx (V8SImode);
36114 if (!nonimmediate_operand (op2, V16SImode))
36115 op2 = copy_to_mode_reg (V16SImode, op2);
36116 emit_insn (gen_vec_extract_lo_v16si (half, op2));
36117 op2 = half;
36118 break;
36119 case IX86_BUILTIN_GATHERALTSIV4DF:
36120 case IX86_BUILTIN_GATHERALTSIV4DI:
36121 half = gen_reg_rtx (V4SImode);
36122 if (!nonimmediate_operand (op2, V8SImode))
36123 op2 = copy_to_mode_reg (V8SImode, op2);
36124 emit_insn (gen_vec_extract_lo_v8si (half, op2));
36125 op2 = half;
36126 break;
36127 case IX86_BUILTIN_GATHER3ALTDIV16SF:
36128 case IX86_BUILTIN_GATHER3ALTDIV16SI:
36129 half = gen_reg_rtx (mode0);
36130 if (mode0 == V8SFmode)
36131 gen = gen_vec_extract_lo_v16sf;
36132 else
36133 gen = gen_vec_extract_lo_v16si;
36134 if (!nonimmediate_operand (op0, GET_MODE (op0)))
36135 op0 = copy_to_mode_reg (GET_MODE (op0), op0);
36136 emit_insn (gen (half, op0));
36137 op0 = half;
36138 if (GET_MODE (op3) != VOIDmode)
36139 {
/* Use a fresh register for the extracted mask so that it does not
   clobber the pass-through value already extracted into HALF above.  */
half = gen_reg_rtx (mode0);
36140 if (!nonimmediate_operand (op3, GET_MODE (op3)))
36141 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
36142 emit_insn (gen (half, op3));
36143 op3 = half;
36144 }
36145 break;
36146 case IX86_BUILTIN_GATHERALTDIV8SF:
36147 case IX86_BUILTIN_GATHERALTDIV8SI:
36148 half = gen_reg_rtx (mode0);
36149 if (mode0 == V4SFmode)
36150 gen = gen_vec_extract_lo_v8sf;
36151 else
36152 gen = gen_vec_extract_lo_v8si;
36153 if (!nonimmediate_operand (op0, GET_MODE (op0)))
36154 op0 = copy_to_mode_reg (GET_MODE (op0), op0);
36155 emit_insn (gen (half, op0));
36156 op0 = half;
36157 if (GET_MODE (op3) != VOIDmode)
36158 {
/* As above, use a fresh register for the extracted mask so that it does
   not clobber the pass-through value already extracted into HALF.  */
half = gen_reg_rtx (mode0);
36159 if (!nonimmediate_operand (op3, GET_MODE (op3)))
36160 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
36161 emit_insn (gen (half, op3));
36162 op3 = half;
36163 }
36164 break;
36165 default:
36166 break;
36167 }
36168
36169 /* Force the memory address into a base register here. But we
36170 don't want to do this for the memory operands of other builtin
36171 functions. */
36172 op1 = ix86_zero_extend_to_Pmode (op1);
36173
36174 if (!insn_data[icode].operand[1].predicate (op0, mode0))
36175 op0 = copy_to_mode_reg (mode0, op0);
36176 if (!insn_data[icode].operand[2].predicate (op1, Pmode))
36177 op1 = copy_to_mode_reg (Pmode, op1);
36178 if (!insn_data[icode].operand[3].predicate (op2, mode2))
36179 op2 = copy_to_mode_reg (mode2, op2);
36180 if (GET_MODE (op3) == mode3 || GET_MODE (op3) == VOIDmode)
36181 {
36182 if (!insn_data[icode].operand[4].predicate (op3, mode3))
36183 op3 = copy_to_mode_reg (mode3, op3);
36184 }
36185 else
36186 {
36187 op3 = copy_to_reg (op3);
36188 op3 = simplify_gen_subreg (mode3, op3, GET_MODE (op3), 0);
36189 }
36190 if (!insn_data[icode].operand[5].predicate (op4, mode4))
36191 {
36192 error ("the last argument must be scale 1, 2, 4, 8");
36193 return const0_rtx;
36194 }
36195
36196 /* Optimize. If mask is known to have all high bits set,
36197 replace op0 with pc_rtx to signal that the instruction
36198 overwrites the whole destination and doesn't use its
36199 previous contents. */
36200 if (optimize)
36201 {
36202 if (TREE_CODE (arg3) == INTEGER_CST)
36203 {
36204 if (integer_all_onesp (arg3))
36205 op0 = pc_rtx;
36206 }
36207 else if (TREE_CODE (arg3) == VECTOR_CST)
36208 {
36209 unsigned int negative = 0;
36210 for (i = 0; i < VECTOR_CST_NELTS (arg3); ++i)
36211 {
36212 tree cst = VECTOR_CST_ELT (arg3, i);
36213 if (TREE_CODE (cst) == INTEGER_CST
36214 && tree_int_cst_sign_bit (cst))
36215 negative++;
36216 else if (TREE_CODE (cst) == REAL_CST
36217 && REAL_VALUE_NEGATIVE (TREE_REAL_CST (cst)))
36218 negative++;
36219 }
36220 if (negative == TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg3)))
36221 op0 = pc_rtx;
36222 }
36223 else if (TREE_CODE (arg3) == SSA_NAME
36224 && TREE_CODE (TREE_TYPE (arg3)) == VECTOR_TYPE)
36225 {
36226 /* Recognize also when mask is like:
36227 __v2df src = _mm_setzero_pd ();
36228 __v2df mask = _mm_cmpeq_pd (src, src);
36229 or
36230 __v8sf src = _mm256_setzero_ps ();
36231 __v8sf mask = _mm256_cmp_ps (src, src, _CMP_EQ_OQ);
36232 as that is a cheaper way to load all ones into
36233 a register than having to load a constant from
36234 memory. */
36235 gimple def_stmt = SSA_NAME_DEF_STMT (arg3);
36236 if (is_gimple_call (def_stmt))
36237 {
36238 tree fndecl = gimple_call_fndecl (def_stmt);
36239 if (fndecl
36240 && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
36241 switch ((unsigned int) DECL_FUNCTION_CODE (fndecl))
36242 {
36243 case IX86_BUILTIN_CMPPD:
36244 case IX86_BUILTIN_CMPPS:
36245 case IX86_BUILTIN_CMPPD256:
36246 case IX86_BUILTIN_CMPPS256:
36247 if (!integer_zerop (gimple_call_arg (def_stmt, 2)))
36248 break;
36249 /* FALLTHRU */
36250 case IX86_BUILTIN_CMPEQPD:
36251 case IX86_BUILTIN_CMPEQPS:
36252 if (initializer_zerop (gimple_call_arg (def_stmt, 0))
36253 && initializer_zerop (gimple_call_arg (def_stmt,
36254 1)))
36255 op0 = pc_rtx;
36256 break;
36257 default:
36258 break;
36259 }
36260 }
36261 }
36262 }
36263
36264 pat = GEN_FCN (icode) (subtarget, op0, op1, op2, op3, op4);
36265 if (! pat)
36266 return const0_rtx;
36267 emit_insn (pat);
36268
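/* Gathers with 64-bit indices and 32-bit elements fill only half of the
   destination vector; their patterns still use a full-width destination,
   so for those builtins return just the low half of SUBTARGET.  */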
36269 switch (fcode)
36270 {
36271 case IX86_BUILTIN_GATHER3DIV16SF:
36272 if (target == NULL_RTX)
36273 target = gen_reg_rtx (V8SFmode);
36274 emit_insn (gen_vec_extract_lo_v16sf (target, subtarget));
36275 break;
36276 case IX86_BUILTIN_GATHER3DIV16SI:
36277 if (target == NULL_RTX)
36278 target = gen_reg_rtx (V8SImode);
36279 emit_insn (gen_vec_extract_lo_v16si (target, subtarget));
36280 break;
36281 case IX86_BUILTIN_GATHERDIV8SF:
36282 if (target == NULL_RTX)
36283 target = gen_reg_rtx (V4SFmode);
36284 emit_insn (gen_vec_extract_lo_v8sf (target, subtarget));
36285 break;
36286 case IX86_BUILTIN_GATHERDIV8SI:
36287 if (target == NULL_RTX)
36288 target = gen_reg_rtx (V4SImode);
36289 emit_insn (gen_vec_extract_lo_v8si (target, subtarget));
36290 break;
36291 default:
36292 target = subtarget;
36293 break;
36294 }
36295 return target;
36296
36297 scatter_gen:
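/* Scatter builtin arguments (roles inferred from the operand predicates
   checked below): arg0 = base pointer, arg1 = write mask, arg2 = index
   vector, arg3 = source vector, arg4 = scale.  */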
36298 arg0 = CALL_EXPR_ARG (exp, 0);
36299 arg1 = CALL_EXPR_ARG (exp, 1);
36300 arg2 = CALL_EXPR_ARG (exp, 2);
36301 arg3 = CALL_EXPR_ARG (exp, 3);
36302 arg4 = CALL_EXPR_ARG (exp, 4);
36303 op0 = expand_normal (arg0);
36304 op1 = expand_normal (arg1);
36305 op2 = expand_normal (arg2);
36306 op3 = expand_normal (arg3);
36307 op4 = expand_normal (arg4);
36308 mode1 = insn_data[icode].operand[1].mode;
36309 mode2 = insn_data[icode].operand[2].mode;
36310 mode3 = insn_data[icode].operand[3].mode;
36311 mode4 = insn_data[icode].operand[4].mode;
36312
36313 /* Force the memory address into a base register here. But we
36314 don't want to do this for the memory operands of other builtin
36315 functions. */
36316 op0 = force_reg (Pmode, convert_to_mode (Pmode, op0, 1));
36317
36318 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
36319 op0 = copy_to_mode_reg (Pmode, op0);
36320
36321 if (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode)
36322 {
36323 if (!insn_data[icode].operand[1].predicate (op1, mode1))
36324 op1 = copy_to_mode_reg (mode1, op1);
36325 }
36326 else
36327 {
36328 op1 = copy_to_reg (op1);
36329 op1 = simplify_gen_subreg (mode1, op1, GET_MODE (op1), 0);
36330 }
36331
36332 if (!insn_data[icode].operand[2].predicate (op2, mode2))
36333 op2 = copy_to_mode_reg (mode2, op2);
36334
36335 if (!insn_data[icode].operand[3].predicate (op3, mode3))
36336 op3 = copy_to_mode_reg (mode3, op3);
36337
36338 if (!insn_data[icode].operand[4].predicate (op4, mode4))
36339 {
36340 error ("the last argument must be scale 1, 2, 4, 8");
36341 return const0_rtx;
36342 }
36343
36344 pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
36345 if (! pat)
36346 return const0_rtx;
36347
36348 emit_insn (pat);
36349 return 0;
36350
36351 vec_prefetch_gen:
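/* Prefetch builtin arguments (roles matching the checks below): arg0 =
   mask, arg1 = index vector, arg2 = base address, arg3 = scale,
   arg4 = prefetch hint.  */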
36352 arg0 = CALL_EXPR_ARG (exp, 0);
36353 arg1 = CALL_EXPR_ARG (exp, 1);
36354 arg2 = CALL_EXPR_ARG (exp, 2);
36355 arg3 = CALL_EXPR_ARG (exp, 3);
36356 arg4 = CALL_EXPR_ARG (exp, 4);
36357 op0 = expand_normal (arg0);
36358 op1 = expand_normal (arg1);
36359 op2 = expand_normal (arg2);
36360 op3 = expand_normal (arg3);
36361 op4 = expand_normal (arg4);
36362 mode0 = insn_data[icode].operand[0].mode;
36363 mode1 = insn_data[icode].operand[1].mode;
36364 mode3 = insn_data[icode].operand[3].mode;
36365 mode4 = insn_data[icode].operand[4].mode;
36366
36367 if (GET_MODE (op0) == mode0
36368 || (GET_MODE (op0) == VOIDmode && op0 != constm1_rtx))
36369 {
36370 if (!insn_data[icode].operand[0].predicate (op0, mode0))
36371 op0 = copy_to_mode_reg (mode0, op0);
36372 }
36373 else if (op0 != constm1_rtx)
36374 {
36375 op0 = copy_to_reg (op0);
36376 op0 = simplify_gen_subreg (mode0, op0, GET_MODE (op0), 0);
36377 }
36378
36379 if (!insn_data[icode].operand[1].predicate (op1, mode1))
36380 op1 = copy_to_mode_reg (mode1, op1);
36381
36382 /* Force the memory address into a base register here. But we
36383 don't want to do this for the memory operands of other builtin
36384 functions. */
36385 op2 = force_reg (Pmode, convert_to_mode (Pmode, op2, 1));
36386
36387 if (!insn_data[icode].operand[2].predicate (op2, Pmode))
36388 op2 = copy_to_mode_reg (Pmode, op2);
36389
36390 if (!insn_data[icode].operand[3].predicate (op3, mode3))
36391 {
36392 error ("the forth argument must be scale 1, 2, 4, 8");
36393 return const0_rtx;
36394 }
36395
36396 if (!insn_data[icode].operand[4].predicate (op4, mode4))
36397 {
36398 error ("incorrect hint operand");
36399 return const0_rtx;
36400 }
36401
36402 pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
36403 if (! pat)
36404 return const0_rtx;
36405
36406 emit_insn (pat);
36407
36408 return 0;
36409
36410 case IX86_BUILTIN_XABORT:
36411 icode = CODE_FOR_xabort;
36412 arg0 = CALL_EXPR_ARG (exp, 0);
36413 op0 = expand_normal (arg0);
36414 mode0 = insn_data[icode].operand[0].mode;
36415 if (!insn_data[icode].operand[0].predicate (op0, mode0))
36416 {
36417 error ("the xabort's argument must be an 8-bit immediate");
36418 return const0_rtx;
36419 }
36420 emit_insn (gen_xabort (op0));
36421 return 0;
36422
36423 default:
36424 break;
36425 }
36426
36427 for (i = 0, d = bdesc_special_args;
36428 i < ARRAY_SIZE (bdesc_special_args);
36429 i++, d++)
36430 if (d->code == fcode)
36431 return ix86_expand_special_args_builtin (d, exp, target);
36432
36433 for (i = 0, d = bdesc_args;
36434 i < ARRAY_SIZE (bdesc_args);
36435 i++, d++)
36436 if (d->code == fcode)
36437 switch (fcode)
36438 {
36439 case IX86_BUILTIN_FABSQ:
36440 case IX86_BUILTIN_COPYSIGNQ:
36441 if (!TARGET_SSE)
36442 /* Emit a normal call if SSE isn't available. */
36443 return expand_call (exp, target, ignore);
36444 default:
36445 return ix86_expand_args_builtin (d, exp, target);
36446 }
36447
36448 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
36449 if (d->code == fcode)
36450 return ix86_expand_sse_comi (d, exp, target);
36451
36452 for (i = 0, d = bdesc_round_args; i < ARRAY_SIZE (bdesc_round_args); i++, d++)
36453 if (d->code == fcode)
36454 return ix86_expand_round_builtin (d, exp, target);
36455
36456 for (i = 0, d = bdesc_pcmpestr;
36457 i < ARRAY_SIZE (bdesc_pcmpestr);
36458 i++, d++)
36459 if (d->code == fcode)
36460 return ix86_expand_sse_pcmpestr (d, exp, target);
36461
36462 for (i = 0, d = bdesc_pcmpistr;
36463 i < ARRAY_SIZE (bdesc_pcmpistr);
36464 i++, d++)
36465 if (d->code == fcode)
36466 return ix86_expand_sse_pcmpistr (d, exp, target);
36467
36468 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
36469 if (d->code == fcode)
36470 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
36471 (enum ix86_builtin_func_type)
36472 d->flag, d->comparison);
36473
36474 gcc_unreachable ();
36475 }
36476
36477 /* This returns the target-specific builtin with code CODE if
36478 current_function_decl has visibility on this builtin, which is checked
36479 using isa flags. Returns NULL_TREE otherwise. */
36480
36481 static tree ix86_get_builtin (enum ix86_builtins code)
36482 {
36483 struct cl_target_option *opts;
36484 tree target_tree = NULL_TREE;
36485
36486 /* Determine the isa flags of current_function_decl. */
36487
36488 if (current_function_decl)
36489 target_tree = DECL_FUNCTION_SPECIFIC_TARGET (current_function_decl);
36490
36491 if (target_tree == NULL)
36492 target_tree = target_option_default_node;
36493
36494 opts = TREE_TARGET_OPTION (target_tree);
36495
36496 if (ix86_builtins_isa[(int) code].isa & opts->x_ix86_isa_flags)
36497 return ix86_builtin_decl (code, true);
36498 else
36499 return NULL_TREE;
36500 }
36501
36502 /* Returns a function decl for a vectorized version of the builtin function
36503 FNDECL, with result vector type TYPE_OUT and argument vector type TYPE_IN,
36504 or NULL_TREE if it is not available. */
36505
36506 static tree
36507 ix86_builtin_vectorized_function (tree fndecl, tree type_out,
36508 tree type_in)
36509 {
36510 enum machine_mode in_mode, out_mode;
36511 int in_n, out_n;
36512 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
36513
36514 if (TREE_CODE (type_out) != VECTOR_TYPE
36515 || TREE_CODE (type_in) != VECTOR_TYPE
36516 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
36517 return NULL_TREE;
36518
36519 out_mode = TYPE_MODE (TREE_TYPE (type_out));
36520 out_n = TYPE_VECTOR_SUBPARTS (type_out);
36521 in_mode = TYPE_MODE (TREE_TYPE (type_in));
36522 in_n = TYPE_VECTOR_SUBPARTS (type_in);
36523
36524 switch (fn)
36525 {
36526 case BUILT_IN_SQRT:
36527 if (out_mode == DFmode && in_mode == DFmode)
36528 {
36529 if (out_n == 2 && in_n == 2)
36530 return ix86_get_builtin (IX86_BUILTIN_SQRTPD);
36531 else if (out_n == 4 && in_n == 4)
36532 return ix86_get_builtin (IX86_BUILTIN_SQRTPD256);
36533 else if (out_n == 8 && in_n == 8)
36534 return ix86_get_builtin (IX86_BUILTIN_SQRTPD512);
36535 }
36536 break;
36537
36538 case BUILT_IN_EXP2F:
36539 if (out_mode == SFmode && in_mode == SFmode)
36540 {
36541 if (out_n == 16 && in_n == 16)
36542 return ix86_get_builtin (IX86_BUILTIN_EXP2PS);
36543 }
36544 break;
36545
36546 case BUILT_IN_SQRTF:
36547 if (out_mode == SFmode && in_mode == SFmode)
36548 {
36549 if (out_n == 4 && in_n == 4)
36550 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR);
36551 else if (out_n == 8 && in_n == 8)
36552 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR256);
36553 else if (out_n == 16 && in_n == 16)
36554 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR512);
36555 }
36556 break;
36557
36558 case BUILT_IN_IFLOOR:
36559 case BUILT_IN_LFLOOR:
36560 case BUILT_IN_LLFLOOR:
36561 /* The round insn does not trap on denormals. */
36562 if (flag_trapping_math || !TARGET_ROUND)
36563 break;
36564
36565 if (out_mode == SImode && in_mode == DFmode)
36566 {
36567 if (out_n == 4 && in_n == 2)
36568 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX);
36569 else if (out_n == 8 && in_n == 4)
36570 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256);
36571 else if (out_n == 16 && in_n == 8)
36572 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512);
36573 }
36574 break;
36575
36576 case BUILT_IN_IFLOORF:
36577 case BUILT_IN_LFLOORF:
36578 case BUILT_IN_LLFLOORF:
36579 /* The round insn does not trap on denormals. */
36580 if (flag_trapping_math || !TARGET_ROUND)
36581 break;
36582
36583 if (out_mode == SImode && in_mode == SFmode)
36584 {
36585 if (out_n == 4 && in_n == 4)
36586 return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX);
36587 else if (out_n == 8 && in_n == 8)
36588 return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX256);
36589 }
36590 break;
36591
36592 case BUILT_IN_ICEIL:
36593 case BUILT_IN_LCEIL:
36594 case BUILT_IN_LLCEIL:
36595 /* The round insn does not trap on denormals. */
36596 if (flag_trapping_math || !TARGET_ROUND)
36597 break;
36598
36599 if (out_mode == SImode && in_mode == DFmode)
36600 {
36601 if (out_n == 4 && in_n == 2)
36602 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX);
36603 else if (out_n == 8 && in_n == 4)
36604 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256);
36605 else if (out_n == 16 && in_n == 8)
36606 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512);
36607 }
36608 break;
36609
36610 case BUILT_IN_ICEILF:
36611 case BUILT_IN_LCEILF:
36612 case BUILT_IN_LLCEILF:
36613 /* The round insn does not trap on denormals. */
36614 if (flag_trapping_math || !TARGET_ROUND)
36615 break;
36616
36617 if (out_mode == SImode && in_mode == SFmode)
36618 {
36619 if (out_n == 4 && in_n == 4)
36620 return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX);
36621 else if (out_n == 8 && in_n == 8)
36622 return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX256);
36623 }
36624 break;
36625
36626 case BUILT_IN_IRINT:
36627 case BUILT_IN_LRINT:
36628 case BUILT_IN_LLRINT:
36629 if (out_mode == SImode && in_mode == DFmode)
36630 {
36631 if (out_n == 4 && in_n == 2)
36632 return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX);
36633 else if (out_n == 8 && in_n == 4)
36634 return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX256);
36635 }
36636 break;
36637
36638 case BUILT_IN_IRINTF:
36639 case BUILT_IN_LRINTF:
36640 case BUILT_IN_LLRINTF:
36641 if (out_mode == SImode && in_mode == SFmode)
36642 {
36643 if (out_n == 4 && in_n == 4)
36644 return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ);
36645 else if (out_n == 8 && in_n == 8)
36646 return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ256);
36647 }
36648 break;
36649
36650 case BUILT_IN_IROUND:
36651 case BUILT_IN_LROUND:
36652 case BUILT_IN_LLROUND:
36653 /* The round insn does not trap on denormals. */
36654 if (flag_trapping_math || !TARGET_ROUND)
36655 break;
36656
36657 if (out_mode == SImode && in_mode == DFmode)
36658 {
36659 if (out_n == 4 && in_n == 2)
36660 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX);
36661 else if (out_n == 8 && in_n == 4)
36662 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256);
36663 else if (out_n == 16 && in_n == 8)
36664 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512);
36665 }
36666 break;
36667
36668 case BUILT_IN_IROUNDF:
36669 case BUILT_IN_LROUNDF:
36670 case BUILT_IN_LLROUNDF:
36671 /* The round insn does not trap on denormals. */
36672 if (flag_trapping_math || !TARGET_ROUND)
36673 break;
36674
36675 if (out_mode == SImode && in_mode == SFmode)
36676 {
36677 if (out_n == 4 && in_n == 4)
36678 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX);
36679 else if (out_n == 8 && in_n == 8)
36680 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX256);
36681 }
36682 break;
36683
36684 case BUILT_IN_COPYSIGN:
36685 if (out_mode == DFmode && in_mode == DFmode)
36686 {
36687 if (out_n == 2 && in_n == 2)
36688 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD);
36689 else if (out_n == 4 && in_n == 4)
36690 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD256);
36691 else if (out_n == 8 && in_n == 8)
36692 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD512);
36693 }
36694 break;
36695
36696 case BUILT_IN_COPYSIGNF:
36697 if (out_mode == SFmode && in_mode == SFmode)
36698 {
36699 if (out_n == 4 && in_n == 4)
36700 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS);
36701 else if (out_n == 8 && in_n == 8)
36702 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS256);
36703 else if (out_n == 16 && in_n == 16)
36704 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS512);
36705 }
36706 break;
36707
36708 case BUILT_IN_FLOOR:
36709 /* The round insn does not trap on denormals. */
36710 if (flag_trapping_math || !TARGET_ROUND)
36711 break;
36712
36713 if (out_mode == DFmode && in_mode == DFmode)
36714 {
36715 if (out_n == 2 && in_n == 2)
36716 return ix86_get_builtin (IX86_BUILTIN_FLOORPD);
36717 else if (out_n == 4 && in_n == 4)
36718 return ix86_get_builtin (IX86_BUILTIN_FLOORPD256);
36719 }
36720 break;
36721
36722 case BUILT_IN_FLOORF:
36723 /* The round insn does not trap on denormals. */
36724 if (flag_trapping_math || !TARGET_ROUND)
36725 break;
36726
36727 if (out_mode == SFmode && in_mode == SFmode)
36728 {
36729 if (out_n == 4 && in_n == 4)
36730 return ix86_get_builtin (IX86_BUILTIN_FLOORPS);
36731 else if (out_n == 8 && in_n == 8)
36732 return ix86_get_builtin (IX86_BUILTIN_FLOORPS256);
36733 }
36734 break;
36735
36736 case BUILT_IN_CEIL:
36737 /* The round insn does not trap on denormals. */
36738 if (flag_trapping_math || !TARGET_ROUND)
36739 break;
36740
36741 if (out_mode == DFmode && in_mode == DFmode)
36742 {
36743 if (out_n == 2 && in_n == 2)
36744 return ix86_get_builtin (IX86_BUILTIN_CEILPD);
36745 else if (out_n == 4 && in_n == 4)
36746 return ix86_get_builtin (IX86_BUILTIN_CEILPD256);
36747 }
36748 break;
36749
36750 case BUILT_IN_CEILF:
36751 /* The round insn does not trap on denormals. */
36752 if (flag_trapping_math || !TARGET_ROUND)
36753 break;
36754
36755 if (out_mode == SFmode && in_mode == SFmode)
36756 {
36757 if (out_n == 4 && in_n == 4)
36758 return ix86_get_builtin (IX86_BUILTIN_CEILPS);
36759 else if (out_n == 8 && in_n == 8)
36760 return ix86_get_builtin (IX86_BUILTIN_CEILPS256);
36761 }
36762 break;
36763
36764 case BUILT_IN_TRUNC:
36765 /* The round insn does not trap on denormals. */
36766 if (flag_trapping_math || !TARGET_ROUND)
36767 break;
36768
36769 if (out_mode == DFmode && in_mode == DFmode)
36770 {
36771 if (out_n == 2 && in_n == 2)
36772 return ix86_get_builtin (IX86_BUILTIN_TRUNCPD);
36773 else if (out_n == 4 && in_n == 4)
36774 return ix86_get_builtin (IX86_BUILTIN_TRUNCPD256);
36775 }
36776 break;
36777
36778 case BUILT_IN_TRUNCF:
36779 /* The round insn does not trap on denormals. */
36780 if (flag_trapping_math || !TARGET_ROUND)
36781 break;
36782
36783 if (out_mode == SFmode && in_mode == SFmode)
36784 {
36785 if (out_n == 4 && in_n == 4)
36786 return ix86_get_builtin (IX86_BUILTIN_TRUNCPS);
36787 else if (out_n == 8 && in_n == 8)
36788 return ix86_get_builtin (IX86_BUILTIN_TRUNCPS256);
36789 }
36790 break;
36791
36792 case BUILT_IN_RINT:
36793 /* The round insn does not trap on denormals. */
36794 if (flag_trapping_math || !TARGET_ROUND)
36795 break;
36796
36797 if (out_mode == DFmode && in_mode == DFmode)
36798 {
36799 if (out_n == 2 && in_n == 2)
36800 return ix86_get_builtin (IX86_BUILTIN_RINTPD);
36801 else if (out_n == 4 && in_n == 4)
36802 return ix86_get_builtin (IX86_BUILTIN_RINTPD256);
36803 }
36804 break;
36805
36806 case BUILT_IN_RINTF:
36807 /* The round insn does not trap on denormals. */
36808 if (flag_trapping_math || !TARGET_ROUND)
36809 break;
36810
36811 if (out_mode == SFmode && in_mode == SFmode)
36812 {
36813 if (out_n == 4 && in_n == 4)
36814 return ix86_get_builtin (IX86_BUILTIN_RINTPS);
36815 else if (out_n == 8 && in_n == 8)
36816 return ix86_get_builtin (IX86_BUILTIN_RINTPS256);
36817 }
36818 break;
36819
36820 case BUILT_IN_ROUND:
36821 /* The round insn does not trap on denormals. */
36822 if (flag_trapping_math || !TARGET_ROUND)
36823 break;
36824
36825 if (out_mode == DFmode && in_mode == DFmode)
36826 {
36827 if (out_n == 2 && in_n == 2)
36828 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ);
36829 else if (out_n == 4 && in_n == 4)
36830 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ256);
36831 }
36832 break;
36833
36834 case BUILT_IN_ROUNDF:
36835 /* The round insn does not trap on denormals. */
36836 if (flag_trapping_math || !TARGET_ROUND)
36837 break;
36838
36839 if (out_mode == SFmode && in_mode == SFmode)
36840 {
36841 if (out_n == 4 && in_n == 4)
36842 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ);
36843 else if (out_n == 8 && in_n == 8)
36844 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ256);
36845 }
36846 break;
36847
36848 case BUILT_IN_FMA:
36849 if (out_mode == DFmode && in_mode == DFmode)
36850 {
36851 if (out_n == 2 && in_n == 2)
36852 return ix86_get_builtin (IX86_BUILTIN_VFMADDPD);
36853 if (out_n == 4 && in_n == 4)
36854 return ix86_get_builtin (IX86_BUILTIN_VFMADDPD256);
36855 }
36856 break;
36857
36858 case BUILT_IN_FMAF:
36859 if (out_mode == SFmode && in_mode == SFmode)
36860 {
36861 if (out_n == 4 && in_n == 4)
36862 return ix86_get_builtin (IX86_BUILTIN_VFMADDPS);
36863 if (out_n == 8 && in_n == 8)
36864 return ix86_get_builtin (IX86_BUILTIN_VFMADDPS256);
36865 }
36866 break;
36867
36868 default:
36869 break;
36870 }
36871
36872 /* Dispatch to a handler for a vectorization library. */
36873 if (ix86_veclib_handler)
36874 return ix86_veclib_handler ((enum built_in_function) fn, type_out,
36875 type_in);
36876
36877 return NULL_TREE;
36878 }
36879
36880 /* Handler for an SVML-style interface to
36881 a library with vectorized intrinsics. */
36882
36883 static tree
36884 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
36885 {
36886 char name[20];
36887 tree fntype, new_fndecl, args;
36888 unsigned arity;
36889 const char *bname;
36890 enum machine_mode el_mode, in_mode;
36891 int n, in_n;
36892
36893 /* The SVML is suitable for unsafe math only. */
36894 if (!flag_unsafe_math_optimizations)
36895 return NULL_TREE;
36896
36897 el_mode = TYPE_MODE (TREE_TYPE (type_out));
36898 n = TYPE_VECTOR_SUBPARTS (type_out);
36899 in_mode = TYPE_MODE (TREE_TYPE (type_in));
36900 in_n = TYPE_VECTOR_SUBPARTS (type_in);
36901 if (el_mode != in_mode
36902 || n != in_n)
36903 return NULL_TREE;
36904
36905 switch (fn)
36906 {
36907 case BUILT_IN_EXP:
36908 case BUILT_IN_LOG:
36909 case BUILT_IN_LOG10:
36910 case BUILT_IN_POW:
36911 case BUILT_IN_TANH:
36912 case BUILT_IN_TAN:
36913 case BUILT_IN_ATAN:
36914 case BUILT_IN_ATAN2:
36915 case BUILT_IN_ATANH:
36916 case BUILT_IN_CBRT:
36917 case BUILT_IN_SINH:
36918 case BUILT_IN_SIN:
36919 case BUILT_IN_ASINH:
36920 case BUILT_IN_ASIN:
36921 case BUILT_IN_COSH:
36922 case BUILT_IN_COS:
36923 case BUILT_IN_ACOSH:
36924 case BUILT_IN_ACOS:
36925 if (el_mode != DFmode || n != 2)
36926 return NULL_TREE;
36927 break;
36928
36929 case BUILT_IN_EXPF:
36930 case BUILT_IN_LOGF:
36931 case BUILT_IN_LOG10F:
36932 case BUILT_IN_POWF:
36933 case BUILT_IN_TANHF:
36934 case BUILT_IN_TANF:
36935 case BUILT_IN_ATANF:
36936 case BUILT_IN_ATAN2F:
36937 case BUILT_IN_ATANHF:
36938 case BUILT_IN_CBRTF:
36939 case BUILT_IN_SINHF:
36940 case BUILT_IN_SINF:
36941 case BUILT_IN_ASINHF:
36942 case BUILT_IN_ASINF:
36943 case BUILT_IN_COSHF:
36944 case BUILT_IN_COSF:
36945 case BUILT_IN_ACOSHF:
36946 case BUILT_IN_ACOSF:
36947 if (el_mode != SFmode || n != 4)
36948 return NULL_TREE;
36949 break;
36950
36951 default:
36952 return NULL_TREE;
36953 }
36954
36955 bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));
36956
36957 if (fn == BUILT_IN_LOGF)
36958 strcpy (name, "vmlsLn4");
36959 else if (fn == BUILT_IN_LOG)
36960 strcpy (name, "vmldLn2");
36961 else if (n == 4)
36962 {
36963 sprintf (name, "vmls%s", bname+10);
36964 name[strlen (name)-1] = '4';
36965 }
36966 else
36967 sprintf (name, "vmld%s2", bname+10);
36968
36969 /* Convert to uppercase. */
36970 name[4] &= ~0x20;
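/* For example, BUILT_IN_SINF ("__builtin_sinf") becomes "vmlsSin4" and
   BUILT_IN_SIN becomes "vmldSin2", matching the SVML naming scheme.  */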
36971
36972 arity = 0;
36973 for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
36974 args;
36975 args = TREE_CHAIN (args))
36976 arity++;
36977
36978 if (arity == 1)
36979 fntype = build_function_type_list (type_out, type_in, NULL);
36980 else
36981 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
36982
36983 /* Build a function declaration for the vectorized function. */
36984 new_fndecl = build_decl (BUILTINS_LOCATION,
36985 FUNCTION_DECL, get_identifier (name), fntype);
36986 TREE_PUBLIC (new_fndecl) = 1;
36987 DECL_EXTERNAL (new_fndecl) = 1;
36988 DECL_IS_NOVOPS (new_fndecl) = 1;
36989 TREE_READONLY (new_fndecl) = 1;
36990
36991 return new_fndecl;
36992 }
36993
36994 /* Handler for an ACML-style interface to
36995 a library with vectorized intrinsics. */
36996
36997 static tree
36998 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
36999 {
37000 char name[20] = "__vr.._";
37001 tree fntype, new_fndecl, args;
37002 unsigned arity;
37003 const char *bname;
37004 enum machine_mode el_mode, in_mode;
37005 int n, in_n;
37006
37007 /* The ACML is 64-bit only and suitable for unsafe math only, as
37008 it does not correctly support parts of IEEE (such as denormals)
37009 with the required precision. */
37010 if (!TARGET_64BIT
37011 || !flag_unsafe_math_optimizations)
37012 return NULL_TREE;
37013
37014 el_mode = TYPE_MODE (TREE_TYPE (type_out));
37015 n = TYPE_VECTOR_SUBPARTS (type_out);
37016 in_mode = TYPE_MODE (TREE_TYPE (type_in));
37017 in_n = TYPE_VECTOR_SUBPARTS (type_in);
37018 if (el_mode != in_mode
37019 || n != in_n)
37020 return NULL_TREE;
37021
37022 switch (fn)
37023 {
37024 case BUILT_IN_SIN:
37025 case BUILT_IN_COS:
37026 case BUILT_IN_EXP:
37027 case BUILT_IN_LOG:
37028 case BUILT_IN_LOG2:
37029 case BUILT_IN_LOG10:
37030 name[4] = 'd';
37031 name[5] = '2';
37032 if (el_mode != DFmode
37033 || n != 2)
37034 return NULL_TREE;
37035 break;
37036
37037 case BUILT_IN_SINF:
37038 case BUILT_IN_COSF:
37039 case BUILT_IN_EXPF:
37040 case BUILT_IN_POWF:
37041 case BUILT_IN_LOGF:
37042 case BUILT_IN_LOG2F:
37043 case BUILT_IN_LOG10F:
37044 name[4] = 's';
37045 name[5] = '4';
37046 if (el_mode != SFmode
37047 || n != 4)
37048 return NULL_TREE;
37049 break;
37050
37051 default:
37052 return NULL_TREE;
37053 }
37054
37055 bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));
37056 sprintf (name + 7, "%s", bname+10);
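/* For example, BUILT_IN_SIN yields "__vrd2_sin" and BUILT_IN_SINF yields
   "__vrs4_sinf", the ACML vector routine names.  */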
37057
37058 arity = 0;
37059 for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
37060 args;
37061 args = TREE_CHAIN (args))
37062 arity++;
37063
37064 if (arity == 1)
37065 fntype = build_function_type_list (type_out, type_in, NULL);
37066 else
37067 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
37068
37069 /* Build a function declaration for the vectorized function. */
37070 new_fndecl = build_decl (BUILTINS_LOCATION,
37071 FUNCTION_DECL, get_identifier (name), fntype);
37072 TREE_PUBLIC (new_fndecl) = 1;
37073 DECL_EXTERNAL (new_fndecl) = 1;
37074 DECL_IS_NOVOPS (new_fndecl) = 1;
37075 TREE_READONLY (new_fndecl) = 1;
37076
37077 return new_fndecl;
37078 }
37079
37080 /* Returns a decl of a function that implements a gather load with
37081 memory vector type MEM_VECTYPE, index type INDEX_TYPE and scale SCALE.
37082 Returns NULL_TREE if it is not available. */
37083
37084 static tree
37085 ix86_vectorize_builtin_gather (const_tree mem_vectype,
37086 const_tree index_type, int scale)
37087 {
37088 bool si;
37089 enum ix86_builtins code;
37090
37091 if (! TARGET_AVX2)
37092 return NULL_TREE;
37093
37094 if ((TREE_CODE (index_type) != INTEGER_TYPE
37095 && !POINTER_TYPE_P (index_type))
37096 || (TYPE_MODE (index_type) != SImode
37097 && TYPE_MODE (index_type) != DImode))
37098 return NULL_TREE;
37099
37100 if (TYPE_PRECISION (index_type) > POINTER_SIZE)
37101 return NULL_TREE;
37102
37103 /* v*gather* insn sign extends index to pointer mode. */
37104 if (TYPE_PRECISION (index_type) < POINTER_SIZE
37105 && TYPE_UNSIGNED (index_type))
37106 return NULL_TREE;
37107
37108 if (scale <= 0
37109 || scale > 8
37110 || (scale & (scale - 1)) != 0)
37111 return NULL_TREE;
37112
37113 si = TYPE_MODE (index_type) == SImode;
37114 switch (TYPE_MODE (mem_vectype))
37115 {
37116 case V2DFmode:
37117 code = si ? IX86_BUILTIN_GATHERSIV2DF : IX86_BUILTIN_GATHERDIV2DF;
37118 break;
37119 case V4DFmode:
37120 code = si ? IX86_BUILTIN_GATHERALTSIV4DF : IX86_BUILTIN_GATHERDIV4DF;
37121 break;
37122 case V2DImode:
37123 code = si ? IX86_BUILTIN_GATHERSIV2DI : IX86_BUILTIN_GATHERDIV2DI;
37124 break;
37125 case V4DImode:
37126 code = si ? IX86_BUILTIN_GATHERALTSIV4DI : IX86_BUILTIN_GATHERDIV4DI;
37127 break;
37128 case V4SFmode:
37129 code = si ? IX86_BUILTIN_GATHERSIV4SF : IX86_BUILTIN_GATHERDIV4SF;
37130 break;
37131 case V8SFmode:
37132 code = si ? IX86_BUILTIN_GATHERSIV8SF : IX86_BUILTIN_GATHERALTDIV8SF;
37133 break;
37134 case V4SImode:
37135 code = si ? IX86_BUILTIN_GATHERSIV4SI : IX86_BUILTIN_GATHERDIV4SI;
37136 break;
37137 case V8SImode:
37138 code = si ? IX86_BUILTIN_GATHERSIV8SI : IX86_BUILTIN_GATHERALTDIV8SI;
37139 break;
37140 case V8DFmode:
37141 if (TARGET_AVX512F)
37142 code = si ? IX86_BUILTIN_GATHER3ALTSIV8DF : IX86_BUILTIN_GATHER3DIV8DF;
37143 else
37144 return NULL_TREE;
37145 break;
37146 case V8DImode:
37147 if (TARGET_AVX512F)
37148 code = si ? IX86_BUILTIN_GATHER3ALTSIV8DI : IX86_BUILTIN_GATHER3DIV8DI;
37149 else
37150 return NULL_TREE;
37151 break;
37152 case V16SFmode:
37153 if (TARGET_AVX512F)
37154 code = si ? IX86_BUILTIN_GATHER3SIV16SF : IX86_BUILTIN_GATHER3ALTDIV16SF;
37155 else
37156 return NULL_TREE;
37157 break;
37158 case V16SImode:
37159 if (TARGET_AVX512F)
37160 code = si ? IX86_BUILTIN_GATHER3SIV16SI : IX86_BUILTIN_GATHER3ALTDIV16SI;
37161 else
37162 return NULL_TREE;
37163 break;
37164 default:
37165 return NULL_TREE;
37166 }
37167
37168 return ix86_get_builtin (code);
37169 }
37170
37171 /* Returns a decl for a target-specific builtin that implements the
37172 reciprocal of the function FN, or NULL_TREE if not available. */
37173
37174 static tree
37175 ix86_builtin_reciprocal (unsigned int fn, bool md_fn, bool)
37176 {
37177 if (! (TARGET_SSE_MATH && !optimize_insn_for_size_p ()
37178 && flag_finite_math_only && !flag_trapping_math
37179 && flag_unsafe_math_optimizations))
37180 return NULL_TREE;
37181
37182 if (md_fn)
37183 /* Machine dependent builtins. */
37184 switch (fn)
37185 {
37186 /* Vectorized version of sqrt to rsqrt conversion. */
37187 case IX86_BUILTIN_SQRTPS_NR:
37188 return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR);
37189
37190 case IX86_BUILTIN_SQRTPS_NR256:
37191 return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR256);
37192
37193 default:
37194 return NULL_TREE;
37195 }
37196 else
37197 /* Normal builtins. */
37198 switch (fn)
37199 {
37200 /* Sqrt to rsqrt conversion. */
37201 case BUILT_IN_SQRTF:
37202 return ix86_get_builtin (IX86_BUILTIN_RSQRTF);
37203
37204 default:
37205 return NULL_TREE;
37206 }
37207 }
37208 \f
37209 /* Helper for avx_vpermilps256_operand et al. This is also used by
37210 the expansion functions to turn the parallel back into a mask.
37211 The return value is 0 for no match and the imm8+1 for a match. */
37212
37213 int
37214 avx_vpermilp_parallel (rtx par, enum machine_mode mode)
37215 {
37216 unsigned i, nelt = GET_MODE_NUNITS (mode);
37217 unsigned mask = 0;
37218 unsigned char ipar[16] = {}; /* Silence -Wuninitialized warning. */
37219
37220 if (XVECLEN (par, 0) != (int) nelt)
37221 return 0;
37222
37223 /* Validate that all of the elements are constants, and not totally
37224 out of range. Copy the data into an integral array to make the
37225 subsequent checks easier. */
37226 for (i = 0; i < nelt; ++i)
37227 {
37228 rtx er = XVECEXP (par, 0, i);
37229 unsigned HOST_WIDE_INT ei;
37230
37231 if (!CONST_INT_P (er))
37232 return 0;
37233 ei = INTVAL (er);
37234 if (ei >= nelt)
37235 return 0;
37236 ipar[i] = ei;
37237 }
37238
37239 switch (mode)
37240 {
37241 case V8DFmode:
37242 /* In the 512-bit DFmode case, we can only move elements within
37243 a 128-bit lane. First fill the second part of the mask,
37244 then fallthru. */
37245 for (i = 4; i < 6; ++i)
37246 {
37247 if (ipar[i] < 4 || ipar[i] >= 6)
37248 return 0;
37249 mask |= (ipar[i] - 4) << i;
37250 }
37251 for (i = 6; i < 8; ++i)
37252 {
37253 if (ipar[i] < 6)
37254 return 0;
37255 mask |= (ipar[i] - 6) << i;
37256 }
37257 /* FALLTHRU */
37258
37259 case V4DFmode:
37260 /* In the 256-bit DFmode case, we can only move elements within
37261 a 128-bit lane. */
37262 for (i = 0; i < 2; ++i)
37263 {
37264 if (ipar[i] >= 2)
37265 return 0;
37266 mask |= ipar[i] << i;
37267 }
37268 for (i = 2; i < 4; ++i)
37269 {
37270 if (ipar[i] < 2)
37271 return 0;
37272 mask |= (ipar[i] - 2) << i;
37273 }
37274 break;
37275
37276 case V16SFmode:
37277 /* In the 512-bit SFmode case, the permutation in the upper 256 bits
37278 must mirror the permutation in the lower 256 bits. */
37279 for (i = 0; i < 8; ++i)
37280 if (ipar[i] + 8 != ipar[i + 8])
37281 return 0;
37282 /* FALLTHRU */
37283
37284 case V8SFmode:
37285 /* In the 256-bit SFmode case, we have full freedom of
37286 movement within the low 128-bit lane, but the high 128-bit
37287 lane must mirror the exact same pattern. */
37288 for (i = 0; i < 4; ++i)
37289 if (ipar[i] + 4 != ipar[i + 4])
37290 return 0;
37291 nelt = 4;
37292 /* FALLTHRU */
37293
37294 case V2DFmode:
37295 case V4SFmode:
37296 /* In the 128-bit case, we have full freedom in the placement of
37297 the elements from the source operand. */
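/* For example, for V4SFmode the parallel [1, 0, 3, 2] yields mask 0xb1
   (two bits per element), and the function returns 0xb2.  */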
37298 for (i = 0; i < nelt; ++i)
37299 mask |= ipar[i] << (i * (nelt / 2));
37300 break;
37301
37302 default:
37303 gcc_unreachable ();
37304 }
37305
37306 /* Make sure success has a non-zero value by adding one. */
37307 return mask + 1;
37308 }
37309
37310 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
37311 the expansion functions to turn the parallel back into a mask.
37312 The return value is 0 for no match and the imm8+1 for a match. */
37313
37314 int
37315 avx_vperm2f128_parallel (rtx par, enum machine_mode mode)
37316 {
37317 unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
37318 unsigned mask = 0;
37319 unsigned char ipar[8] = {}; /* Silence -Wuninitialized warning. */
37320
37321 if (XVECLEN (par, 0) != (int) nelt)
37322 return 0;
37323
37324 /* Validate that all of the elements are constants, and not totally
37325 out of range. Copy the data into an integral array to make the
37326 subsequent checks easier. */
37327 for (i = 0; i < nelt; ++i)
37328 {
37329 rtx er = XVECEXP (par, 0, i);
37330 unsigned HOST_WIDE_INT ei;
37331
37332 if (!CONST_INT_P (er))
37333 return 0;
37334 ei = INTVAL (er);
37335 if (ei >= 2 * nelt)
37336 return 0;
37337 ipar[i] = ei;
37338 }
37339
37340 /* Validate that the halves of the permute are halves. */
37341 for (i = 0; i < nelt2 - 1; ++i)
37342 if (ipar[i] + 1 != ipar[i + 1])
37343 return 0;
37344 for (i = nelt2; i < nelt - 1; ++i)
37345 if (ipar[i] + 1 != ipar[i + 1])
37346 return 0;
37347
37348 /* Reconstruct the mask. */
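/* For example, for V8SFmode the parallel [4, 5, 6, 7, 0, 1, 2, 3] selects
   half 1 for the low lane and half 0 for the high lane, giving mask 0x01
   (return value 0x02).  */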
37349 for (i = 0; i < 2; ++i)
37350 {
37351 unsigned e = ipar[i * nelt2];
37352 if (e % nelt2)
37353 return 0;
37354 e /= nelt2;
37355 mask |= e << (i * 4);
37356 }
37357
37358 /* Make sure success has a non-zero value by adding one. */
37359 return mask + 1;
37360 }
37361 \f
37362 /* Return a register priority for hard reg REGNO. */
37363 static int
37364 ix86_register_priority (int hard_regno)
37365 {
37366 /* ebp and r13 as the base always want a displacement, and r12 as the
37367 base always wants an index. So discourage their use in an
37368 address. */
37369 if (hard_regno == R12_REG || hard_regno == R13_REG)
37370 return 0;
37371 if (hard_regno == BP_REG)
37372 return 1;
37373 /* New x86-64 int registers result in bigger code size. Discourage
37374 them. */
37375 if (FIRST_REX_INT_REG <= hard_regno && hard_regno <= LAST_REX_INT_REG)
37376 return 2;
37377 /* New x86-64 SSE registers result in bigger code size. Discourage
37378 them. */
37379 if (FIRST_REX_SSE_REG <= hard_regno && hard_regno <= LAST_REX_SSE_REG)
37380 return 2;
37381 /* Usage of AX register results in smaller code. Prefer it. */
37382 if (hard_regno == 0)
37383 return 4;
37384 return 3;
37385 }
37386
37387 /* Implement TARGET_PREFERRED_RELOAD_CLASS.
37388
37389 Put float CONST_DOUBLE in the constant pool instead of fp regs.
37390 QImode must go into class Q_REGS.
37391 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
37392 movdf to do mem-to-mem moves through integer regs. */
37393
37394 static reg_class_t
37395 ix86_preferred_reload_class (rtx x, reg_class_t regclass)
37396 {
37397 enum machine_mode mode = GET_MODE (x);
37398
37399 /* We're only allowed to return a subclass of CLASS. Many of the
37400 following checks fail for NO_REGS, so eliminate that early. */
37401 if (regclass == NO_REGS)
37402 return NO_REGS;
37403
37404 /* All classes can load zeros. */
37405 if (x == CONST0_RTX (mode))
37406 return regclass;
37407
37408 /* Force constants into memory if we are loading a (nonzero) constant into
37409 an MMX, SSE or MASK register. This is because there are no MMX/SSE/MASK
37410 instructions to load from a constant. */
37411 if (CONSTANT_P (x)
37412 && (MAYBE_MMX_CLASS_P (regclass)
37413 || MAYBE_SSE_CLASS_P (regclass)
37414 || MAYBE_MASK_CLASS_P (regclass)))
37415 return NO_REGS;
37416
37417 /* Prefer SSE regs only, if we can use them for math. */
37418 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
37419 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
37420
37421 /* Floating-point constants need more complex checks. */
37422 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
37423 {
37424 /* General regs can load everything. */
37425 if (reg_class_subset_p (regclass, GENERAL_REGS))
37426 return regclass;
37427
37428 /* Floats can load 0 and 1 plus some others. Note that we eliminated
37429 zero above. We only want to wind up preferring 80387 registers if
37430 we plan on doing computation with them. */
37431 if (TARGET_80387
37432 && standard_80387_constant_p (x) > 0)
37433 {
37434 /* Limit class to non-sse. */
37435 if (regclass == FLOAT_SSE_REGS)
37436 return FLOAT_REGS;
37437 if (regclass == FP_TOP_SSE_REGS)
37438 return FP_TOP_REG;
37439 if (regclass == FP_SECOND_SSE_REGS)
37440 return FP_SECOND_REG;
37441 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
37442 return regclass;
37443 }
37444
37445 return NO_REGS;
37446 }
37447
37448 /* Generally when we see PLUS here, it's the function invariant
37449 (plus soft-fp const_int), which can only be computed into general
37450 regs. */
37451 if (GET_CODE (x) == PLUS)
37452 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
37453
37454 /* QImode constants are easy to load, but non-constant QImode data
37455 must go into Q_REGS. */
37456 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
37457 {
37458 if (reg_class_subset_p (regclass, Q_REGS))
37459 return regclass;
37460 if (reg_class_subset_p (Q_REGS, regclass))
37461 return Q_REGS;
37462 return NO_REGS;
37463 }
37464
37465 return regclass;
37466 }
37467
37468 /* Discourage putting floating-point values in SSE registers unless
37469 SSE math is being used, and likewise for the 387 registers. */
37470 static reg_class_t
37471 ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
37472 {
37473 enum machine_mode mode = GET_MODE (x);
37474
37475 /* Restrict the output reload class to the register bank that we are doing
37476 math on. If we would like not to return a subset of CLASS, reject this
37477 alternative: if reload cannot do this, it will still use its choice. */
37478 mode = GET_MODE (x);
37479 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
37480 return MAYBE_SSE_CLASS_P (regclass) ? ALL_SSE_REGS : NO_REGS;
37481
37482 if (X87_FLOAT_MODE_P (mode))
37483 {
37484 if (regclass == FP_TOP_SSE_REGS)
37485 return FP_TOP_REG;
37486 else if (regclass == FP_SECOND_SSE_REGS)
37487 return FP_SECOND_REG;
37488 else
37489 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
37490 }
37491
37492 return regclass;
37493 }
37494
37495 static reg_class_t
37496 ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
37497 enum machine_mode mode, secondary_reload_info *sri)
37498 {
37499 /* Double-word spills from general registers to non-offsettable memory
37500 references (zero-extended addresses) require special handling. */
37501 if (TARGET_64BIT
37502 && MEM_P (x)
37503 && GET_MODE_SIZE (mode) > UNITS_PER_WORD
37504 && INTEGER_CLASS_P (rclass)
37505 && !offsettable_memref_p (x))
37506 {
37507 sri->icode = (in_p
37508 ? CODE_FOR_reload_noff_load
37509 : CODE_FOR_reload_noff_store);
37510 /* Add the cost of moving address to a temporary. */
37511 sri->extra_cost = 1;
37512
37513 return NO_REGS;
37514 }
37515
37516 /* QImode spills from non-QI registers require an
37517 intermediate register on 32-bit targets. */
37518 if (mode == QImode
37519 && (MAYBE_MASK_CLASS_P (rclass)
37520 || (!TARGET_64BIT && !in_p
37521 && INTEGER_CLASS_P (rclass)
37522 && MAYBE_NON_Q_CLASS_P (rclass))))
37523 {
37524 int regno;
37525
37526 if (REG_P (x))
37527 regno = REGNO (x);
37528 else
37529 regno = -1;
37530
37531 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
37532 regno = true_regnum (x);
37533
37534 /* Return Q_REGS if the operand is in memory. */
37535 if (regno == -1)
37536 return Q_REGS;
37537 }
37538
37539 /* This condition handles corner case where an expression involving
37540 pointers gets vectorized. We're trying to use the address of a
37541 stack slot as a vector initializer.
37542
37543 (set (reg:V2DI 74 [ vect_cst_.2 ])
37544 (vec_duplicate:V2DI (reg/f:DI 20 frame)))
37545
37546 Eventually frame gets turned into sp+offset like this:
37547
37548 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
37549 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
37550 (const_int 392 [0x188]))))
37551
37552 That later gets turned into:
37553
37554 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
37555 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
37556 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))
37557
37558 We'll have the following reload recorded:
37559
37560 Reload 0: reload_in (DI) =
37561 (plus:DI (reg/f:DI 7 sp)
37562 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
37563 reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
37564 SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
37565 reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
37566 reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
37567 reload_reg_rtx: (reg:V2DI 22 xmm1)
37568
37569 Which isn't going to work since SSE instructions can't handle scalar
37570 additions. Returning GENERAL_REGS forces the addition into integer
37571 register and reload can handle subsequent reloads without problems. */
37572
37573 if (in_p && GET_CODE (x) == PLUS
37574 && SSE_CLASS_P (rclass)
37575 && SCALAR_INT_MODE_P (mode))
37576 return GENERAL_REGS;
37577
37578 return NO_REGS;
37579 }
37580
37581 /* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
37582
37583 static bool
37584 ix86_class_likely_spilled_p (reg_class_t rclass)
37585 {
37586 switch (rclass)
37587 {
37588 case AREG:
37589 case DREG:
37590 case CREG:
37591 case BREG:
37592 case AD_REGS:
37593 case SIREG:
37594 case DIREG:
37595 case SSE_FIRST_REG:
37596 case FP_TOP_REG:
37597 case FP_SECOND_REG:
37598 return true;
37599
37600 default:
37601 break;
37602 }
37603
37604 return false;
37605 }
37606
37607 /* If we are copying between general and FP registers, we need a memory
37608 location. The same is true for SSE and MMX registers.
37609
37610 To optimize register_move_cost performance, allow inline variant.
37611
37612 The macro can't work reliably when one of the CLASSES is a class containing
37613 registers from multiple units (SSE, MMX, integer). We avoid this by never
37614 combining those units in a single alternative in the machine description.
37615 Ensure that this constraint holds to avoid unexpected surprises.
37616
37617 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
37618 enforce these sanity checks. */
37619
37620 static inline bool
37621 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
37622 enum machine_mode mode, int strict)
37623 {
37624 if (lra_in_progress && (class1 == NO_REGS || class2 == NO_REGS))
37625 return false;
37626 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
37627 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
37628 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
37629 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
37630 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
37631 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
37632 {
37633 gcc_assert (!strict || lra_in_progress);
37634 return true;
37635 }
37636
37637 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
37638 return true;
37639
37640 /* Between mask and general, we have moves no larger than word size. */
37641 if ((MAYBE_MASK_CLASS_P (class1) != MAYBE_MASK_CLASS_P (class2))
37642 && (GET_MODE_SIZE (mode) > UNITS_PER_WORD))
37643 return true;
37644
37645 /* ??? This is a lie. We do have moves between mmx/general, and for
37646 mmx/sse2. But by saying we need secondary memory we discourage the
37647 register allocator from using the mmx registers unless needed. */
37648 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
37649 return true;
37650
37651 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
37652 {
37653 /* SSE1 doesn't have any direct moves from other classes. */
37654 if (!TARGET_SSE2)
37655 return true;
37656
37657 /* If the target says that inter-unit moves are more expensive
37658 than moving through memory, then don't generate them. */
37659 if ((SSE_CLASS_P (class1) && !TARGET_INTER_UNIT_MOVES_FROM_VEC)
37660 || (SSE_CLASS_P (class2) && !TARGET_INTER_UNIT_MOVES_TO_VEC))
37661 return true;
37662
37663 /* Between SSE and general, we have moves no larger than word size. */
37664 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
37665 return true;
37666 }
37667
37668 return false;
37669 }
37670
37671 bool
37672 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
37673 enum machine_mode mode, int strict)
37674 {
37675 return inline_secondary_memory_needed (class1, class2, mode, strict);
37676 }
37677
37678 /* Implement the TARGET_CLASS_MAX_NREGS hook.
37679
37680 On the 80386, this is the size of MODE in words,
37681 except in the FP regs, where a single reg is always enough. */
37682
37683 static unsigned char
37684 ix86_class_max_nregs (reg_class_t rclass, enum machine_mode mode)
37685 {
37686 if (MAYBE_INTEGER_CLASS_P (rclass))
37687 {
37688 if (mode == XFmode)
37689 return (TARGET_64BIT ? 2 : 3);
37690 else if (mode == XCmode)
37691 return (TARGET_64BIT ? 4 : 6);
37692 else
37693 return ((GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD);
37694 }
37695 else
37696 {
37697 if (COMPLEX_MODE_P (mode))
37698 return 2;
37699 else
37700 return 1;
37701 }
37702 }
37703
37704 /* Return true if the registers in CLASS cannot represent the change from
37705 modes FROM to TO. */
37706
37707 bool
37708 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
37709 enum reg_class regclass)
37710 {
37711 if (from == to)
37712 return false;
37713
37714 /* x87 registers can't do subreg at all, as all values are reformatted
37715 to extended precision. */
37716 if (MAYBE_FLOAT_CLASS_P (regclass))
37717 return true;
37718
37719 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
37720 {
37721 /* Vector registers do not support QI or HImode loads. If we don't
37722 disallow a change to these modes, reload will assume it's ok to
37723 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
37724 the vec_dupv4hi pattern. */
37725 if (GET_MODE_SIZE (from) < 4)
37726 return true;
37727 }
37728
37729 return false;
37730 }
37731
37732 /* Return the cost of moving data of mode M between a
37733 register and memory. A value of 2 is the default; this cost is
37734 relative to those in `REGISTER_MOVE_COST'.
37735
37736 This function is used extensively by register_move_cost, which is used to
37737 build tables at startup, so keep it inline.
37738 When IN is 2, return the maximum of the in and out move costs.
37739
37740 If moving between registers and memory is more expensive than
37741 between two registers, you should define this macro to express the
37742 relative cost.
37743
37744 Also model the increased cost of moving QImode registers in
37745 non-Q_REGS classes.
37746 */
37747 static inline int
37748 inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
37749 int in)
37750 {
37751 int cost;
37752 if (FLOAT_CLASS_P (regclass))
37753 {
37754 int index;
37755 switch (mode)
37756 {
37757 case SFmode:
37758 index = 0;
37759 break;
37760 case DFmode:
37761 index = 1;
37762 break;
37763 case XFmode:
37764 index = 2;
37765 break;
37766 default:
37767 return 100;
37768 }
37769 if (in == 2)
37770 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
37771 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
37772 }
37773 if (SSE_CLASS_P (regclass))
37774 {
37775 int index;
37776 switch (GET_MODE_SIZE (mode))
37777 {
37778 case 4:
37779 index = 0;
37780 break;
37781 case 8:
37782 index = 1;
37783 break;
37784 case 16:
37785 index = 2;
37786 break;
37787 default:
37788 return 100;
37789 }
37790 if (in == 2)
37791 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
37792 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
37793 }
37794 if (MMX_CLASS_P (regclass))
37795 {
37796 int index;
37797 switch (GET_MODE_SIZE (mode))
37798 {
37799 case 4:
37800 index = 0;
37801 break;
37802 case 8:
37803 index = 1;
37804 break;
37805 default:
37806 return 100;
37807 }
37808 if (in == 2)
37809 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
37810 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
37811 }
37812 switch (GET_MODE_SIZE (mode))
37813 {
37814 case 1:
37815 if (Q_CLASS_P (regclass) || TARGET_64BIT)
37816 {
37817 if (!in)
37818 return ix86_cost->int_store[0];
37819 if (TARGET_PARTIAL_REG_DEPENDENCY
37820 && optimize_function_for_speed_p (cfun))
37821 cost = ix86_cost->movzbl_load;
37822 else
37823 cost = ix86_cost->int_load[0];
37824 if (in == 2)
37825 return MAX (cost, ix86_cost->int_store[0]);
37826 return cost;
37827 }
37828 else
37829 {
37830 if (in == 2)
37831 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
37832 if (in)
37833 return ix86_cost->movzbl_load;
37834 else
37835 return ix86_cost->int_store[0] + 4;
37836 }
37837 break;
37838 case 2:
37839 if (in == 2)
37840 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
37841 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
37842 default:
37843 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
37844 if (mode == TFmode)
37845 mode = XFmode;
37846 if (in == 2)
37847 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
37848 else if (in)
37849 cost = ix86_cost->int_load[2];
37850 else
37851 cost = ix86_cost->int_store[2];
37852 return (cost * (((int) GET_MODE_SIZE (mode)
37853 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
37854 }
37855 }
37856
37857 static int
37858 ix86_memory_move_cost (enum machine_mode mode, reg_class_t regclass,
37859 bool in)
37860 {
37861 return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
37862 }
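/* Rough worked examples of the cost computation above, using entries of the
   active ix86_cost table (the actual values are per-tuning and illustrative):

     inline_memory_move_cost (DFmode, FLOAT_REGS, 1) -> fp_load[1]
     inline_memory_move_cost (DFmode, FLOAT_REGS, 2) -> MAX (fp_load[1],
							     fp_store[1])
     inline_memory_move_cost (V4SFmode, SSE_REGS, 0) -> sse_store[2]
     inline_memory_move_cost (DImode, GENERAL_REGS, 1)
       -> on 32-bit: int_load[2] * 2 (two word-sized pieces).  */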
37863
37864
37865 /* Return the cost of moving data from a register in class CLASS1 to
37866 one in class CLASS2.
37867
37868 It is not required that the cost always equal 2 when FROM is the same as TO;
37869 on some machines it is expensive to move between registers if they are not
37870 general registers. */
37871
37872 static int
37873 ix86_register_move_cost (enum machine_mode mode, reg_class_t class1_i,
37874 reg_class_t class2_i)
37875 {
37876 enum reg_class class1 = (enum reg_class) class1_i;
37877 enum reg_class class2 = (enum reg_class) class2_i;
37878
37879 /* In case we require secondary memory, compute cost of the store followed
37880 by load. In order to avoid bad register allocation choices, we need
37881 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
37882
37883 if (inline_secondary_memory_needed (class1, class2, mode, 0))
37884 {
37885 int cost = 1;
37886
37887 cost += inline_memory_move_cost (mode, class1, 2);
37888 cost += inline_memory_move_cost (mode, class2, 2);
37889
37890 /* In case of copying from a general purpose register we may emit multiple
37891 stores followed by a single load, causing a memory size mismatch stall.
37892 Count this as an arbitrarily high cost of 20. */
37893 if (targetm.class_max_nregs (class1, mode)
37894 > targetm.class_max_nregs (class2, mode))
37895 cost += 20;
37896
37897 /* In the case of FP/MMX moves, the registers actually overlap, and we
37898 have to switch modes in order to treat them differently. */
37899 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
37900 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
37901 cost += 20;
37902
37903 return cost;
37904 }
37905
37906 /* Moves between SSE/MMX and integer unit are expensive. */
37907 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
37908 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
37909
37910 /* ??? By keeping returned value relatively high, we limit the number
37911 of moves between integer and MMX/SSE registers for all targets.
37912 Additionally, high value prevents problem with x86_modes_tieable_p(),
37913 where integer modes in MMX/SSE registers are not tieable
37914 because of missing QImode and HImode moves to, from or between
37915 MMX/SSE registers. */
37916 return MAX (8, ix86_cost->mmxsse_to_integer);
37917
37918 if (MAYBE_FLOAT_CLASS_P (class1))
37919 return ix86_cost->fp_move;
37920 if (MAYBE_SSE_CLASS_P (class1))
37921 return ix86_cost->sse_move;
37922 if (MAYBE_MMX_CLASS_P (class1))
37923 return ix86_cost->mmx_move;
37924 return 2;
37925 }
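/* Illustrative cost shapes produced by the function above:

     GENERAL_REGS -> GENERAL_REGS, SImode : 2 (the default)
     FLOAT_REGS   -> FLOAT_REGS,   DFmode : ix86_cost->fp_move
     SSE_REGS     -> GENERAL_REGS, SImode : MAX (8, mmxsse_to_integer)
					    when no secondary memory is needed
     MMX_REGS     -> GENERAL_REGS, SImode : 1 + two memory-move costs (+20
					    penalties where they apply), since
					    secondary memory is required.  */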
37926
37927 /* Return TRUE if hard register REGNO can hold a value of machine-mode
37928 MODE. */
37929
37930 bool
37931 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
37932 {
37933 /* The flags register, and only the flags register, can hold CCmode values. */
37934 if (CC_REGNO_P (regno))
37935 return GET_MODE_CLASS (mode) == MODE_CC;
37936 if (GET_MODE_CLASS (mode) == MODE_CC
37937 || GET_MODE_CLASS (mode) == MODE_RANDOM
37938 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
37939 return false;
37940 if (STACK_REGNO_P (regno))
37941 return VALID_FP_MODE_P (mode);
37942 if (MASK_REGNO_P (regno))
37943 return (VALID_MASK_REG_MODE (mode)
37944 || (TARGET_AVX512BW && VALID_MASK_AVX512BW_MODE (mode)));
37945 if (SSE_REGNO_P (regno))
37946 {
37947 /* We implement the move patterns for all vector modes into and
37948 out of SSE registers, even when no operation instructions
37949 are available. */
37950
37951 /* For AVX-512 we allow, regardless of regno:
37952 - XImode,
37953 - any 512-bit wide vector mode,
37954 - any scalar mode. */
37955 if (TARGET_AVX512F
37956 && (mode == XImode
37957 || VALID_AVX512F_REG_MODE (mode)
37958 || VALID_AVX512F_SCALAR_MODE (mode)))
37959 return true;
37960
37961 /* TODO: check for QImode/HImode scalars. */
37962 /* AVX512VL allows SSE registers 16+ (xmm16-xmm31) for 128/256-bit modes. */
37963 if (TARGET_AVX512VL
37964 && (mode == OImode
37965 || mode == TImode
37966 || VALID_AVX256_REG_MODE (mode)
37967 || VALID_AVX512VL_128_REG_MODE (mode)))
37968 return true;
37969
37970 /* xmm16-xmm31 are only available for AVX-512. */
37971 if (EXT_REX_SSE_REGNO_P (regno))
37972 return false;
37973
37974 /* OImode and AVX modes are available only when AVX is enabled. */
37975 return ((TARGET_AVX
37976 && VALID_AVX256_REG_OR_OI_MODE (mode))
37977 || VALID_SSE_REG_MODE (mode)
37978 || VALID_SSE2_REG_MODE (mode)
37979 || VALID_MMX_REG_MODE (mode)
37980 || VALID_MMX_REG_MODE_3DNOW (mode));
37981 }
37982 if (MMX_REGNO_P (regno))
37983 {
37984 /* We implement the move patterns for 3DNOW modes even in MMX mode,
37985 so if the register is available at all, then we can move data of
37986 the given mode into or out of it. */
37987 return (VALID_MMX_REG_MODE (mode)
37988 || VALID_MMX_REG_MODE_3DNOW (mode));
37989 }
37990
37991 if (mode == QImode)
37992 {
37993 /* Take care for QImode values - they can be in non-QI regs,
37994 but then they do cause partial register stalls. */
37995 if (ANY_QI_REGNO_P (regno))
37996 return true;
37997 if (!TARGET_PARTIAL_REG_STALL)
37998 return true;
37999 /* LRA checks if the hard register is OK for the given mode.
38000 QImode values can live in non-QI regs, so we allow all
38001 registers here. */
38002 if (lra_in_progress)
38003 return true;
38004 return !can_create_pseudo_p ();
38005 }
38006 /* We handle both integer and floats in the general purpose registers. */
38007 else if (VALID_INT_MODE_P (mode))
38008 return true;
38009 else if (VALID_FP_MODE_P (mode))
38010 return true;
38011 else if (VALID_DFP_MODE_P (mode))
38012 return true;
38013 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
38014 on to use that value in smaller contexts, this can easily force a
38015 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
38016 supporting DImode, allow it. */
38017 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
38018 return true;
38019
38020 return false;
38021 }
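/* A few illustrative consequences of the checks above:

     - FLAGS_REG accepts only MODE_CC modes.
     - AX_REG accepts QImode and the usual scalar integer and FP modes.
     - xmm0-xmm15 accept V4SFmode; 256-bit vector modes additionally need AVX.
     - xmm16-xmm31 without AVX512VL accept only the 512-bit vector and scalar
       modes permitted by the TARGET_AVX512F test, so e.g. V4SFmode in xmm16
       requires AVX512VL.  */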
38022
38023 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
38024 tieable integer mode. */
38025
38026 static bool
38027 ix86_tieable_integer_mode_p (enum machine_mode mode)
38028 {
38029 switch (mode)
38030 {
38031 case HImode:
38032 case SImode:
38033 return true;
38034
38035 case QImode:
38036 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
38037
38038 case DImode:
38039 return TARGET_64BIT;
38040
38041 default:
38042 return false;
38043 }
38044 }
38045
38046 /* Return true if MODE1 is accessible in a register that can hold MODE2
38047 without copying. That is, all register classes that can hold MODE2
38048 can also hold MODE1. */
38049
38050 bool
38051 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
38052 {
38053 if (mode1 == mode2)
38054 return true;
38055
38056 if (ix86_tieable_integer_mode_p (mode1)
38057 && ix86_tieable_integer_mode_p (mode2))
38058 return true;
38059
38060 /* MODE2 being XFmode implies fp stack or general regs, which means we
38061 can tie any smaller floating point modes to it. Note that we do not
38062 tie this with TFmode. */
38063 if (mode2 == XFmode)
38064 return mode1 == SFmode || mode1 == DFmode;
38065
38066 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
38067 that we can tie it with SFmode. */
38068 if (mode2 == DFmode)
38069 return mode1 == SFmode;
38070
38071 /* If MODE2 is only appropriate for an SSE register, then tie with
38072 any other mode acceptable to SSE registers. */
38073 if (GET_MODE_SIZE (mode2) == 32
38074 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
38075 return (GET_MODE_SIZE (mode1) == 32
38076 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
38077 if (GET_MODE_SIZE (mode2) == 16
38078 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
38079 return (GET_MODE_SIZE (mode1) == 16
38080 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
38081
38082 /* If MODE2 is appropriate for an MMX register, then tie
38083 with any other mode acceptable to MMX registers. */
38084 if (GET_MODE_SIZE (mode2) == 8
38085 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
38086 return (GET_MODE_SIZE (mode1) == 8
38087 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
38088
38089 return false;
38090 }
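/* Examples, reading the cases above directly:

     ix86_modes_tieable_p (SFmode, XFmode)    -> true  (XFmode lives in x87
						or general regs)
     ix86_modes_tieable_p (SFmode, DFmode)    -> true
     ix86_modes_tieable_p (QImode, SImode)    -> true unless partial register
						stalls make QImode non-tieable
     ix86_modes_tieable_p (V4SFmode, V2DImode) -> true when both are valid
						16-byte SSE modes.  */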
38091
38092 /* Return the cost of moving between two registers of mode MODE. */
38093
38094 static int
38095 ix86_set_reg_reg_cost (enum machine_mode mode)
38096 {
38097 unsigned int units = UNITS_PER_WORD;
38098
38099 switch (GET_MODE_CLASS (mode))
38100 {
38101 default:
38102 break;
38103
38104 case MODE_CC:
38105 units = GET_MODE_SIZE (CCmode);
38106 break;
38107
38108 case MODE_FLOAT:
38109 if ((TARGET_SSE && mode == TFmode)
38110 || (TARGET_80387 && mode == XFmode)
38111 || ((TARGET_80387 || TARGET_SSE2) && mode == DFmode)
38112 || ((TARGET_80387 || TARGET_SSE) && mode == SFmode))
38113 units = GET_MODE_SIZE (mode);
38114 break;
38115
38116 case MODE_COMPLEX_FLOAT:
38117 if ((TARGET_SSE && mode == TCmode)
38118 || (TARGET_80387 && mode == XCmode)
38119 || ((TARGET_80387 || TARGET_SSE2) && mode == DCmode)
38120 || ((TARGET_80387 || TARGET_SSE) && mode == SCmode))
38121 units = GET_MODE_SIZE (mode);
38122 break;
38123
38124 case MODE_VECTOR_INT:
38125 case MODE_VECTOR_FLOAT:
38126 if ((TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
38127 || (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
38128 || (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
38129 || (TARGET_SSE && VALID_SSE_REG_MODE (mode))
38130 || (TARGET_MMX && VALID_MMX_REG_MODE (mode)))
38131 units = GET_MODE_SIZE (mode);
38132 }
38133
38134 /* Return the cost of moving between two registers of mode MODE,
38135 assuming that the move will be in pieces of at most UNITS bytes. */
38136 return COSTS_N_INSNS ((GET_MODE_SIZE (mode) + units - 1) / units);
38137 }
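/* Worked examples of the cost above (assuming UNITS_PER_WORD == 4 on -m32):

     SImode               -> COSTS_N_INSNS (1)   one word-sized move
     DImode on -m32       -> COSTS_N_INSNS (2)   two word-sized pieces
     DFmode with SSE2     -> COSTS_N_INSNS (1)   units == 8, single move
     V8SFmode with AVX    -> COSTS_N_INSNS (1)   units == 32, single move.  */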
38138
38139 /* Compute a (partial) cost for rtx X. Return true if the complete
38140 cost has been computed, and false if subexpressions should be
38141 scanned. In either case, *TOTAL contains the cost result. */
38142
38143 static bool
38144 ix86_rtx_costs (rtx x, int code_i, int outer_code_i, int opno, int *total,
38145 bool speed)
38146 {
38147 rtx mask;
38148 enum rtx_code code = (enum rtx_code) code_i;
38149 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
38150 enum machine_mode mode = GET_MODE (x);
38151 const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
38152
38153 switch (code)
38154 {
38155 case SET:
38156 if (register_operand (SET_DEST (x), VOIDmode)
38157 && reg_or_0_operand (SET_SRC (x), VOIDmode))
38158 {
38159 *total = ix86_set_reg_reg_cost (GET_MODE (SET_DEST (x)));
38160 return true;
38161 }
38162 return false;
38163
38164 case CONST_INT:
38165 case CONST:
38166 case LABEL_REF:
38167 case SYMBOL_REF:
38168 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
38169 *total = 3;
38170 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
38171 *total = 2;
38172 else if (flag_pic && SYMBOLIC_CONST (x)
38173 && !(TARGET_64BIT
38174 && (GET_CODE (x) == LABEL_REF
38175 || (GET_CODE (x) == SYMBOL_REF
38176 && SYMBOL_REF_LOCAL_P (x)))))
38177 *total = 1;
38178 else
38179 *total = 0;
38180 return true;
38181
38182 case CONST_DOUBLE:
38183 if (mode == VOIDmode)
38184 {
38185 *total = 0;
38186 return true;
38187 }
38188 switch (standard_80387_constant_p (x))
38189 {
38190 case 1: /* 0.0 */
38191 *total = 1;
38192 return true;
38193 default: /* Other constants */
38194 *total = 2;
38195 return true;
38196 case 0:
38197 case -1:
38198 break;
38199 }
38200 if (SSE_FLOAT_MODE_P (mode))
38201 {
38202 case CONST_VECTOR:
38203 switch (standard_sse_constant_p (x))
38204 {
38205 case 0:
38206 break;
38207 case 1: /* 0: xor eliminates false dependency */
38208 *total = 0;
38209 return true;
38210 default: /* -1: cmp contains false dependency */
38211 *total = 1;
38212 return true;
38213 }
38214 }
38215 /* Fall back to (MEM (SYMBOL_REF)), since that's where
38216 it'll probably end up. Add a penalty for size. */
38217 *total = (COSTS_N_INSNS (1)
38218 + (flag_pic != 0 && !TARGET_64BIT)
38219 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
38220 return true;
38221
38222 case ZERO_EXTEND:
38223 /* Zero extension is often completely free on x86_64, so make
38224 it as cheap as possible. */
38225 if (TARGET_64BIT && mode == DImode
38226 && GET_MODE (XEXP (x, 0)) == SImode)
38227 *total = 1;
38228 else if (TARGET_ZERO_EXTEND_WITH_AND)
38229 *total = cost->add;
38230 else
38231 *total = cost->movzx;
38232 return false;
38233
38234 case SIGN_EXTEND:
38235 *total = cost->movsx;
38236 return false;
38237
38238 case ASHIFT:
38239 if (SCALAR_INT_MODE_P (mode)
38240 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
38241 && CONST_INT_P (XEXP (x, 1)))
38242 {
38243 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
38244 if (value == 1)
38245 {
38246 *total = cost->add;
38247 return false;
38248 }
38249 if ((value == 2 || value == 3)
38250 && cost->lea <= cost->shift_const)
38251 {
38252 *total = cost->lea;
38253 return false;
38254 }
38255 }
38256 /* FALLTHRU */
38257
38258 case ROTATE:
38259 case ASHIFTRT:
38260 case LSHIFTRT:
38261 case ROTATERT:
38262 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
38263 {
38264 /* ??? Should be SSE vector operation cost. */
38265 /* At least for published AMD latencies, this really is the same
38266 as the latency for a simple fpu operation like fabs. */
38267 /* V*QImode is emulated with 1-11 insns. */
38268 if (mode == V16QImode || mode == V32QImode)
38269 {
38270 int count = 11;
38271 if (TARGET_XOP && mode == V16QImode)
38272 {
38273 /* For XOP we use vpshab, which requires a broadcast of the
38274 value to the variable shift insn. For constants this
38275 means a V16Q const in mem; even when we can perform the
38276 shift with one insn set the cost to prefer paddb. */
38277 if (CONSTANT_P (XEXP (x, 1)))
38278 {
38279 *total = (cost->fabs
38280 + rtx_cost (XEXP (x, 0), code, 0, speed)
38281 + (speed ? 2 : COSTS_N_BYTES (16)));
38282 return true;
38283 }
38284 count = 3;
38285 }
38286 else if (TARGET_SSSE3)
38287 count = 7;
38288 *total = cost->fabs * count;
38289 }
38290 else
38291 *total = cost->fabs;
38292 }
38293 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
38294 {
38295 if (CONST_INT_P (XEXP (x, 1)))
38296 {
38297 if (INTVAL (XEXP (x, 1)) > 32)
38298 *total = cost->shift_const + COSTS_N_INSNS (2);
38299 else
38300 *total = cost->shift_const * 2;
38301 }
38302 else
38303 {
38304 if (GET_CODE (XEXP (x, 1)) == AND)
38305 *total = cost->shift_var * 2;
38306 else
38307 *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
38308 }
38309 }
38310 else
38311 {
38312 if (CONST_INT_P (XEXP (x, 1)))
38313 *total = cost->shift_const;
38314 else if (GET_CODE (XEXP (x, 1)) == SUBREG
38315 && GET_CODE (XEXP (XEXP (x, 1), 0)) == AND)
38316 {
38317 /* Return the cost after shift-and truncation. */
38318 *total = cost->shift_var;
38319 return true;
38320 }
38321 else
38322 *total = cost->shift_var;
38323 }
38324 return false;
38325
38326 case FMA:
38327 {
38328 rtx sub;
38329
38330 gcc_assert (FLOAT_MODE_P (mode));
38331 gcc_assert (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F);
38332
38333 /* ??? SSE scalar/vector cost should be used here. */
38334 /* ??? Bald assumption that fma has the same cost as fmul. */
38335 *total = cost->fmul;
38336 *total += rtx_cost (XEXP (x, 1), FMA, 1, speed);
38337
38338 /* A negation in op0 or op2 is free: FMS, FNMA, FNMS. */
38339 sub = XEXP (x, 0);
38340 if (GET_CODE (sub) == NEG)
38341 sub = XEXP (sub, 0);
38342 *total += rtx_cost (sub, FMA, 0, speed);
38343
38344 sub = XEXP (x, 2);
38345 if (GET_CODE (sub) == NEG)
38346 sub = XEXP (sub, 0);
38347 *total += rtx_cost (sub, FMA, 2, speed);
38348 return true;
38349 }
38350
38351 case MULT:
38352 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38353 {
38354 /* ??? SSE scalar cost should be used here. */
38355 *total = cost->fmul;
38356 return false;
38357 }
38358 else if (X87_FLOAT_MODE_P (mode))
38359 {
38360 *total = cost->fmul;
38361 return false;
38362 }
38363 else if (FLOAT_MODE_P (mode))
38364 {
38365 /* ??? SSE vector cost should be used here. */
38366 *total = cost->fmul;
38367 return false;
38368 }
38369 else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
38370 {
38371 /* V*QImode is emulated with 7-13 insns. */
38372 if (mode == V16QImode || mode == V32QImode)
38373 {
38374 int extra = 11;
38375 if (TARGET_XOP && mode == V16QImode)
38376 extra = 5;
38377 else if (TARGET_SSSE3)
38378 extra = 6;
38379 *total = cost->fmul * 2 + cost->fabs * extra;
38380 }
38381 /* V*DImode is emulated with 5-8 insns. */
38382 else if (mode == V2DImode || mode == V4DImode)
38383 {
38384 if (TARGET_XOP && mode == V2DImode)
38385 *total = cost->fmul * 2 + cost->fabs * 3;
38386 else
38387 *total = cost->fmul * 3 + cost->fabs * 5;
38388 }
38389 /* Without sse4.1, we don't have PMULLD; it's emulated with 7
38390 insns, including two PMULUDQ. */
38391 else if (mode == V4SImode && !(TARGET_SSE4_1 || TARGET_AVX))
38392 *total = cost->fmul * 2 + cost->fabs * 5;
38393 else
38394 *total = cost->fmul;
38395 return false;
38396 }
38397 else
38398 {
38399 rtx op0 = XEXP (x, 0);
38400 rtx op1 = XEXP (x, 1);
38401 int nbits;
38402 if (CONST_INT_P (XEXP (x, 1)))
38403 {
38404 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
38405 for (nbits = 0; value != 0; value &= value - 1)
38406 nbits++;
38407 }
38408 else
38409 /* This is arbitrary. */
38410 nbits = 7;
38411
38412 /* Compute costs correctly for widening multiplication. */
38413 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
38414 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
38415 == GET_MODE_SIZE (mode))
38416 {
38417 int is_mulwiden = 0;
38418 enum machine_mode inner_mode = GET_MODE (op0);
38419
38420 if (GET_CODE (op0) == GET_CODE (op1))
38421 is_mulwiden = 1, op1 = XEXP (op1, 0);
38422 else if (CONST_INT_P (op1))
38423 {
38424 if (GET_CODE (op0) == SIGN_EXTEND)
38425 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
38426 == INTVAL (op1);
38427 else
38428 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
38429 }
38430
38431 if (is_mulwiden)
38432 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
38433 }
38434
38435 *total = (cost->mult_init[MODE_INDEX (mode)]
38436 + nbits * cost->mult_bit
38437 + rtx_cost (op0, outer_code, opno, speed)
38438 + rtx_cost (op1, outer_code, opno, speed));
38439
38440 return true;
38441 }
38442
38443 case DIV:
38444 case UDIV:
38445 case MOD:
38446 case UMOD:
38447 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38448 /* ??? SSE cost should be used here. */
38449 *total = cost->fdiv;
38450 else if (X87_FLOAT_MODE_P (mode))
38451 *total = cost->fdiv;
38452 else if (FLOAT_MODE_P (mode))
38453 /* ??? SSE vector cost should be used here. */
38454 *total = cost->fdiv;
38455 else
38456 *total = cost->divide[MODE_INDEX (mode)];
38457 return false;
38458
38459 case PLUS:
38460 if (GET_MODE_CLASS (mode) == MODE_INT
38461 && GET_MODE_SIZE (mode) <= UNITS_PER_WORD)
38462 {
38463 if (GET_CODE (XEXP (x, 0)) == PLUS
38464 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
38465 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
38466 && CONSTANT_P (XEXP (x, 1)))
38467 {
38468 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
38469 if (val == 2 || val == 4 || val == 8)
38470 {
38471 *total = cost->lea;
38472 *total += rtx_cost (XEXP (XEXP (x, 0), 1),
38473 outer_code, opno, speed);
38474 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
38475 outer_code, opno, speed);
38476 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
38477 return true;
38478 }
38479 }
38480 else if (GET_CODE (XEXP (x, 0)) == MULT
38481 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
38482 {
38483 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
38484 if (val == 2 || val == 4 || val == 8)
38485 {
38486 *total = cost->lea;
38487 *total += rtx_cost (XEXP (XEXP (x, 0), 0),
38488 outer_code, opno, speed);
38489 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
38490 return true;
38491 }
38492 }
38493 else if (GET_CODE (XEXP (x, 0)) == PLUS)
38494 {
38495 *total = cost->lea;
38496 *total += rtx_cost (XEXP (XEXP (x, 0), 0),
38497 outer_code, opno, speed);
38498 *total += rtx_cost (XEXP (XEXP (x, 0), 1),
38499 outer_code, opno, speed);
38500 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
38501 return true;
38502 }
38503 }
38504 /* FALLTHRU */
38505
38506 case MINUS:
38507 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38508 {
38509 /* ??? SSE cost should be used here. */
38510 *total = cost->fadd;
38511 return false;
38512 }
38513 else if (X87_FLOAT_MODE_P (mode))
38514 {
38515 *total = cost->fadd;
38516 return false;
38517 }
38518 else if (FLOAT_MODE_P (mode))
38519 {
38520 /* ??? SSE vector cost should be used here. */
38521 *total = cost->fadd;
38522 return false;
38523 }
38524 /* FALLTHRU */
38525
38526 case AND:
38527 case IOR:
38528 case XOR:
38529 if (GET_MODE_CLASS (mode) == MODE_INT
38530 && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
38531 {
38532 *total = (cost->add * 2
38533 + (rtx_cost (XEXP (x, 0), outer_code, opno, speed)
38534 << (GET_MODE (XEXP (x, 0)) != DImode))
38535 + (rtx_cost (XEXP (x, 1), outer_code, opno, speed)
38536 << (GET_MODE (XEXP (x, 1)) != DImode)));
38537 return true;
38538 }
38539 /* FALLTHRU */
38540
38541 case NEG:
38542 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38543 {
38544 /* ??? SSE cost should be used here. */
38545 *total = cost->fchs;
38546 return false;
38547 }
38548 else if (X87_FLOAT_MODE_P (mode))
38549 {
38550 *total = cost->fchs;
38551 return false;
38552 }
38553 else if (FLOAT_MODE_P (mode))
38554 {
38555 /* ??? SSE vector cost should be used here. */
38556 *total = cost->fchs;
38557 return false;
38558 }
38559 /* FALLTHRU */
38560
38561 case NOT:
38562 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
38563 {
38564 /* ??? Should be SSE vector operation cost. */
38565 /* At least for published AMD latencies, this really is the same
38566 as the latency for a simple fpu operation like fabs. */
38567 *total = cost->fabs;
38568 }
38569 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
38570 *total = cost->add * 2;
38571 else
38572 *total = cost->add;
38573 return false;
38574
38575 case COMPARE:
38576 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
38577 && XEXP (XEXP (x, 0), 1) == const1_rtx
38578 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
38579 && XEXP (x, 1) == const0_rtx)
38580 {
38581 /* This kind of construct is implemented using test[bwl].
38582 Treat it as if we had an AND. */
38583 *total = (cost->add
38584 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, opno, speed)
38585 + rtx_cost (const1_rtx, outer_code, opno, speed));
38586 return true;
38587 }
38588 return false;
38589
38590 case FLOAT_EXTEND:
38591 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
38592 *total = 0;
38593 return false;
38594
38595 case ABS:
38596 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38597 /* ??? SSE cost should be used here. */
38598 *total = cost->fabs;
38599 else if (X87_FLOAT_MODE_P (mode))
38600 *total = cost->fabs;
38601 else if (FLOAT_MODE_P (mode))
38602 /* ??? SSE vector cost should be used here. */
38603 *total = cost->fabs;
38604 return false;
38605
38606 case SQRT:
38607 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38608 /* ??? SSE cost should be used here. */
38609 *total = cost->fsqrt;
38610 else if (X87_FLOAT_MODE_P (mode))
38611 *total = cost->fsqrt;
38612 else if (FLOAT_MODE_P (mode))
38613 /* ??? SSE vector cost should be used here. */
38614 *total = cost->fsqrt;
38615 return false;
38616
38617 case UNSPEC:
38618 if (XINT (x, 1) == UNSPEC_TP)
38619 *total = 0;
38620 return false;
38621
38622 case VEC_SELECT:
38623 case VEC_CONCAT:
38624 case VEC_DUPLICATE:
38625 /* ??? Assume all of these vector manipulation patterns are
38626 recognizable. In which case they all pretty much have the
38627 same cost. */
38628 *total = cost->fabs;
38629 return true;
38630 case VEC_MERGE:
38631 mask = XEXP (x, 2);
38632 /* This is a masked instruction; assume the same cost
38633 as the non-masked variant. */
38634 if (TARGET_AVX512F && register_operand (mask, GET_MODE (mask)))
38635 *total = rtx_cost (XEXP (x, 0), outer_code, opno, speed);
38636 else
38637 *total = cost->fabs;
38638 return true;
38639
38640 default:
38641 return false;
38642 }
38643 }
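/* Illustrative fragment of the costing above: for an integer multiply by a
   constant, the number of set bits in the constant feeds the cost, so

     x * 10   (binary 1010, nbits == 2)
	-> mult_init[MODE_INDEX (mode)] + 2 * mult_bit + operand costs
     x * 255  (binary 11111111, nbits == 8)
	-> mult_init[MODE_INDEX (mode)] + 8 * mult_bit + operand costs

   while a multiply by a non-constant uses the arbitrary nbits == 7.  */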
38644
38645 #if TARGET_MACHO
38646
38647 static int current_machopic_label_num;
38648
38649 /* Given a symbol name and its associated stub, write out the
38650 definition of the stub. */
38651
38652 void
38653 machopic_output_stub (FILE *file, const char *symb, const char *stub)
38654 {
38655 unsigned int length;
38656 char *binder_name, *symbol_name, lazy_ptr_name[32];
38657 int label = ++current_machopic_label_num;
38658
38659 /* For 64-bit we shouldn't get here. */
38660 gcc_assert (!TARGET_64BIT);
38661
38662 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
38663 symb = targetm.strip_name_encoding (symb);
38664
38665 length = strlen (stub);
38666 binder_name = XALLOCAVEC (char, length + 32);
38667 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
38668
38669 length = strlen (symb);
38670 symbol_name = XALLOCAVEC (char, length + 32);
38671 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
38672
38673 sprintf (lazy_ptr_name, "L%d$lz", label);
38674
38675 if (MACHOPIC_ATT_STUB)
38676 switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
38677 else if (MACHOPIC_PURE)
38678 switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
38679 else
38680 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
38681
38682 fprintf (file, "%s:\n", stub);
38683 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
38684
38685 if (MACHOPIC_ATT_STUB)
38686 {
38687 fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
38688 }
38689 else if (MACHOPIC_PURE)
38690 {
38691 /* PIC stub. */
38692 /* 25-byte PIC stub using "CALL get_pc_thunk". */
38693 rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
38694 output_set_got (tmp, NULL_RTX); /* "CALL ___<cpu>.get_pc_thunk.cx". */
38695 fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
38696 label, lazy_ptr_name, label);
38697 fprintf (file, "\tjmp\t*%%ecx\n");
38698 }
38699 else
38700 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
38701
38702 /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
38703 it needs no stub-binding-helper. */
38704 if (MACHOPIC_ATT_STUB)
38705 return;
38706
38707 fprintf (file, "%s:\n", binder_name);
38708
38709 if (MACHOPIC_PURE)
38710 {
38711 fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name);
38712 fprintf (file, "\tpushl\t%%ecx\n");
38713 }
38714 else
38715 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
38716
38717 fputs ("\tjmp\tdyld_stub_binding_helper\n", file);
38718
38719 /* N.B. Keep the correspondence of these
38720 'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
38721 old-pic/new-pic/non-pic stubs; altering this will break
38722 compatibility with existing dylibs. */
38723 if (MACHOPIC_PURE)
38724 {
38725 /* 25-byte PIC stub using "CALL get_pc_thunk". */
38726 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
38727 }
38728 else
38729 /* 16-byte -mdynamic-no-pic stub. */
38730 switch_to_section(darwin_sections[machopic_lazy_symbol_ptr3_section]);
38731
38732 fprintf (file, "%s:\n", lazy_ptr_name);
38733 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
38734 fprintf (file, ASM_LONG "%s\n", binder_name);
38735 }
38736 #endif /* TARGET_MACHO */
38737
38738 /* Order the registers for register allocator. */
38739
38740 void
38741 x86_order_regs_for_local_alloc (void)
38742 {
38743 int pos = 0;
38744 int i;
38745
38746 /* First allocate the local general purpose registers. */
38747 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
38748 if (GENERAL_REGNO_P (i) && call_used_regs[i])
38749 reg_alloc_order [pos++] = i;
38750
38751 /* Global general purpose registers. */
38752 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
38753 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
38754 reg_alloc_order [pos++] = i;
38755
38756 /* x87 registers come first in case we are doing FP math
38757 using them. */
38758 if (!TARGET_SSE_MATH)
38759 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
38760 reg_alloc_order [pos++] = i;
38761
38762 /* SSE registers. */
38763 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
38764 reg_alloc_order [pos++] = i;
38765 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
38766 reg_alloc_order [pos++] = i;
38767
38768 /* Extended REX SSE registers. */
38769 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
38770 reg_alloc_order [pos++] = i;
38771
38772 /* Mask register. */
38773 for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
38774 reg_alloc_order [pos++] = i;
38775
38776 /* x87 registers. */
38777 if (TARGET_SSE_MATH)
38778 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
38779 reg_alloc_order [pos++] = i;
38780
38781 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
38782 reg_alloc_order [pos++] = i;
38783
38784 /* Initialize the rest of the array, as we do not allocate some registers
38785 at all. */
38786 while (pos < FIRST_PSEUDO_REGISTER)
38787 reg_alloc_order [pos++] = 0;
38788 }
38789
38790 /* Handle a "callee_pop_aggregate_return" attribute; arguments as
38791 in struct attribute_spec.handler. */
38792 static tree
38793 ix86_handle_callee_pop_aggregate_return (tree *node, tree name,
38794 tree args,
38795 int,
38796 bool *no_add_attrs)
38797 {
38798 if (TREE_CODE (*node) != FUNCTION_TYPE
38799 && TREE_CODE (*node) != METHOD_TYPE
38800 && TREE_CODE (*node) != FIELD_DECL
38801 && TREE_CODE (*node) != TYPE_DECL)
38802 {
38803 warning (OPT_Wattributes, "%qE attribute only applies to functions",
38804 name);
38805 *no_add_attrs = true;
38806 return NULL_TREE;
38807 }
38808 if (TARGET_64BIT)
38809 {
38810 warning (OPT_Wattributes, "%qE attribute only available for 32-bit",
38811 name);
38812 *no_add_attrs = true;
38813 return NULL_TREE;
38814 }
38815 if (is_attribute_p ("callee_pop_aggregate_return", name))
38816 {
38817 tree cst;
38818
38819 cst = TREE_VALUE (args);
38820 if (TREE_CODE (cst) != INTEGER_CST)
38821 {
38822 warning (OPT_Wattributes,
38823 "%qE attribute requires an integer constant argument",
38824 name);
38825 *no_add_attrs = true;
38826 }
38827 else if (compare_tree_int (cst, 0) != 0
38828 && compare_tree_int (cst, 1) != 0)
38829 {
38830 warning (OPT_Wattributes,
38831 "argument to %qE attribute is neither zero, nor one",
38832 name);
38833 *no_add_attrs = true;
38834 }
38835
38836 return NULL_TREE;
38837 }
38838
38839 return NULL_TREE;
38840 }
38841
38842 /* Handle an "ms_abi" or "sysv_abi" attribute; arguments as in
38843 struct attribute_spec.handler. */
38844 static tree
38845 ix86_handle_abi_attribute (tree *node, tree name, tree, int,
38846 bool *no_add_attrs)
38847 {
38848 if (TREE_CODE (*node) != FUNCTION_TYPE
38849 && TREE_CODE (*node) != METHOD_TYPE
38850 && TREE_CODE (*node) != FIELD_DECL
38851 && TREE_CODE (*node) != TYPE_DECL)
38852 {
38853 warning (OPT_Wattributes, "%qE attribute only applies to functions",
38854 name);
38855 *no_add_attrs = true;
38856 return NULL_TREE;
38857 }
38858
38859 /* Can combine regparm with all attributes but fastcall. */
38860 if (is_attribute_p ("ms_abi", name))
38861 {
38862 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
38863 {
38864 error ("ms_abi and sysv_abi attributes are not compatible");
38865 }
38866
38867 return NULL_TREE;
38868 }
38869 else if (is_attribute_p ("sysv_abi", name))
38870 {
38871 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
38872 {
38873 error ("ms_abi and sysv_abi attributes are not compatible");
38874 }
38875
38876 return NULL_TREE;
38877 }
38878
38879 return NULL_TREE;
38880 }
38881
38882 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
38883 struct attribute_spec.handler. */
38884 static tree
38885 ix86_handle_struct_attribute (tree *node, tree name, tree, int,
38886 bool *no_add_attrs)
38887 {
38888 tree *type = NULL;
38889 if (DECL_P (*node))
38890 {
38891 if (TREE_CODE (*node) == TYPE_DECL)
38892 type = &TREE_TYPE (*node);
38893 }
38894 else
38895 type = node;
38896
38897 if (!(type && RECORD_OR_UNION_TYPE_P (*type)))
38898 {
38899 warning (OPT_Wattributes, "%qE attribute ignored",
38900 name);
38901 *no_add_attrs = true;
38902 }
38903
38904 else if ((is_attribute_p ("ms_struct", name)
38905 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
38906 || ((is_attribute_p ("gcc_struct", name)
38907 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
38908 {
38909 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
38910 name);
38911 *no_add_attrs = true;
38912 }
38913
38914 return NULL_TREE;
38915 }
38916
38917 static tree
38918 ix86_handle_fndecl_attribute (tree *node, tree name, tree, int,
38919 bool *no_add_attrs)
38920 {
38921 if (TREE_CODE (*node) != FUNCTION_DECL)
38922 {
38923 warning (OPT_Wattributes, "%qE attribute only applies to functions",
38924 name);
38925 *no_add_attrs = true;
38926 }
38927 return NULL_TREE;
38928 }
38929
38930 static bool
38931 ix86_ms_bitfield_layout_p (const_tree record_type)
38932 {
38933 return ((TARGET_MS_BITFIELD_LAYOUT
38934 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
38935 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
38936 }
38937
38938 /* Returns an expression indicating where the this parameter is
38939 located on entry to the FUNCTION. */
38940
38941 static rtx
38942 x86_this_parameter (tree function)
38943 {
38944 tree type = TREE_TYPE (function);
38945 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
38946 int nregs;
38947
38948 if (TARGET_64BIT)
38949 {
38950 const int *parm_regs;
38951
38952 if (ix86_function_type_abi (type) == MS_ABI)
38953 parm_regs = x86_64_ms_abi_int_parameter_registers;
38954 else
38955 parm_regs = x86_64_int_parameter_registers;
38956 return gen_rtx_REG (Pmode, parm_regs[aggr]);
38957 }
38958
38959 nregs = ix86_function_regparm (type, function);
38960
38961 if (nregs > 0 && !stdarg_p (type))
38962 {
38963 int regno;
38964 unsigned int ccvt = ix86_get_callcvt (type);
38965
38966 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
38967 regno = aggr ? DX_REG : CX_REG;
38968 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
38969 {
38970 regno = CX_REG;
38971 if (aggr)
38972 return gen_rtx_MEM (SImode,
38973 plus_constant (Pmode, stack_pointer_rtx, 4));
38974 }
38975 else
38976 {
38977 regno = AX_REG;
38978 if (aggr)
38979 {
38980 regno = DX_REG;
38981 if (nregs == 1)
38982 return gen_rtx_MEM (SImode,
38983 plus_constant (Pmode,
38984 stack_pointer_rtx, 4));
38985 }
38986 }
38987 return gen_rtx_REG (SImode, regno);
38988 }
38989
38990 return gen_rtx_MEM (SImode, plus_constant (Pmode, stack_pointer_rtx,
38991 aggr ? 8 : 4));
38992 }
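/* Where `this' arrives, as computed above (illustrative):

     64-bit SysV ABI          : %rdi, or %rsi when a hidden aggregate-return
				pointer occupies the first slot
     32-bit fastcall          : %ecx, or %edx for the aggregate case
     32-bit thiscall          : %ecx; with an aggregate return it is taken
				from 4(%esp)
     plain 32-bit, no regparm : 4(%esp), or 8(%esp) past a hidden
				aggregate-return pointer.  */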
38993
38994 /* Determine whether x86_output_mi_thunk can succeed. */
38995
38996 static bool
38997 x86_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
38998 const_tree function)
38999 {
39000 /* 64-bit can handle anything. */
39001 if (TARGET_64BIT)
39002 return true;
39003
39004 /* For 32-bit, everything's fine if we have one free register. */
39005 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
39006 return true;
39007
39008 /* Need a free register for vcall_offset. */
39009 if (vcall_offset)
39010 return false;
39011
39012 /* Need a free register for GOT references. */
39013 if (flag_pic && !targetm.binds_local_p (function))
39014 return false;
39015
39016 /* Otherwise ok. */
39017 return true;
39018 }
39019
39020 /* Output the assembler code for a thunk function. THUNK_DECL is the
39021 declaration for the thunk function itself, FUNCTION is the decl for
39022 the target function. DELTA is an immediate constant offset to be
39023 added to THIS. If VCALL_OFFSET is nonzero, the word at
39024 *(*this + vcall_offset) should be added to THIS. */
39025
39026 static void
39027 x86_output_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
39028 HOST_WIDE_INT vcall_offset, tree function)
39029 {
39030 rtx this_param = x86_this_parameter (function);
39031 rtx this_reg, tmp, fnaddr;
39032 unsigned int tmp_regno;
39033 rtx_insn *insn;
39034
39035 if (TARGET_64BIT)
39036 tmp_regno = R10_REG;
39037 else
39038 {
39039 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function));
39040 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
39041 tmp_regno = AX_REG;
39042 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
39043 tmp_regno = DX_REG;
39044 else
39045 tmp_regno = CX_REG;
39046 }
39047
39048 emit_note (NOTE_INSN_PROLOGUE_END);
39049
39050 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
39051 pull it in now and let DELTA benefit. */
39052 if (REG_P (this_param))
39053 this_reg = this_param;
39054 else if (vcall_offset)
39055 {
39056 /* Put the this parameter into %eax. */
39057 this_reg = gen_rtx_REG (Pmode, AX_REG);
39058 emit_move_insn (this_reg, this_param);
39059 }
39060 else
39061 this_reg = NULL_RTX;
39062
39063 /* Adjust the this parameter by a fixed constant. */
39064 if (delta)
39065 {
39066 rtx delta_rtx = GEN_INT (delta);
39067 rtx delta_dst = this_reg ? this_reg : this_param;
39068
39069 if (TARGET_64BIT)
39070 {
39071 if (!x86_64_general_operand (delta_rtx, Pmode))
39072 {
39073 tmp = gen_rtx_REG (Pmode, tmp_regno);
39074 emit_move_insn (tmp, delta_rtx);
39075 delta_rtx = tmp;
39076 }
39077 }
39078
39079 ix86_emit_binop (PLUS, Pmode, delta_dst, delta_rtx);
39080 }
39081
39082 /* Adjust the this parameter by a value stored in the vtable. */
39083 if (vcall_offset)
39084 {
39085 rtx vcall_addr, vcall_mem, this_mem;
39086
39087 tmp = gen_rtx_REG (Pmode, tmp_regno);
39088
39089 this_mem = gen_rtx_MEM (ptr_mode, this_reg);
39090 if (Pmode != ptr_mode)
39091 this_mem = gen_rtx_ZERO_EXTEND (Pmode, this_mem);
39092 emit_move_insn (tmp, this_mem);
39093
39094 /* Adjust the this parameter. */
39095 vcall_addr = plus_constant (Pmode, tmp, vcall_offset);
39096 if (TARGET_64BIT
39097 && !ix86_legitimate_address_p (ptr_mode, vcall_addr, true))
39098 {
39099 rtx tmp2 = gen_rtx_REG (Pmode, R11_REG);
39100 emit_move_insn (tmp2, GEN_INT (vcall_offset));
39101 vcall_addr = gen_rtx_PLUS (Pmode, tmp, tmp2);
39102 }
39103
39104 vcall_mem = gen_rtx_MEM (ptr_mode, vcall_addr);
39105 if (Pmode != ptr_mode)
39106 emit_insn (gen_addsi_1_zext (this_reg,
39107 gen_rtx_REG (ptr_mode,
39108 REGNO (this_reg)),
39109 vcall_mem));
39110 else
39111 ix86_emit_binop (PLUS, Pmode, this_reg, vcall_mem);
39112 }
39113
39114 /* If necessary, drop THIS back to its stack slot. */
39115 if (this_reg && this_reg != this_param)
39116 emit_move_insn (this_param, this_reg);
39117
39118 fnaddr = XEXP (DECL_RTL (function), 0);
39119 if (TARGET_64BIT)
39120 {
39121 if (!flag_pic || targetm.binds_local_p (function)
39122 || TARGET_PECOFF)
39123 ;
39124 else
39125 {
39126 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOTPCREL);
39127 tmp = gen_rtx_CONST (Pmode, tmp);
39128 fnaddr = gen_const_mem (Pmode, tmp);
39129 }
39130 }
39131 else
39132 {
39133 if (!flag_pic || targetm.binds_local_p (function))
39134 ;
39135 #if TARGET_MACHO
39136 else if (TARGET_MACHO)
39137 {
39138 fnaddr = machopic_indirect_call_target (DECL_RTL (function));
39139 fnaddr = XEXP (fnaddr, 0);
39140 }
39141 #endif /* TARGET_MACHO */
39142 else
39143 {
39144 tmp = gen_rtx_REG (Pmode, CX_REG);
39145 output_set_got (tmp, NULL_RTX);
39146
39147 fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOT);
39148 fnaddr = gen_rtx_CONST (Pmode, fnaddr);
39149 fnaddr = gen_rtx_PLUS (Pmode, tmp, fnaddr);
39150 fnaddr = gen_const_mem (Pmode, fnaddr);
39151 }
39152 }
39153
39154 /* Our sibling call patterns do not allow memories, because we have no
39155 predicate that can distinguish between frame and non-frame memory.
39156 For our purposes here, we can get away with (ab)using a jump pattern,
39157 because we're going to do no optimization. */
39158 if (MEM_P (fnaddr))
39159 {
39160 if (sibcall_insn_operand (fnaddr, word_mode))
39161 {
39162 fnaddr = XEXP (DECL_RTL (function), 0);
39163 tmp = gen_rtx_MEM (QImode, fnaddr);
39164 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
39165 tmp = emit_call_insn (tmp);
39166 SIBLING_CALL_P (tmp) = 1;
39167 }
39168 else
39169 emit_jump_insn (gen_indirect_jump (fnaddr));
39170 }
39171 else
39172 {
39173 if (ix86_cmodel == CM_LARGE_PIC && SYMBOLIC_CONST (fnaddr))
39174 fnaddr = legitimize_pic_address (fnaddr,
39175 gen_rtx_REG (Pmode, tmp_regno));
39176
39177 if (!sibcall_insn_operand (fnaddr, word_mode))
39178 {
39179 tmp = gen_rtx_REG (word_mode, tmp_regno);
39180 if (GET_MODE (fnaddr) != word_mode)
39181 fnaddr = gen_rtx_ZERO_EXTEND (word_mode, fnaddr);
39182 emit_move_insn (tmp, fnaddr);
39183 fnaddr = tmp;
39184 }
39185
39186 tmp = gen_rtx_MEM (QImode, fnaddr);
39187 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
39188 tmp = emit_call_insn (tmp);
39189 SIBLING_CALL_P (tmp) = 1;
39190 }
39191 emit_barrier ();
39192
39193 /* Emit just enough of rest_of_compilation to get the insns emitted.
39194 Note that use_thunk calls assemble_start_function et al. */
39195 insn = get_insns ();
39196 shorten_branches (insn);
39197 final_start_function (insn, file, 1);
39198 final (insn, file, 1);
39199 final_end_function ();
39200 }
39201
39202 static void
39203 x86_file_start (void)
39204 {
39205 default_file_start ();
39206 if (TARGET_16BIT)
39207 fputs ("\t.code16gcc\n", asm_out_file);
39208 #if TARGET_MACHO
39209 darwin_file_start ();
39210 #endif
39211 if (X86_FILE_START_VERSION_DIRECTIVE)
39212 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
39213 if (X86_FILE_START_FLTUSED)
39214 fputs ("\t.global\t__fltused\n", asm_out_file);
39215 if (ix86_asm_dialect == ASM_INTEL)
39216 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
39217 }
39218
39219 int
39220 x86_field_alignment (tree field, int computed)
39221 {
39222 enum machine_mode mode;
39223 tree type = TREE_TYPE (field);
39224
39225 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
39226 return computed;
39227 mode = TYPE_MODE (strip_array_types (type));
39228 if (mode == DFmode || mode == DCmode
39229 || GET_MODE_CLASS (mode) == MODE_INT
39230 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
39231 return MIN (32, computed);
39232 return computed;
39233 }
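/* Example of the rule above: on a 32-bit target without -malign-double, a
   structure field of type double (DFmode) whose natural alignment would be
   64 bits is capped at MIN (32, 64) == 32 bits, matching the traditional
   ix86 ABI.  */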
39234
39235 /* Print call to TARGET to FILE. */
39236
39237 static void
39238 x86_print_call_or_nop (FILE *file, const char *target)
39239 {
39240 if (flag_nop_mcount)
39241 fprintf (file, "1:\tnopl 0x00(%%eax,%%eax,1)\n"); /* 5 byte nop. */
39242 else
39243 fprintf (file, "1:\tcall\t%s\n", target);
39244 }
39245
39246 /* Output assembler code to FILE to increment profiler label # LABELNO
39247 for profiling a function entry. */
39248 void
39249 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
39250 {
39251 const char *mcount_name = (flag_fentry ? MCOUNT_NAME_BEFORE_PROLOGUE
39252 : MCOUNT_NAME);
39253 if (TARGET_64BIT)
39254 {
39255 #ifndef NO_PROFILE_COUNTERS
39256 fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
39257 #endif
39258
39259 if (!TARGET_PECOFF && flag_pic)
39260 fprintf (file, "1:\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
39261 else
39262 x86_print_call_or_nop (file, mcount_name);
39263 }
39264 else if (flag_pic)
39265 {
39266 #ifndef NO_PROFILE_COUNTERS
39267 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
39268 LPREFIX, labelno);
39269 #endif
39270 fprintf (file, "1:\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
39271 }
39272 else
39273 {
39274 #ifndef NO_PROFILE_COUNTERS
39275 fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
39276 LPREFIX, labelno);
39277 #endif
39278 x86_print_call_or_nop (file, mcount_name);
39279 }
39280
39281 if (flag_record_mcount)
39282 {
39283 fprintf (file, "\t.section __mcount_loc, \"a\",@progbits\n");
39284 fprintf (file, "\t.%s 1b\n", TARGET_64BIT ? "quad" : "long");
39285 fprintf (file, "\t.previous\n");
39286 }
39287 }
39288
39289 /* We don't have exact information about the insn sizes, but we may assume
39290 quite safely that we are informed about all 1 byte insns and memory
39291 address sizes. This is enough to eliminate unnecessary padding in
39292 99% of cases. */
39293
39294 static int
39295 min_insn_size (rtx_insn *insn)
39296 {
39297 int l = 0, len;
39298
39299 if (!INSN_P (insn) || !active_insn_p (insn))
39300 return 0;
39301
39302 /* Discard alignments we've emitted, and jump instructions. */
39303 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
39304 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
39305 return 0;
39306
39307 /* Important case - calls are always 5 bytes.
39308 It is common to have many calls in a row. */
39309 if (CALL_P (insn)
39310 && symbolic_reference_mentioned_p (PATTERN (insn))
39311 && !SIBLING_CALL_P (insn))
39312 return 5;
39313 len = get_attr_length (insn);
39314 if (len <= 1)
39315 return 1;
39316
39317 /* For normal instructions we rely on get_attr_length being exact,
39318 with a few exceptions. */
39319 if (!JUMP_P (insn))
39320 {
39321 enum attr_type type = get_attr_type (insn);
39322
39323 switch (type)
39324 {
39325 case TYPE_MULTI:
39326 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
39327 || asm_noperands (PATTERN (insn)) >= 0)
39328 return 0;
39329 break;
39330 case TYPE_OTHER:
39331 case TYPE_FCMP:
39332 break;
39333 default:
39334 /* Otherwise trust get_attr_length. */
39335 return len;
39336 }
39337
39338 l = get_attr_length_address (insn);
39339 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
39340 l = 4;
39341 }
39342 if (l)
39343 return 1+l;
39344 else
39345 return 2;
39346 }
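/* Examples of the estimates above (lower bounds, not exact sizes):

     call to a symbol (not a sibcall)                 -> 5 bytes
     asm statement                                    -> 0 bytes
     TYPE_OTHER/TYPE_FCMP insn mentioning a symbol    -> at least 5 bytes
     most other insns                                 -> get_attr_length.  */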
39347
39348 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
39349
39350 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
39351 window. */
39352
39353 static void
39354 ix86_avoid_jump_mispredicts (void)
39355 {
39356 rtx_insn *insn, *start = get_insns ();
39357 int nbytes = 0, njumps = 0;
39358 int isjump = 0;
39359
39360 /* Look for all minimal intervals of instructions containing 4 jumps.
39361 The intervals are bounded by START and INSN. NBYTES is the total
39362 size of instructions in the interval including INSN and not including
39363 START. When the NBYTES is smaller than 16 bytes, it is possible
39364 that the end of START and INSN ends up in the same 16byte page.
39365
39366 The smallest offset in the page INSN can start is the case where START
39367 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
39368 We add p2align to 16byte window with maxskip 15 - NBYTES + sizeof (INSN).
39369
39370 Don't consider asm goto as a jump; while it can contain a jump, it doesn't
39371 have to, since control transfer to its label(s) can be performed through
39372 other means, and we also estimate the minimum length of all asm stmts as 0. */
39373 for (insn = start; insn; insn = NEXT_INSN (insn))
39374 {
39375 int min_size;
39376
39377 if (LABEL_P (insn))
39378 {
39379 int align = label_to_alignment (insn);
39380 int max_skip = label_to_max_skip (insn);
39381
39382 if (max_skip > 15)
39383 max_skip = 15;
39384 /* If align > 3, only up to 16 - max_skip - 1 bytes can be
39385 already in the current 16 byte page, because otherwise
39386 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
39387 bytes to reach 16 byte boundary. */
39388 if (align <= 0
39389 || (align <= 3 && max_skip != (1 << align) - 1))
39390 max_skip = 0;
39391 if (dump_file)
39392 fprintf (dump_file, "Label %i with max_skip %i\n",
39393 INSN_UID (insn), max_skip);
39394 if (max_skip)
39395 {
39396 while (nbytes + max_skip >= 16)
39397 {
39398 start = NEXT_INSN (start);
39399 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
39400 || CALL_P (start))
39401 njumps--, isjump = 1;
39402 else
39403 isjump = 0;
39404 nbytes -= min_insn_size (start);
39405 }
39406 }
39407 continue;
39408 }
39409
39410 min_size = min_insn_size (insn);
39411 nbytes += min_size;
39412 if (dump_file)
39413 fprintf (dump_file, "Insn %i estimated to %i bytes\n",
39414 INSN_UID (insn), min_size);
39415 if ((JUMP_P (insn) && asm_noperands (PATTERN (insn)) < 0)
39416 || CALL_P (insn))
39417 njumps++;
39418 else
39419 continue;
39420
39421 while (njumps > 3)
39422 {
39423 start = NEXT_INSN (start);
39424 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
39425 || CALL_P (start))
39426 njumps--, isjump = 1;
39427 else
39428 isjump = 0;
39429 nbytes -= min_insn_size (start);
39430 }
39431 gcc_assert (njumps >= 0);
39432 if (dump_file)
39433 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
39434 INSN_UID (start), INSN_UID (insn), nbytes);
39435
39436 if (njumps == 3 && isjump && nbytes < 16)
39437 {
39438 int padsize = 15 - nbytes + min_insn_size (insn);
39439
39440 if (dump_file)
39441 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
39442 INSN_UID (insn), padsize);
39443 emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
39444 }
39445 }
39446 }
39447 #endif
39448
39449 /* AMD Athlon works faster
39450 when RET is not the destination of a conditional jump or directly preceded
39451 by another jump instruction. We avoid the penalty by inserting a NOP just
39452 before the RET instructions in such cases. */
39453 static void
39454 ix86_pad_returns (void)
39455 {
39456 edge e;
39457 edge_iterator ei;
39458
39459 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
39460 {
39461 basic_block bb = e->src;
39462 rtx_insn *ret = BB_END (bb);
39463 rtx_insn *prev;
39464 bool replace = false;
39465
39466 if (!JUMP_P (ret) || !ANY_RETURN_P (PATTERN (ret))
39467 || optimize_bb_for_size_p (bb))
39468 continue;
39469 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
39470 if (active_insn_p (prev) || LABEL_P (prev))
39471 break;
39472 if (prev && LABEL_P (prev))
39473 {
39474 edge e;
39475 edge_iterator ei;
39476
39477 FOR_EACH_EDGE (e, ei, bb->preds)
39478 if (EDGE_FREQUENCY (e) && e->src->index >= 0
39479 && !(e->flags & EDGE_FALLTHRU))
39480 {
39481 replace = true;
39482 break;
39483 }
39484 }
39485 if (!replace)
39486 {
39487 prev = prev_active_insn (ret);
39488 if (prev
39489 && ((JUMP_P (prev) && any_condjump_p (prev))
39490 || CALL_P (prev)))
39491 replace = true;
39492 /* Empty functions get a branch mispredict even when
39493 the jump destination is not visible to us. */
39494 if (!prev && !optimize_function_for_size_p (cfun))
39495 replace = true;
39496 }
39497 if (replace)
39498 {
39499 emit_jump_insn_before (gen_simple_return_internal_long (), ret);
39500 delete_insn (ret);
39501 }
39502 }
39503 }
39504
39505 /* Count the minimum number of instructions in BB. Return 4 if the
39506 number of instructions >= 4. */
39507
39508 static int
39509 ix86_count_insn_bb (basic_block bb)
39510 {
39511 rtx_insn *insn;
39512 int insn_count = 0;
39513
39514 /* Count number of instructions in this block. Return 4 if the number
39515 of instructions >= 4. */
39516 FOR_BB_INSNS (bb, insn)
39517 {
39518 /* Only happen in exit blocks. */
39519 if (JUMP_P (insn)
39520 && ANY_RETURN_P (PATTERN (insn)))
39521 break;
39522
39523 if (NONDEBUG_INSN_P (insn)
39524 && GET_CODE (PATTERN (insn)) != USE
39525 && GET_CODE (PATTERN (insn)) != CLOBBER)
39526 {
39527 insn_count++;
39528 if (insn_count >= 4)
39529 return insn_count;
39530 }
39531 }
39532
39533 return insn_count;
39534 }
39535
39536
39537 /* Count the minimum number of instructions in code path in BB.
39538 Return 4 if the number of instructions >= 4. */
39539
39540 static int
39541 ix86_count_insn (basic_block bb)
39542 {
39543 edge e;
39544 edge_iterator ei;
39545 int min_prev_count;
39546
39547 /* Only bother counting instructions along paths with no
39548 more than 2 basic blocks between entry and exit. Given
39549 that BB has an edge to exit, determine if a predecessor
39550 of BB has an edge from entry. If so, compute the number
39551 of instructions in the predecessor block. If there
39552 happen to be multiple such blocks, compute the minimum. */
39553 min_prev_count = 4;
39554 FOR_EACH_EDGE (e, ei, bb->preds)
39555 {
39556 edge prev_e;
39557 edge_iterator prev_ei;
39558
39559 if (e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
39560 {
39561 min_prev_count = 0;
39562 break;
39563 }
39564 FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
39565 {
39566 if (prev_e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
39567 {
39568 int count = ix86_count_insn_bb (e->src);
39569 if (count < min_prev_count)
39570 min_prev_count = count;
39571 break;
39572 }
39573 }
39574 }
39575
39576 if (min_prev_count < 4)
39577 min_prev_count += ix86_count_insn_bb (bb);
39578
39579 return min_prev_count;
39580 }
39581
39582 /* Pad short function to 4 instructions. */
39583
39584 static void
39585 ix86_pad_short_function (void)
39586 {
39587 edge e;
39588 edge_iterator ei;
39589
39590 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
39591 {
39592 rtx_insn *ret = BB_END (e->src);
39593 if (JUMP_P (ret) && ANY_RETURN_P (PATTERN (ret)))
39594 {
39595 int insn_count = ix86_count_insn (e->src);
39596
39597 /* Pad short function. */
39598 if (insn_count < 4)
39599 {
39600 rtx_insn *insn = ret;
39601
39602 /* Find epilogue. */
39603 while (insn
39604 && (!NOTE_P (insn)
39605 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
39606 insn = PREV_INSN (insn);
39607
39608 if (!insn)
39609 insn = ret;
39610
39611 /* Two NOPs count as one instruction. */
39612 insn_count = 2 * (4 - insn_count);
39613 emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
39614 }
39615 }
39616 }
39617 }
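/* Example of the padding above: a function containing only two countable
   instructions gets 2 * (4 - 2) == 4 NOPs inserted before its epilogue;
   since two NOPs are counted as one instruction, that brings the effective
   count up to the four-instruction minimum.  */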
39618
39619 /* Fix up a Windows system unwinder issue. If an EH region falls through into
39620 the epilogue, the Windows system unwinder will apply epilogue logic and
39621 produce incorrect offsets. This can be avoided by adding a nop between
39622 the last insn that can throw and the first insn of the epilogue. */
39623
39624 static void
39625 ix86_seh_fixup_eh_fallthru (void)
39626 {
39627 edge e;
39628 edge_iterator ei;
39629
39630 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
39631 {
39632 rtx_insn *insn, *next;
39633
39634 /* Find the beginning of the epilogue. */
39635 for (insn = BB_END (e->src); insn != NULL; insn = PREV_INSN (insn))
39636 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_EPILOGUE_BEG)
39637 break;
39638 if (insn == NULL)
39639 continue;
39640
39641 /* We only care about preceding insns that can throw. */
39642 insn = prev_active_insn (insn);
39643 if (insn == NULL || !can_throw_internal (insn))
39644 continue;
39645
39646 /* Do not separate calls from their debug information. */
39647 for (next = NEXT_INSN (insn); next != NULL; next = NEXT_INSN (next))
39648 if (NOTE_P (next)
39649 && (NOTE_KIND (next) == NOTE_INSN_VAR_LOCATION
39650 || NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION))
39651 insn = next;
39652 else
39653 break;
39654
39655 emit_insn_after (gen_nops (const1_rtx), insn);
39656 }
39657 }
39658
39659 /* Implement machine-specific optimizations.  We implement padding of returns
39660 for K8 CPUs and a pass to avoid 4 jumps in a single 16-byte window.  */
39661 static void
39662 ix86_reorg (void)
39663 {
39664 /* We are freeing block_for_insn in the toplev to keep compatibility
39665 with old MDEP_REORGS that are not CFG based. Recompute it now. */
39666 compute_bb_for_insn ();
39667
39668 if (TARGET_SEH && current_function_has_exception_handlers ())
39669 ix86_seh_fixup_eh_fallthru ();
39670
39671 if (optimize && optimize_function_for_speed_p (cfun))
39672 {
39673 if (TARGET_PAD_SHORT_FUNCTION)
39674 ix86_pad_short_function ();
39675 else if (TARGET_PAD_RETURNS)
39676 ix86_pad_returns ();
39677 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
39678 if (TARGET_FOUR_JUMP_LIMIT)
39679 ix86_avoid_jump_mispredicts ();
39680 #endif
39681 }
39682 }
39683
39684 /* Return true when a QImode register that must be represented via a REX
39685 prefix is used.  */
39686 bool
39687 x86_extended_QIreg_mentioned_p (rtx_insn *insn)
39688 {
39689 int i;
39690 extract_insn_cached (insn);
39691 for (i = 0; i < recog_data.n_operands; i++)
39692 if (GENERAL_REG_P (recog_data.operand[i])
39693 && !QI_REGNO_P (REGNO (recog_data.operand[i])))
39694 return true;
39695 return false;
39696 }
39697
39698 /* Return true when INSN mentions a register that must be encoded using a
39699 REX prefix.  */
39700 bool
39701 x86_extended_reg_mentioned_p (rtx insn)
39702 {
39703 subrtx_iterator::array_type array;
39704 FOR_EACH_SUBRTX (iter, array, INSN_P (insn) ? PATTERN (insn) : insn, NONCONST)
39705 {
39706 const_rtx x = *iter;
39707 if (REG_P (x)
39708 && (REX_INT_REGNO_P (REGNO (x)) || REX_SSE_REGNO_P (REGNO (x))))
39709 return true;
39710 }
39711 return false;
39712 }
39713
39714 /* If profitable, negate (without causing overflow) the integer constant
39715 of mode MODE at location LOC.  Return true if the constant was negated.  */
39716 bool
39717 x86_maybe_negate_const_int (rtx *loc, enum machine_mode mode)
39718 {
39719 HOST_WIDE_INT val;
39720
39721 if (!CONST_INT_P (*loc))
39722 return false;
39723
39724 switch (mode)
39725 {
39726 case DImode:
39727 /* DImode x86_64 constants must fit in 32 bits. */
39728 gcc_assert (x86_64_immediate_operand (*loc, mode));
39729
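      /* The immediate will be encoded in 32 bits, so perform the remaining
	 sign-bit and negation checks in SImode.  */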
39730 mode = SImode;
39731 break;
39732
39733 case SImode:
39734 case HImode:
39735 case QImode:
39736 break;
39737
39738 default:
39739 gcc_unreachable ();
39740 }
39741
39742 /* Avoid overflows. */
39743 if (mode_signbit_p (mode, *loc))
39744 return false;
39745
39746 val = INTVAL (*loc);
39747
39748 /* Make things pretty and emit `subl $4,%eax' rather than `addl $-4,%eax'.
39749 Exception: -128 encodes in fewer bytes than 128, so swap the sign and the operation.  */
39750 if ((val < 0 && val != -128)
39751 || val == 128)
39752 {
39753 *loc = GEN_INT (-val);
39754 return true;
39755 }
39756
39757 return false;
39758 }
39759
39760 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
39761 optabs would emit if we didn't have TFmode patterns. */
39762
39763 void
39764 x86_emit_floatuns (rtx operands[2])
39765 {
39766 rtx_code_label *neglab, *donelab;
39767 rtx i0, i1, f0, in, out;
39768 enum machine_mode mode, inmode;
39769
39770 inmode = GET_MODE (operands[1]);
39771 gcc_assert (inmode == SImode || inmode == DImode);
39772
39773 out = operands[0];
39774 in = force_reg (inmode, operands[1]);
39775 mode = GET_MODE (out);
39776 neglab = gen_label_rtx ();
39777 donelab = gen_label_rtx ();
39778 f0 = gen_reg_rtx (mode);
39779
39780 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
39781
39782 expand_float (out, in, 0);
39783
39784 emit_jump_insn (gen_jump (donelab));
39785 emit_barrier ();
39786
39787 emit_label (neglab);
39788
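  /* IN is negative when treated as signed, so a signed conversion would be
     wrong: halve it with a logical shift, OR the dropped low bit back in so
     the final rounding is unaffected, convert the halved value, and then
     double the result.  */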
39789 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
39790 1, OPTAB_DIRECT);
39791 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
39792 1, OPTAB_DIRECT);
39793 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
39794
39795 expand_float (f0, i0, 0);
39796
39797 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
39798
39799 emit_label (donelab);
39800 }
39801 \f
39802 static bool canonicalize_perm (struct expand_vec_perm_d *d);
39803 static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
39804 static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);
39805 static bool expand_vec_perm_palignr (struct expand_vec_perm_d *d, bool);
39806
39807 /* Get a vector mode of the same size as the original but with elements
39808 twice as wide. This is only guaranteed to apply to integral vectors. */
39809
39810 static inline enum machine_mode
39811 get_mode_wider_vector (enum machine_mode o)
39812 {
39813 /* ??? Rely on the ordering that genmodes.c gives to vectors. */
39814 enum machine_mode n = GET_MODE_WIDER_MODE (o);
39815 gcc_assert (GET_MODE_NUNITS (o) == GET_MODE_NUNITS (n) * 2);
39816 gcc_assert (GET_MODE_SIZE (o) == GET_MODE_SIZE (n));
39817 return n;
39818 }
39819
39820 /* A subroutine of ix86_expand_vector_init_duplicate.  Tries to
39821 fill TARGET with VAL via vec_duplicate.  */
39822
39823 static bool
39824 ix86_vector_duplicate_value (enum machine_mode mode, rtx target, rtx val)
39825 {
39826 bool ok;
39827 rtx_insn *insn;
39828 rtx dup;
39829
39830 /* First attempt to recognize VAL as-is. */
39831 dup = gen_rtx_VEC_DUPLICATE (mode, val);
39832 insn = emit_insn (gen_rtx_SET (VOIDmode, target, dup));
39833 if (recog_memoized (insn) < 0)
39834 {
39835 rtx_insn *seq;
39836 /* If that fails, force VAL into a register. */
39837
39838 start_sequence ();
39839 XEXP (dup, 0) = force_reg (GET_MODE_INNER (mode), val);
39840 seq = get_insns ();
39841 end_sequence ();
39842 if (seq)
39843 emit_insn_before (seq, insn);
39844
39845 ok = recog_memoized (insn) >= 0;
39846 gcc_assert (ok);
39847 }
39848 return true;
39849 }
39850
39851 /* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
39852 with all elements equal to VAL.  Return true if successful.  */
39853
39854 static bool
39855 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
39856 rtx target, rtx val)
39857 {
39858 bool ok;
39859
39860 switch (mode)
39861 {
39862 case V2SImode:
39863 case V2SFmode:
39864 if (!mmx_ok)
39865 return false;
39866 /* FALLTHRU */
39867
39868 case V4DFmode:
39869 case V4DImode:
39870 case V8SFmode:
39871 case V8SImode:
39872 case V2DFmode:
39873 case V2DImode:
39874 case V4SFmode:
39875 case V4SImode:
39876 case V16SImode:
39877 case V8DImode:
39878 case V16SFmode:
39879 case V8DFmode:
39880 return ix86_vector_duplicate_value (mode, target, val);
39881
39882 case V4HImode:
39883 if (!mmx_ok)
39884 return false;
39885 if (TARGET_SSE || TARGET_3DNOW_A)
39886 {
39887 rtx x;
39888
39889 val = gen_lowpart (SImode, val);
39890 x = gen_rtx_TRUNCATE (HImode, val);
39891 x = gen_rtx_VEC_DUPLICATE (mode, x);
39892 emit_insn (gen_rtx_SET (VOIDmode, target, x));
39893 return true;
39894 }
39895 goto widen;
39896
39897 case V8QImode:
39898 if (!mmx_ok)
39899 return false;
39900 goto widen;
39901
39902 case V8HImode:
39903 if (TARGET_AVX2)
39904 return ix86_vector_duplicate_value (mode, target, val);
39905
39906 if (TARGET_SSE2)
39907 {
39908 struct expand_vec_perm_d dperm;
39909 rtx tmp1, tmp2;
39910
39911 permute:
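	  /* Broadcast via a one-operand permutation: the memset below leaves
	     the permutation selector all zeros, so every result element is
	     taken from element 0 of the operand.  */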
39912 memset (&dperm, 0, sizeof (dperm));
39913 dperm.target = target;
39914 dperm.vmode = mode;
39915 dperm.nelt = GET_MODE_NUNITS (mode);
39916 dperm.op0 = dperm.op1 = gen_reg_rtx (mode);
39917 dperm.one_operand_p = true;
39918
39919 /* Extend to SImode using a paradoxical SUBREG. */
39920 tmp1 = gen_reg_rtx (SImode);
39921 emit_move_insn (tmp1, gen_lowpart (SImode, val));
39922
39923 /* Insert the SImode value as low element of a V4SImode vector. */
39924 tmp2 = gen_reg_rtx (V4SImode);
39925 emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1));
39926 emit_move_insn (dperm.op0, gen_lowpart (mode, tmp2));
39927
39928 ok = (expand_vec_perm_1 (&dperm)
39929 || expand_vec_perm_broadcast_1 (&dperm));
39930 gcc_assert (ok);
39931 return ok;
39932 }
39933 goto widen;
39934
39935 case V16QImode:
39936 if (TARGET_AVX2)
39937 return ix86_vector_duplicate_value (mode, target, val);
39938
39939 if (TARGET_SSE2)
39940 goto permute;
39941 goto widen;
39942
39943 widen:
39944 /* Replicate the value once into the next wider mode and recurse. */
39945 {
39946 enum machine_mode smode, wsmode, wvmode;
39947 rtx x;
39948
39949 smode = GET_MODE_INNER (mode);
39950 wvmode = get_mode_wider_vector (mode);
39951 wsmode = GET_MODE_INNER (wvmode);
39952
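	/* Pack two copies of VAL into one wider scalar: zero-extend it, shift
	   a copy into the upper half, and IOR the two halves together.  */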
39953 val = convert_modes (wsmode, smode, val, true);
39954 x = expand_simple_binop (wsmode, ASHIFT, val,
39955 GEN_INT (GET_MODE_BITSIZE (smode)),
39956 NULL_RTX, 1, OPTAB_LIB_WIDEN);
39957 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
39958
39959 x = gen_reg_rtx (wvmode);
39960 ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val);
39961 gcc_assert (ok);
39962 emit_move_insn (target, gen_lowpart (GET_MODE (target), x));
39963 return ok;
39964 }
39965
39966 case V16HImode:
39967 case V32QImode:
39968 if (TARGET_AVX2)
39969 return ix86_vector_duplicate_value (mode, target, val);
39970 else
39971 {
39972 enum machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
39973 rtx x = gen_reg_rtx (hvmode);
39974
39975 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
39976 gcc_assert (ok);
39977
39978 x = gen_rtx_VEC_CONCAT (mode, x, x);
39979 emit_insn (gen_rtx_SET (VOIDmode, target, x));
39980 }
39981 return true;
39982
39983 case V64QImode:
39984 case V32HImode:
39985 if (TARGET_AVX512BW)
39986 return ix86_vector_duplicate_value (mode, target, val);
39987 else
39988 {
39989 enum machine_mode hvmode = (mode == V32HImode ? V16HImode : V32QImode);
39990 rtx x = gen_reg_rtx (hvmode);
39991
39992 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
39993 gcc_assert (ok);
39994
39995 x = gen_rtx_VEC_CONCAT (mode, x, x);
39996 emit_insn (gen_rtx_SET (VOIDmode, target, x));
39997 }
39998 return true;
39999
40000 default:
40001 return false;
40002 }
40003 }
40004
40005 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
40006 whose ONE_VAR element is VAR, and other elements are zero. Return true
40007 if successful. */
40008
40009 static bool
40010 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
40011 rtx target, rtx var, int one_var)
40012 {
40013 enum machine_mode vsimode;
40014 rtx new_target;
40015 rtx x, tmp;
40016 bool use_vector_set = false;
40017
40018 switch (mode)
40019 {
40020 case V2DImode:
40021 /* For SSE4.1, we normally use vector set. But if the second
40022 element is zero and inter-unit moves are OK, we use movq
40023 instead. */
40024 use_vector_set = (TARGET_64BIT && TARGET_SSE4_1
40025 && !(TARGET_INTER_UNIT_MOVES_TO_VEC
40026 && one_var == 0));
40027 break;
40028 case V16QImode:
40029 case V4SImode:
40030 case V4SFmode:
40031 use_vector_set = TARGET_SSE4_1;
40032 break;
40033 case V8HImode:
40034 use_vector_set = TARGET_SSE2;
40035 break;
40036 case V4HImode:
40037 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
40038 break;
40039 case V32QImode:
40040 case V16HImode:
40041 case V8SImode:
40042 case V8SFmode:
40043 case V4DFmode:
40044 use_vector_set = TARGET_AVX;
40045 break;
40046 case V4DImode:
40047 /* Use ix86_expand_vector_set in 64bit mode only. */
40048 use_vector_set = TARGET_AVX && TARGET_64BIT;
40049 break;
40050 default:
40051 break;
40052 }
40053
40054 if (use_vector_set)
40055 {
40056 emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
40057 var = force_reg (GET_MODE_INNER (mode), var);
40058 ix86_expand_vector_set (mmx_ok, target, var, one_var);
40059 return true;
40060 }
40061
40062 switch (mode)
40063 {
40064 case V2SFmode:
40065 case V2SImode:
40066 if (!mmx_ok)
40067 return false;
40068 /* FALLTHRU */
40069
40070 case V2DFmode:
40071 case V2DImode:
40072 if (one_var != 0)
40073 return false;
40074 var = force_reg (GET_MODE_INNER (mode), var);
40075 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
40076 emit_insn (gen_rtx_SET (VOIDmode, target, x));
40077 return true;
40078
40079 case V4SFmode:
40080 case V4SImode:
40081 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
40082 new_target = gen_reg_rtx (mode);
40083 else
40084 new_target = target;
40085 var = force_reg (GET_MODE_INNER (mode), var);
40086 x = gen_rtx_VEC_DUPLICATE (mode, var);
40087 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
40088 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
40089 if (one_var != 0)
40090 {
40091 /* We need to shuffle the value to the correct position, so
40092 create a new pseudo to store the intermediate result. */
40093
40094 /* With SSE2, we can use the integer shuffle insns. */
40095 if (mode != V4SFmode && TARGET_SSE2)
40096 {
40097 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
40098 const1_rtx,
40099 GEN_INT (one_var == 1 ? 0 : 1),
40100 GEN_INT (one_var == 2 ? 0 : 1),
40101 GEN_INT (one_var == 3 ? 0 : 1)));
40102 if (target != new_target)
40103 emit_move_insn (target, new_target);
40104 return true;
40105 }
40106
40107 /* Otherwise convert the intermediate result to V4SFmode and
40108 use the SSE1 shuffle instructions. */
40109 if (mode != V4SFmode)
40110 {
40111 tmp = gen_reg_rtx (V4SFmode);
40112 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
40113 }
40114 else
40115 tmp = new_target;
40116
40117 emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
40118 const1_rtx,
40119 GEN_INT (one_var == 1 ? 0 : 1),
40120 GEN_INT (one_var == 2 ? 0+4 : 1+4),
40121 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
40122
40123 if (mode != V4SFmode)
40124 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
40125 else if (tmp != target)
40126 emit_move_insn (target, tmp);
40127 }
40128 else if (target != new_target)
40129 emit_move_insn (target, new_target);
40130 return true;
40131
40132 case V8HImode:
40133 case V16QImode:
40134 vsimode = V4SImode;
40135 goto widen;
40136 case V4HImode:
40137 case V8QImode:
40138 if (!mmx_ok)
40139 return false;
40140 vsimode = V2SImode;
40141 goto widen;
40142 widen:
40143 if (one_var != 0)
40144 return false;
40145
40146 /* Zero extend the variable element to SImode and recurse. */
40147 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
40148
40149 x = gen_reg_rtx (vsimode);
40150 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
40151 var, one_var))
40152 gcc_unreachable ();
40153
40154 emit_move_insn (target, gen_lowpart (mode, x));
40155 return true;
40156
40157 default:
40158 return false;
40159 }
40160 }
40161
40162 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
40163 consisting of the values in VALS. It is known that all elements
40164 except ONE_VAR are constants. Return true if successful. */
40165
40166 static bool
40167 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
40168 rtx target, rtx vals, int one_var)
40169 {
40170 rtx var = XVECEXP (vals, 0, one_var);
40171 enum machine_mode wmode;
40172 rtx const_vec, x;
40173
40174 const_vec = copy_rtx (vals);
40175 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
40176 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
40177
40178 switch (mode)
40179 {
40180 case V2DFmode:
40181 case V2DImode:
40182 case V2SFmode:
40183 case V2SImode:
40184 /* For the two element vectors, it's just as easy to use
40185 the general case. */
40186 return false;
40187
40188 case V4DImode:
40189 /* Use ix86_expand_vector_set in 64bit mode only. */
40190 if (!TARGET_64BIT)
40191 return false;
40192 case V4DFmode:
40193 case V8SFmode:
40194 case V8SImode:
40195 case V16HImode:
40196 case V32QImode:
40197 case V4SFmode:
40198 case V4SImode:
40199 case V8HImode:
40200 case V4HImode:
40201 break;
40202
40203 case V16QImode:
40204 if (TARGET_SSE4_1)
40205 break;
40206 wmode = V8HImode;
40207 goto widen;
40208 case V8QImode:
40209 wmode = V4HImode;
40210 goto widen;
40211 widen:
40212 /* There's no way to set one QImode entry easily. Combine
40213 the variable value with its adjacent constant value, and
40214 promote to an HImode set. */
40215 x = XVECEXP (vals, 0, one_var ^ 1);
40216 if (one_var & 1)
40217 {
40218 var = convert_modes (HImode, QImode, var, true);
40219 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
40220 NULL_RTX, 1, OPTAB_LIB_WIDEN);
40221 x = GEN_INT (INTVAL (x) & 0xff);
40222 }
40223 else
40224 {
40225 var = convert_modes (HImode, QImode, var, true);
40226 x = gen_int_mode (INTVAL (x) << 8, HImode);
40227 }
40228 if (x != const0_rtx)
40229 var = expand_simple_binop (HImode, IOR, var, x, var,
40230 1, OPTAB_LIB_WIDEN);
40231
40232 x = gen_reg_rtx (wmode);
40233 emit_move_insn (x, gen_lowpart (wmode, const_vec));
40234 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
40235
40236 emit_move_insn (target, gen_lowpart (mode, x));
40237 return true;
40238
40239 default:
40240 return false;
40241 }
40242
40243 emit_move_insn (target, const_vec);
40244 ix86_expand_vector_set (mmx_ok, target, var, one_var);
40245 return true;
40246 }
40247
40248 /* A subroutine of ix86_expand_vector_init_general. Use vector
40249 concatenate to handle the most general case: all values variable,
40250 and none identical. */
40251
40252 static void
40253 ix86_expand_vector_init_concat (enum machine_mode mode,
40254 rtx target, rtx *ops, int n)
40255 {
40256 enum machine_mode cmode, hmode = VOIDmode, gmode = VOIDmode;
40257 rtx first[16], second[8], third[4];
40258 rtvec v;
40259 int i, j;
40260
40261 switch (n)
40262 {
40263 case 2:
40264 switch (mode)
40265 {
40266 case V16SImode:
40267 cmode = V8SImode;
40268 break;
40269 case V16SFmode:
40270 cmode = V8SFmode;
40271 break;
40272 case V8DImode:
40273 cmode = V4DImode;
40274 break;
40275 case V8DFmode:
40276 cmode = V4DFmode;
40277 break;
40278 case V8SImode:
40279 cmode = V4SImode;
40280 break;
40281 case V8SFmode:
40282 cmode = V4SFmode;
40283 break;
40284 case V4DImode:
40285 cmode = V2DImode;
40286 break;
40287 case V4DFmode:
40288 cmode = V2DFmode;
40289 break;
40290 case V4SImode:
40291 cmode = V2SImode;
40292 break;
40293 case V4SFmode:
40294 cmode = V2SFmode;
40295 break;
40296 case V2DImode:
40297 cmode = DImode;
40298 break;
40299 case V2SImode:
40300 cmode = SImode;
40301 break;
40302 case V2DFmode:
40303 cmode = DFmode;
40304 break;
40305 case V2SFmode:
40306 cmode = SFmode;
40307 break;
40308 default:
40309 gcc_unreachable ();
40310 }
40311
40312 if (!register_operand (ops[1], cmode))
40313 ops[1] = force_reg (cmode, ops[1]);
40314 if (!register_operand (ops[0], cmode))
40315 ops[0] = force_reg (cmode, ops[0]);
40316 emit_insn (gen_rtx_SET (VOIDmode, target,
40317 gen_rtx_VEC_CONCAT (mode, ops[0],
40318 ops[1])));
40319 break;
40320
40321 case 4:
40322 switch (mode)
40323 {
40324 case V4DImode:
40325 cmode = V2DImode;
40326 break;
40327 case V4DFmode:
40328 cmode = V2DFmode;
40329 break;
40330 case V4SImode:
40331 cmode = V2SImode;
40332 break;
40333 case V4SFmode:
40334 cmode = V2SFmode;
40335 break;
40336 default:
40337 gcc_unreachable ();
40338 }
40339 goto half;
40340
40341 case 8:
40342 switch (mode)
40343 {
40344 case V8DImode:
40345 cmode = V2DImode;
40346 hmode = V4DImode;
40347 break;
40348 case V8DFmode:
40349 cmode = V2DFmode;
40350 hmode = V4DFmode;
40351 break;
40352 case V8SImode:
40353 cmode = V2SImode;
40354 hmode = V4SImode;
40355 break;
40356 case V8SFmode:
40357 cmode = V2SFmode;
40358 hmode = V4SFmode;
40359 break;
40360 default:
40361 gcc_unreachable ();
40362 }
40363 goto half;
40364
40365 case 16:
40366 switch (mode)
40367 {
40368 case V16SImode:
40369 cmode = V2SImode;
40370 hmode = V4SImode;
40371 gmode = V8SImode;
40372 break;
40373 case V16SFmode:
40374 cmode = V2SFmode;
40375 hmode = V4SFmode;
40376 gmode = V8SFmode;
40377 break;
40378 default:
40379 gcc_unreachable ();
40380 }
40381 goto half;
40382
40383 half:
40384 /* FIXME: We process inputs backward to help RA. PR 36222. */
40385 i = n - 1;
40386 j = (n >> 1) - 1;
40387 for (; i > 0; i -= 2, j--)
40388 {
40389 first[j] = gen_reg_rtx (cmode);
40390 v = gen_rtvec (2, ops[i - 1], ops[i]);
40391 ix86_expand_vector_init (false, first[j],
40392 gen_rtx_PARALLEL (cmode, v));
40393 }
40394
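      /* FIRST now holds N/2 two-element concatenations of the inputs; keep
	 pairing them up (through SECOND and, for 512-bit modes, THIRD) until
	 a single full-width vector remains.  */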
40395 n >>= 1;
40396 if (n > 4)
40397 {
40398 gcc_assert (hmode != VOIDmode);
40399 gcc_assert (gmode != VOIDmode);
40400 for (i = j = 0; i < n; i += 2, j++)
40401 {
40402 second[j] = gen_reg_rtx (hmode);
40403 ix86_expand_vector_init_concat (hmode, second [j],
40404 &first [i], 2);
40405 }
40406 n >>= 1;
40407 for (i = j = 0; i < n; i += 2, j++)
40408 {
40409 third[j] = gen_reg_rtx (gmode);
40410 ix86_expand_vector_init_concat (gmode, third[j],
40411 &second[i], 2);
40412 }
40413 n >>= 1;
40414 ix86_expand_vector_init_concat (mode, target, third, n);
40415 }
40416 else if (n > 2)
40417 {
40418 gcc_assert (hmode != VOIDmode);
40419 for (i = j = 0; i < n; i += 2, j++)
40420 {
40421 second[j] = gen_reg_rtx (hmode);
40422 ix86_expand_vector_init_concat (hmode, second [j],
40423 &first [i], 2);
40424 }
40425 n >>= 1;
40426 ix86_expand_vector_init_concat (mode, target, second, n);
40427 }
40428 else
40429 ix86_expand_vector_init_concat (mode, target, first, n);
40430 break;
40431
40432 default:
40433 gcc_unreachable ();
40434 }
40435 }
40436
40437 /* A subroutine of ix86_expand_vector_init_general. Use vector
40438 interleave to handle the most general case: all values variable,
40439 and none identical. */
40440
40441 static void
40442 ix86_expand_vector_init_interleave (enum machine_mode mode,
40443 rtx target, rtx *ops, int n)
40444 {
40445 enum machine_mode first_imode, second_imode, third_imode, inner_mode;
40446 int i, j;
40447 rtx op0, op1;
40448 rtx (*gen_load_even) (rtx, rtx, rtx);
40449 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
40450 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
40451
40452 switch (mode)
40453 {
40454 case V8HImode:
40455 gen_load_even = gen_vec_setv8hi;
40456 gen_interleave_first_low = gen_vec_interleave_lowv4si;
40457 gen_interleave_second_low = gen_vec_interleave_lowv2di;
40458 inner_mode = HImode;
40459 first_imode = V4SImode;
40460 second_imode = V2DImode;
40461 third_imode = VOIDmode;
40462 break;
40463 case V16QImode:
40464 gen_load_even = gen_vec_setv16qi;
40465 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
40466 gen_interleave_second_low = gen_vec_interleave_lowv4si;
40467 inner_mode = QImode;
40468 first_imode = V8HImode;
40469 second_imode = V4SImode;
40470 third_imode = V2DImode;
40471 break;
40472 default:
40473 gcc_unreachable ();
40474 }
40475
40476 for (i = 0; i < n; i++)
40477 {
40478       /* Extend the odd element to SImode using a paradoxical SUBREG.  */
40479 op0 = gen_reg_rtx (SImode);
40480 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
40481
40482 /* Insert the SImode value as low element of V4SImode vector. */
40483 op1 = gen_reg_rtx (V4SImode);
40484 op0 = gen_rtx_VEC_MERGE (V4SImode,
40485 gen_rtx_VEC_DUPLICATE (V4SImode,
40486 op0),
40487 CONST0_RTX (V4SImode),
40488 const1_rtx);
40489 emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
40490
40491       /* Cast the V4SImode vector back to a vector in the original mode.  */
40492 op0 = gen_reg_rtx (mode);
40493 emit_move_insn (op0, gen_lowpart (mode, op1));
40494
40495 /* Load even elements into the second position. */
40496 emit_insn (gen_load_even (op0,
40497 force_reg (inner_mode,
40498 ops [i + i + 1]),
40499 const1_rtx));
40500
40501 /* Cast vector to FIRST_IMODE vector. */
40502 ops[i] = gen_reg_rtx (first_imode);
40503 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
40504 }
40505
40506 /* Interleave low FIRST_IMODE vectors. */
40507 for (i = j = 0; i < n; i += 2, j++)
40508 {
40509 op0 = gen_reg_rtx (first_imode);
40510 emit_insn (gen_interleave_first_low (op0, ops[i], ops[i + 1]));
40511
40512 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
40513 ops[j] = gen_reg_rtx (second_imode);
40514 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
40515 }
40516
40517 /* Interleave low SECOND_IMODE vectors. */
40518 switch (second_imode)
40519 {
40520 case V4SImode:
40521 for (i = j = 0; i < n / 2; i += 2, j++)
40522 {
40523 op0 = gen_reg_rtx (second_imode);
40524 emit_insn (gen_interleave_second_low (op0, ops[i],
40525 ops[i + 1]));
40526
40527 /* Cast the SECOND_IMODE vector to the THIRD_IMODE
40528 vector. */
40529 ops[j] = gen_reg_rtx (third_imode);
40530 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
40531 }
40532 second_imode = V2DImode;
40533 gen_interleave_second_low = gen_vec_interleave_lowv2di;
40534 /* FALLTHRU */
40535
40536 case V2DImode:
40537 op0 = gen_reg_rtx (second_imode);
40538 emit_insn (gen_interleave_second_low (op0, ops[0],
40539 ops[1]));
40540
40541       /* Cast the SECOND_IMODE vector back to a vector in the original
40542 	 mode.  */
40543 emit_insn (gen_rtx_SET (VOIDmode, target,
40544 gen_lowpart (mode, op0)));
40545 break;
40546
40547 default:
40548 gcc_unreachable ();
40549 }
40550 }
40551
40552 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
40553 all values variable, and none identical. */
40554
40555 static void
40556 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
40557 rtx target, rtx vals)
40558 {
40559 rtx ops[64], op0, op1, op2, op3, op4, op5;
40560 enum machine_mode half_mode = VOIDmode;
40561 enum machine_mode quarter_mode = VOIDmode;
40562 int n, i;
40563
40564 switch (mode)
40565 {
40566 case V2SFmode:
40567 case V2SImode:
40568 if (!mmx_ok && !TARGET_SSE)
40569 break;
40570 /* FALLTHRU */
40571
40572 case V16SImode:
40573 case V16SFmode:
40574 case V8DFmode:
40575 case V8DImode:
40576 case V8SFmode:
40577 case V8SImode:
40578 case V4DFmode:
40579 case V4DImode:
40580 case V4SFmode:
40581 case V4SImode:
40582 case V2DFmode:
40583 case V2DImode:
40584 n = GET_MODE_NUNITS (mode);
40585 for (i = 0; i < n; i++)
40586 ops[i] = XVECEXP (vals, 0, i);
40587 ix86_expand_vector_init_concat (mode, target, ops, n);
40588 return;
40589
40590 case V32QImode:
40591 half_mode = V16QImode;
40592 goto half;
40593
40594 case V16HImode:
40595 half_mode = V8HImode;
40596 goto half;
40597
40598 half:
40599 n = GET_MODE_NUNITS (mode);
40600 for (i = 0; i < n; i++)
40601 ops[i] = XVECEXP (vals, 0, i);
40602 op0 = gen_reg_rtx (half_mode);
40603 op1 = gen_reg_rtx (half_mode);
40604 ix86_expand_vector_init_interleave (half_mode, op0, ops,
40605 n >> 2);
40606 ix86_expand_vector_init_interleave (half_mode, op1,
40607 &ops [n >> 1], n >> 2);
40608 emit_insn (gen_rtx_SET (VOIDmode, target,
40609 gen_rtx_VEC_CONCAT (mode, op0, op1)));
40610 return;
40611
40612 case V64QImode:
40613 quarter_mode = V16QImode;
40614 half_mode = V32QImode;
40615 goto quarter;
40616
40617 case V32HImode:
40618 quarter_mode = V8HImode;
40619 half_mode = V16HImode;
40620 goto quarter;
40621
40622 quarter:
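      /* Build four quarter-width vectors with interleaves, concatenate them
	 pairwise into two half-width vectors, and concatenate those into
	 TARGET.  */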
40623 n = GET_MODE_NUNITS (mode);
40624 for (i = 0; i < n; i++)
40625 ops[i] = XVECEXP (vals, 0, i);
40626 op0 = gen_reg_rtx (quarter_mode);
40627 op1 = gen_reg_rtx (quarter_mode);
40628 op2 = gen_reg_rtx (quarter_mode);
40629 op3 = gen_reg_rtx (quarter_mode);
40630 op4 = gen_reg_rtx (half_mode);
40631 op5 = gen_reg_rtx (half_mode);
40632 ix86_expand_vector_init_interleave (quarter_mode, op0, ops,
40633 n >> 3);
40634 ix86_expand_vector_init_interleave (quarter_mode, op1,
40635 &ops [n >> 2], n >> 3);
40636 ix86_expand_vector_init_interleave (quarter_mode, op2,
40637 &ops [n >> 1], n >> 3);
40638 ix86_expand_vector_init_interleave (quarter_mode, op3,
40639 &ops [(n >> 1) | (n >> 2)], n >> 3);
40640 emit_insn (gen_rtx_SET (VOIDmode, op4,
40641 gen_rtx_VEC_CONCAT (half_mode, op0, op1)));
40642 emit_insn (gen_rtx_SET (VOIDmode, op5,
40643 gen_rtx_VEC_CONCAT (half_mode, op2, op3)));
40644 emit_insn (gen_rtx_SET (VOIDmode, target,
40645 gen_rtx_VEC_CONCAT (mode, op4, op5)));
40646 return;
40647
40648 case V16QImode:
40649 if (!TARGET_SSE4_1)
40650 break;
40651 /* FALLTHRU */
40652
40653 case V8HImode:
40654 if (!TARGET_SSE2)
40655 break;
40656
40657 /* Don't use ix86_expand_vector_init_interleave if we can't
40658 move from GPR to SSE register directly. */
40659 if (!TARGET_INTER_UNIT_MOVES_TO_VEC)
40660 break;
40661
40662 n = GET_MODE_NUNITS (mode);
40663 for (i = 0; i < n; i++)
40664 ops[i] = XVECEXP (vals, 0, i);
40665 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
40666 return;
40667
40668 case V4HImode:
40669 case V8QImode:
40670 break;
40671
40672 default:
40673 gcc_unreachable ();
40674 }
40675
40676 {
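    /* Fallback: build the vector in integer registers, packing
       N_ELT_PER_WORD elements into each word-sized chunk with shifts and
       IORs, then assemble the words into TARGET.  */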
40677 int i, j, n_elts, n_words, n_elt_per_word;
40678 enum machine_mode inner_mode;
40679 rtx words[4], shift;
40680
40681 inner_mode = GET_MODE_INNER (mode);
40682 n_elts = GET_MODE_NUNITS (mode);
40683 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
40684 n_elt_per_word = n_elts / n_words;
40685 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
40686
40687 for (i = 0; i < n_words; ++i)
40688 {
40689 rtx word = NULL_RTX;
40690
40691 for (j = 0; j < n_elt_per_word; ++j)
40692 {
40693 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
40694 elt = convert_modes (word_mode, inner_mode, elt, true);
40695
40696 if (j == 0)
40697 word = elt;
40698 else
40699 {
40700 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
40701 word, 1, OPTAB_LIB_WIDEN);
40702 word = expand_simple_binop (word_mode, IOR, word, elt,
40703 word, 1, OPTAB_LIB_WIDEN);
40704 }
40705 }
40706
40707 words[i] = word;
40708 }
40709
40710 if (n_words == 1)
40711 emit_move_insn (target, gen_lowpart (mode, words[0]));
40712 else if (n_words == 2)
40713 {
40714 rtx tmp = gen_reg_rtx (mode);
40715 emit_clobber (tmp);
40716 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
40717 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
40718 emit_move_insn (target, tmp);
40719 }
40720 else if (n_words == 4)
40721 {
40722 rtx tmp = gen_reg_rtx (V4SImode);
40723 gcc_assert (word_mode == SImode);
40724 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
40725 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
40726 emit_move_insn (target, gen_lowpart (mode, tmp));
40727 }
40728 else
40729 gcc_unreachable ();
40730 }
40731 }
40732
40733 /* Initialize vector TARGET via VALS. Suppress the use of MMX
40734 instructions unless MMX_OK is true. */
40735
40736 void
40737 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
40738 {
40739 enum machine_mode mode = GET_MODE (target);
40740 enum machine_mode inner_mode = GET_MODE_INNER (mode);
40741 int n_elts = GET_MODE_NUNITS (mode);
40742 int n_var = 0, one_var = -1;
40743 bool all_same = true, all_const_zero = true;
40744 int i;
40745 rtx x;
40746
40747 for (i = 0; i < n_elts; ++i)
40748 {
40749 x = XVECEXP (vals, 0, i);
40750 if (!(CONST_INT_P (x)
40751 || GET_CODE (x) == CONST_DOUBLE
40752 || GET_CODE (x) == CONST_FIXED))
40753 n_var++, one_var = i;
40754 else if (x != CONST0_RTX (inner_mode))
40755 all_const_zero = false;
40756 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
40757 all_same = false;
40758 }
40759
40760 /* Constants are best loaded from the constant pool. */
40761 if (n_var == 0)
40762 {
40763 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
40764 return;
40765 }
40766
40767 /* If all values are identical, broadcast the value. */
40768 if (all_same
40769 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
40770 XVECEXP (vals, 0, 0)))
40771 return;
40772
40773 /* Values where only one field is non-constant are best loaded from
40774 the pool and overwritten via move later. */
40775 if (n_var == 1)
40776 {
40777 if (all_const_zero
40778 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
40779 XVECEXP (vals, 0, one_var),
40780 one_var))
40781 return;
40782
40783 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
40784 return;
40785 }
40786
40787 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
40788 }
40789
40790 void
40791 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
40792 {
40793 enum machine_mode mode = GET_MODE (target);
40794 enum machine_mode inner_mode = GET_MODE_INNER (mode);
40795 enum machine_mode half_mode;
40796 bool use_vec_merge = false;
40797 rtx tmp;
40798 static rtx (*gen_extract[6][2]) (rtx, rtx)
40799 = {
40800 { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
40801 { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
40802 { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
40803 { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
40804 { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
40805 { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
40806 };
40807 static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
40808 = {
40809 { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
40810 { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
40811 { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
40812 { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
40813 { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
40814 { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
40815 };
40816 int i, j, n;
40817
40818 switch (mode)
40819 {
40820 case V2SFmode:
40821 case V2SImode:
40822 if (mmx_ok)
40823 {
40824 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
40825 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
40826 if (elt == 0)
40827 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
40828 else
40829 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
40830 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
40831 return;
40832 }
40833 break;
40834
40835 case V2DImode:
40836 use_vec_merge = TARGET_SSE4_1 && TARGET_64BIT;
40837 if (use_vec_merge)
40838 break;
40839
40840 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
40841 ix86_expand_vector_extract (false, tmp, target, 1 - elt);
40842 if (elt == 0)
40843 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
40844 else
40845 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
40846 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
40847 return;
40848
40849 case V2DFmode:
40850 {
40851 rtx op0, op1;
40852
40853 /* For the two element vectors, we implement a VEC_CONCAT with
40854 the extraction of the other element. */
40855
40856 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
40857 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
40858
40859 if (elt == 0)
40860 op0 = val, op1 = tmp;
40861 else
40862 op0 = tmp, op1 = val;
40863
40864 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
40865 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
40866 }
40867 return;
40868
40869 case V4SFmode:
40870 use_vec_merge = TARGET_SSE4_1;
40871 if (use_vec_merge)
40872 break;
40873
40874 switch (elt)
40875 {
40876 case 0:
40877 use_vec_merge = true;
40878 break;
40879
40880 case 1:
40881 /* tmp = target = A B C D */
40882 tmp = copy_to_reg (target);
40883 /* target = A A B B */
40884 emit_insn (gen_vec_interleave_lowv4sf (target, target, target));
40885 /* target = X A B B */
40886 ix86_expand_vector_set (false, target, val, 0);
40887 /* target = A X C D */
40888 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
40889 const1_rtx, const0_rtx,
40890 GEN_INT (2+4), GEN_INT (3+4)));
40891 return;
40892
40893 case 2:
40894 /* tmp = target = A B C D */
40895 tmp = copy_to_reg (target);
40896 /* tmp = X B C D */
40897 ix86_expand_vector_set (false, tmp, val, 0);
40898 /* target = A B X D */
40899 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
40900 const0_rtx, const1_rtx,
40901 GEN_INT (0+4), GEN_INT (3+4)));
40902 return;
40903
40904 case 3:
40905 /* tmp = target = A B C D */
40906 tmp = copy_to_reg (target);
40907 /* tmp = X B C D */
40908 ix86_expand_vector_set (false, tmp, val, 0);
40909 	  /* target = A B C X */
40910 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
40911 const0_rtx, const1_rtx,
40912 GEN_INT (2+4), GEN_INT (0+4)));
40913 return;
40914
40915 default:
40916 gcc_unreachable ();
40917 }
40918 break;
40919
40920 case V4SImode:
40921 use_vec_merge = TARGET_SSE4_1;
40922 if (use_vec_merge)
40923 break;
40924
40925 /* Element 0 handled by vec_merge below. */
40926 if (elt == 0)
40927 {
40928 use_vec_merge = true;
40929 break;
40930 }
40931
40932 if (TARGET_SSE2)
40933 {
40934 /* With SSE2, use integer shuffles to swap element 0 and ELT,
40935 store into element 0, then shuffle them back. */
40936
40937 rtx order[4];
40938
40939 order[0] = GEN_INT (elt);
40940 order[1] = const1_rtx;
40941 order[2] = const2_rtx;
40942 order[3] = GEN_INT (3);
40943 order[elt] = const0_rtx;
40944
40945 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
40946 order[1], order[2], order[3]));
40947
40948 ix86_expand_vector_set (false, target, val, 0);
40949
40950 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
40951 order[1], order[2], order[3]));
40952 }
40953 else
40954 {
40955 /* For SSE1, we have to reuse the V4SF code. */
40956 rtx t = gen_reg_rtx (V4SFmode);
40957 ix86_expand_vector_set (false, t, gen_lowpart (SFmode, val), elt);
40958 emit_move_insn (target, gen_lowpart (mode, t));
40959 }
40960 return;
40961
40962 case V8HImode:
40963 use_vec_merge = TARGET_SSE2;
40964 break;
40965 case V4HImode:
40966 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
40967 break;
40968
40969 case V16QImode:
40970 use_vec_merge = TARGET_SSE4_1;
40971 break;
40972
40973 case V8QImode:
40974 break;
40975
40976 case V32QImode:
40977 half_mode = V16QImode;
40978 j = 0;
40979 n = 16;
40980 goto half;
40981
40982 case V16HImode:
40983 half_mode = V8HImode;
40984 j = 1;
40985 n = 8;
40986 goto half;
40987
40988 case V8SImode:
40989 half_mode = V4SImode;
40990 j = 2;
40991 n = 4;
40992 goto half;
40993
40994 case V4DImode:
40995 half_mode = V2DImode;
40996 j = 3;
40997 n = 2;
40998 goto half;
40999
41000 case V8SFmode:
41001 half_mode = V4SFmode;
41002 j = 4;
41003 n = 4;
41004 goto half;
41005
41006 case V4DFmode:
41007 half_mode = V2DFmode;
41008 j = 5;
41009 n = 2;
41010 goto half;
41011
41012 half:
41013 /* Compute offset. */
41014 i = elt / n;
41015 elt %= n;
41016
41017 gcc_assert (i <= 1);
41018
41019 /* Extract the half. */
41020 tmp = gen_reg_rtx (half_mode);
41021 emit_insn (gen_extract[j][i] (tmp, target));
41022
41023 /* Put val in tmp at elt. */
41024 ix86_expand_vector_set (false, tmp, val, elt);
41025
41026 /* Put it back. */
41027 emit_insn (gen_insert[j][i] (target, target, tmp));
41028 return;
41029
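    /* For the 512-bit modes below, broadcast VAL into a scratch vector and
       use a masked blend with mask (1 << ELT) so that only that element of
       TARGET is replaced.  */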
41030 case V8DFmode:
41031 if (TARGET_AVX512F)
41032 {
41033 tmp = gen_reg_rtx (mode);
41034 emit_insn (gen_rtx_SET (VOIDmode, tmp,
41035 gen_rtx_VEC_DUPLICATE (mode, val)));
41036 emit_insn (gen_avx512f_blendmv8df (target, tmp, target,
41037 force_reg (QImode, GEN_INT (1 << elt))));
41038 return;
41039 }
41040 else
41041 break;
41042 case V8DImode:
41043 if (TARGET_AVX512F)
41044 {
41045 tmp = gen_reg_rtx (mode);
41046 emit_insn (gen_rtx_SET (VOIDmode, tmp,
41047 gen_rtx_VEC_DUPLICATE (mode, val)));
41048 emit_insn (gen_avx512f_blendmv8di (target, tmp, target,
41049 force_reg (QImode, GEN_INT (1 << elt))));
41050 return;
41051 }
41052 else
41053 break;
41054 case V16SFmode:
41055 if (TARGET_AVX512F)
41056 {
41057 tmp = gen_reg_rtx (mode);
41058 emit_insn (gen_rtx_SET (VOIDmode, tmp,
41059 gen_rtx_VEC_DUPLICATE (mode, val)));
41060 emit_insn (gen_avx512f_blendmv16sf (target, tmp, target,
41061 force_reg (HImode, GEN_INT (1 << elt))));
41062 return;
41063 }
41064 else
41065 break;
41066 case V16SImode:
41067 if (TARGET_AVX512F)
41068 {
41069 tmp = gen_reg_rtx (mode);
41070 emit_insn (gen_rtx_SET (VOIDmode, tmp,
41071 gen_rtx_VEC_DUPLICATE (mode, val)));
41072 emit_insn (gen_avx512f_blendmv16si (target, tmp, target,
41073 force_reg (HImode, GEN_INT (1 << elt))));
41074 return;
41075 }
41076 else
41077 break;
41078 case V32HImode:
41079 if (TARGET_AVX512F && TARGET_AVX512BW)
41080 {
41081 tmp = gen_reg_rtx (mode);
41082 emit_insn (gen_rtx_SET (VOIDmode, tmp,
41083 gen_rtx_VEC_DUPLICATE (mode, val)));
41084 emit_insn (gen_avx512bw_blendmv32hi (target, tmp, target,
41085 force_reg (SImode, GEN_INT (1 << elt))));
41086 return;
41087 }
41088 else
41089 break;
41090 case V64QImode:
41091 if (TARGET_AVX512F && TARGET_AVX512BW)
41092 {
41093 tmp = gen_reg_rtx (mode);
41094 emit_insn (gen_rtx_SET (VOIDmode, tmp,
41095 gen_rtx_VEC_DUPLICATE (mode, val)));
41096 emit_insn (gen_avx512bw_blendmv64qi (target, tmp, target,
41097 force_reg (DImode, GEN_INT (1 << elt))));
41098 return;
41099 }
41100 else
41101 break;
41102
41103 default:
41104 break;
41105 }
41106
41107 if (use_vec_merge)
41108 {
41109 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
41110 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
41111 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
41112 }
41113 else
41114 {
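      /* There is no single-insn way to insert the element: spill the vector
	 to a stack temporary, store VAL into the element's slot in memory,
	 and reload the whole vector.  */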
41115 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
41116
41117 emit_move_insn (mem, target);
41118
41119 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
41120 emit_move_insn (tmp, val);
41121
41122 emit_move_insn (target, mem);
41123 }
41124 }
41125
41126 void
41127 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
41128 {
41129 enum machine_mode mode = GET_MODE (vec);
41130 enum machine_mode inner_mode = GET_MODE_INNER (mode);
41131 bool use_vec_extr = false;
41132 rtx tmp;
41133
41134 switch (mode)
41135 {
41136 case V2SImode:
41137 case V2SFmode:
41138 if (!mmx_ok)
41139 break;
41140 /* FALLTHRU */
41141
41142 case V2DFmode:
41143 case V2DImode:
41144 use_vec_extr = true;
41145 break;
41146
41147 case V4SFmode:
41148 use_vec_extr = TARGET_SSE4_1;
41149 if (use_vec_extr)
41150 break;
41151
41152 switch (elt)
41153 {
41154 case 0:
41155 tmp = vec;
41156 break;
41157
41158 case 1:
41159 case 3:
41160 tmp = gen_reg_rtx (mode);
41161 emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
41162 GEN_INT (elt), GEN_INT (elt),
41163 GEN_INT (elt+4), GEN_INT (elt+4)));
41164 break;
41165
41166 case 2:
41167 tmp = gen_reg_rtx (mode);
41168 emit_insn (gen_vec_interleave_highv4sf (tmp, vec, vec));
41169 break;
41170
41171 default:
41172 gcc_unreachable ();
41173 }
41174 vec = tmp;
41175 use_vec_extr = true;
41176 elt = 0;
41177 break;
41178
41179 case V4SImode:
41180 use_vec_extr = TARGET_SSE4_1;
41181 if (use_vec_extr)
41182 break;
41183
41184 if (TARGET_SSE2)
41185 {
41186 switch (elt)
41187 {
41188 case 0:
41189 tmp = vec;
41190 break;
41191
41192 case 1:
41193 case 3:
41194 tmp = gen_reg_rtx (mode);
41195 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
41196 GEN_INT (elt), GEN_INT (elt),
41197 GEN_INT (elt), GEN_INT (elt)));
41198 break;
41199
41200 case 2:
41201 tmp = gen_reg_rtx (mode);
41202 emit_insn (gen_vec_interleave_highv4si (tmp, vec, vec));
41203 break;
41204
41205 default:
41206 gcc_unreachable ();
41207 }
41208 vec = tmp;
41209 use_vec_extr = true;
41210 elt = 0;
41211 }
41212 else
41213 {
41214 /* For SSE1, we have to reuse the V4SF code. */
41215 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
41216 gen_lowpart (V4SFmode, vec), elt);
41217 return;
41218 }
41219 break;
41220
41221 case V8HImode:
41222 use_vec_extr = TARGET_SSE2;
41223 break;
41224 case V4HImode:
41225 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
41226 break;
41227
41228 case V16QImode:
41229 use_vec_extr = TARGET_SSE4_1;
41230 break;
41231
41232 case V8SFmode:
41233 if (TARGET_AVX)
41234 {
41235 tmp = gen_reg_rtx (V4SFmode);
41236 if (elt < 4)
41237 emit_insn (gen_vec_extract_lo_v8sf (tmp, vec));
41238 else
41239 emit_insn (gen_vec_extract_hi_v8sf (tmp, vec));
41240 ix86_expand_vector_extract (false, target, tmp, elt & 3);
41241 return;
41242 }
41243 break;
41244
41245 case V4DFmode:
41246 if (TARGET_AVX)
41247 {
41248 tmp = gen_reg_rtx (V2DFmode);
41249 if (elt < 2)
41250 emit_insn (gen_vec_extract_lo_v4df (tmp, vec));
41251 else
41252 emit_insn (gen_vec_extract_hi_v4df (tmp, vec));
41253 ix86_expand_vector_extract (false, target, tmp, elt & 1);
41254 return;
41255 }
41256 break;
41257
41258 case V32QImode:
41259 if (TARGET_AVX)
41260 {
41261 tmp = gen_reg_rtx (V16QImode);
41262 if (elt < 16)
41263 emit_insn (gen_vec_extract_lo_v32qi (tmp, vec));
41264 else
41265 emit_insn (gen_vec_extract_hi_v32qi (tmp, vec));
41266 ix86_expand_vector_extract (false, target, tmp, elt & 15);
41267 return;
41268 }
41269 break;
41270
41271 case V16HImode:
41272 if (TARGET_AVX)
41273 {
41274 tmp = gen_reg_rtx (V8HImode);
41275 if (elt < 8)
41276 emit_insn (gen_vec_extract_lo_v16hi (tmp, vec));
41277 else
41278 emit_insn (gen_vec_extract_hi_v16hi (tmp, vec));
41279 ix86_expand_vector_extract (false, target, tmp, elt & 7);
41280 return;
41281 }
41282 break;
41283
41284 case V8SImode:
41285 if (TARGET_AVX)
41286 {
41287 tmp = gen_reg_rtx (V4SImode);
41288 if (elt < 4)
41289 emit_insn (gen_vec_extract_lo_v8si (tmp, vec));
41290 else
41291 emit_insn (gen_vec_extract_hi_v8si (tmp, vec));
41292 ix86_expand_vector_extract (false, target, tmp, elt & 3);
41293 return;
41294 }
41295 break;
41296
41297 case V4DImode:
41298 if (TARGET_AVX)
41299 {
41300 tmp = gen_reg_rtx (V2DImode);
41301 if (elt < 2)
41302 emit_insn (gen_vec_extract_lo_v4di (tmp, vec));
41303 else
41304 emit_insn (gen_vec_extract_hi_v4di (tmp, vec));
41305 ix86_expand_vector_extract (false, target, tmp, elt & 1);
41306 return;
41307 }
41308 break;
41309
41310 case V32HImode:
41311 if (TARGET_AVX512BW)
41312 {
41313 tmp = gen_reg_rtx (V16HImode);
41314 if (elt < 16)
41315 emit_insn (gen_vec_extract_lo_v32hi (tmp, vec));
41316 else
41317 emit_insn (gen_vec_extract_hi_v32hi (tmp, vec));
41318 ix86_expand_vector_extract (false, target, tmp, elt & 15);
41319 return;
41320 }
41321 break;
41322
41323 case V64QImode:
41324 if (TARGET_AVX512BW)
41325 {
41326 tmp = gen_reg_rtx (V32QImode);
41327 if (elt < 32)
41328 emit_insn (gen_vec_extract_lo_v64qi (tmp, vec));
41329 else
41330 emit_insn (gen_vec_extract_hi_v64qi (tmp, vec));
41331 ix86_expand_vector_extract (false, target, tmp, elt & 31);
41332 return;
41333 }
41334 break;
41335
41336 case V16SFmode:
41337 tmp = gen_reg_rtx (V8SFmode);
41338 if (elt < 8)
41339 emit_insn (gen_vec_extract_lo_v16sf (tmp, vec));
41340 else
41341 emit_insn (gen_vec_extract_hi_v16sf (tmp, vec));
41342 ix86_expand_vector_extract (false, target, tmp, elt & 7);
41343 return;
41344
41345 case V8DFmode:
41346 tmp = gen_reg_rtx (V4DFmode);
41347 if (elt < 4)
41348 emit_insn (gen_vec_extract_lo_v8df (tmp, vec));
41349 else
41350 emit_insn (gen_vec_extract_hi_v8df (tmp, vec));
41351 ix86_expand_vector_extract (false, target, tmp, elt & 3);
41352 return;
41353
41354 case V16SImode:
41355 tmp = gen_reg_rtx (V8SImode);
41356 if (elt < 8)
41357 emit_insn (gen_vec_extract_lo_v16si (tmp, vec));
41358 else
41359 emit_insn (gen_vec_extract_hi_v16si (tmp, vec));
41360 ix86_expand_vector_extract (false, target, tmp, elt & 7);
41361 return;
41362
41363 case V8DImode:
41364 tmp = gen_reg_rtx (V4DImode);
41365 if (elt < 4)
41366 emit_insn (gen_vec_extract_lo_v8di (tmp, vec));
41367 else
41368 emit_insn (gen_vec_extract_hi_v8di (tmp, vec));
41369 ix86_expand_vector_extract (false, target, tmp, elt & 3);
41370 return;
41371
41372 case V8QImode:
41373 /* ??? Could extract the appropriate HImode element and shift. */
41374 default:
41375 break;
41376 }
41377
41378 if (use_vec_extr)
41379 {
41380 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
41381 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
41382
41383 /* Let the rtl optimizers know about the zero extension performed. */
41384 if (inner_mode == QImode || inner_mode == HImode)
41385 {
41386 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
41387 target = gen_lowpart (SImode, target);
41388 }
41389
41390 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
41391 }
41392 else
41393 {
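      /* No direct extraction pattern is available: store the whole vector to
	 a stack temporary and load the requested element back from memory.  */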
41394 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
41395
41396 emit_move_insn (mem, vec);
41397
41398 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
41399 emit_move_insn (target, tmp);
41400 }
41401 }
41402
41403 /* Generate code to copy vector bits i / 2 ... i - 1 from vector SRC
41404 to bits 0 ... i / 2 - 1 of vector DEST, which has the same mode.
41405 The upper bits of DEST are undefined, though they shouldn't cause
41406 exceptions (some bits from src or all zeros are ok). */
41407
41408 static void
41409 emit_reduc_half (rtx dest, rtx src, int i)
41410 {
41411 rtx tem, d = dest;
41412 switch (GET_MODE (src))
41413 {
41414 case V4SFmode:
41415 if (i == 128)
41416 tem = gen_sse_movhlps (dest, src, src);
41417 else
41418 tem = gen_sse_shufps_v4sf (dest, src, src, const1_rtx, const1_rtx,
41419 GEN_INT (1 + 4), GEN_INT (1 + 4));
41420 break;
41421 case V2DFmode:
41422 tem = gen_vec_interleave_highv2df (dest, src, src);
41423 break;
41424 case V16QImode:
41425 case V8HImode:
41426 case V4SImode:
41427 case V2DImode:
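      /* Move the upper I/2 bits down into the low half with a whole-register
	 (V1TImode) logical right shift.  */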
41428 d = gen_reg_rtx (V1TImode);
41429 tem = gen_sse2_lshrv1ti3 (d, gen_lowpart (V1TImode, src),
41430 GEN_INT (i / 2));
41431 break;
41432 case V8SFmode:
41433 if (i == 256)
41434 tem = gen_avx_vperm2f128v8sf3 (dest, src, src, const1_rtx);
41435 else
41436 tem = gen_avx_shufps256 (dest, src, src,
41437 GEN_INT (i == 128 ? 2 + (3 << 2) : 1));
41438 break;
41439 case V4DFmode:
41440 if (i == 256)
41441 tem = gen_avx_vperm2f128v4df3 (dest, src, src, const1_rtx);
41442 else
41443 tem = gen_avx_shufpd256 (dest, src, src, const1_rtx);
41444 break;
41445 case V32QImode:
41446 case V16HImode:
41447 case V8SImode:
41448 case V4DImode:
41449 if (i == 256)
41450 {
41451 if (GET_MODE (dest) != V4DImode)
41452 d = gen_reg_rtx (V4DImode);
41453 tem = gen_avx2_permv2ti (d, gen_lowpart (V4DImode, src),
41454 gen_lowpart (V4DImode, src),
41455 const1_rtx);
41456 }
41457 else
41458 {
41459 d = gen_reg_rtx (V2TImode);
41460 tem = gen_avx2_lshrv2ti3 (d, gen_lowpart (V2TImode, src),
41461 GEN_INT (i / 2));
41462 }
41463 break;
41464 case V64QImode:
41465 case V32HImode:
41466 case V16SImode:
41467 case V16SFmode:
41468 case V8DImode:
41469 case V8DFmode:
41470 if (i > 128)
41471 tem = gen_avx512f_shuf_i32x4_1 (gen_lowpart (V16SImode, dest),
41472 gen_lowpart (V16SImode, src),
41473 gen_lowpart (V16SImode, src),
41474 GEN_INT (0x4 + (i == 512 ? 4 : 0)),
41475 GEN_INT (0x5 + (i == 512 ? 4 : 0)),
41476 GEN_INT (0x6 + (i == 512 ? 4 : 0)),
41477 GEN_INT (0x7 + (i == 512 ? 4 : 0)),
41478 GEN_INT (0xC), GEN_INT (0xD),
41479 GEN_INT (0xE), GEN_INT (0xF),
41480 GEN_INT (0x10), GEN_INT (0x11),
41481 GEN_INT (0x12), GEN_INT (0x13),
41482 GEN_INT (0x14), GEN_INT (0x15),
41483 GEN_INT (0x16), GEN_INT (0x17));
41484 else
41485 tem = gen_avx512f_pshufd_1 (gen_lowpart (V16SImode, dest),
41486 gen_lowpart (V16SImode, src),
41487 GEN_INT (i == 128 ? 0x2 : 0x1),
41488 GEN_INT (0x3),
41489 GEN_INT (0x3),
41490 GEN_INT (0x3),
41491 GEN_INT (i == 128 ? 0x6 : 0x5),
41492 GEN_INT (0x7),
41493 GEN_INT (0x7),
41494 GEN_INT (0x7),
41495 GEN_INT (i == 128 ? 0xA : 0x9),
41496 GEN_INT (0xB),
41497 GEN_INT (0xB),
41498 GEN_INT (0xB),
41499 GEN_INT (i == 128 ? 0xE : 0xD),
41500 GEN_INT (0xF),
41501 GEN_INT (0xF),
41502 GEN_INT (0xF));
41503 break;
41504 default:
41505 gcc_unreachable ();
41506 }
41507 emit_insn (tem);
41508 if (d != dest)
41509 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
41510 }
41511
41512 /* Expand a vector reduction. FN is the binary pattern to reduce;
41513 DEST is the destination; IN is the input vector. */
41514
41515 void
41516 ix86_expand_reduc (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
41517 {
41518 rtx half, dst, vec = in;
41519 enum machine_mode mode = GET_MODE (in);
41520 int i;
41521
41522 /* SSE4 has a special instruction for V8HImode UMIN reduction. */
41523 if (TARGET_SSE4_1
41524 && mode == V8HImode
41525 && fn == gen_uminv8hi3)
41526 {
41527 emit_insn (gen_sse4_1_phminposuw (dest, in));
41528 return;
41529 }
41530
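  /* Repeatedly fold the upper half of the remaining live lanes onto the
     lower half with FN, halving the span of interest on each iteration until
     only a single element's worth remains.  */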
41531 for (i = GET_MODE_BITSIZE (mode);
41532 i > GET_MODE_BITSIZE (GET_MODE_INNER (mode));
41533 i >>= 1)
41534 {
41535 half = gen_reg_rtx (mode);
41536 emit_reduc_half (half, vec, i);
41537 if (i == GET_MODE_BITSIZE (GET_MODE_INNER (mode)) * 2)
41538 dst = dest;
41539 else
41540 dst = gen_reg_rtx (mode);
41541 emit_insn (fn (dst, half, vec));
41542 vec = dst;
41543 }
41544 }
41545 \f
41546 /* Target hook for scalar_mode_supported_p. */
41547 static bool
41548 ix86_scalar_mode_supported_p (enum machine_mode mode)
41549 {
41550 if (DECIMAL_FLOAT_MODE_P (mode))
41551 return default_decimal_float_supported_p ();
41552 else if (mode == TFmode)
41553 return true;
41554 else
41555 return default_scalar_mode_supported_p (mode);
41556 }
41557
41558 /* Implements target hook vector_mode_supported_p. */
41559 static bool
41560 ix86_vector_mode_supported_p (enum machine_mode mode)
41561 {
41562 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
41563 return true;
41564 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
41565 return true;
41566 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
41567 return true;
41568 if (TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
41569 return true;
41570 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
41571 return true;
41572 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
41573 return true;
41574 return false;
41575 }
41576
41577 /* Implement target hook libgcc_floating_mode_supported_p. */
41578 static bool
41579 ix86_libgcc_floating_mode_supported_p (enum machine_mode mode)
41580 {
41581 switch (mode)
41582 {
41583 case SFmode:
41584 case DFmode:
41585 case XFmode:
41586 return true;
41587
41588 case TFmode:
41589 #ifdef IX86_NO_LIBGCC_TFMODE
41590 return false;
41591 #elif defined IX86_MAYBE_NO_LIBGCC_TFMODE
41592 return TARGET_LONG_DOUBLE_128;
41593 #else
41594 return true;
41595 #endif
41596
41597 default:
41598 return false;
41599 }
41600 }
41601
41602 /* Target hook for c_mode_for_suffix. */
41603 static enum machine_mode
41604 ix86_c_mode_for_suffix (char suffix)
41605 {
41606 if (suffix == 'q')
41607 return TFmode;
41608 if (suffix == 'w')
41609 return XFmode;
41610
41611 return VOIDmode;
41612 }
41613
41614 /* Worker function for TARGET_MD_ASM_CLOBBERS.
41615
41616 We do this in the new i386 backend to maintain source compatibility
41617 with the old cc0-based compiler. */
41618
41619 static tree
41620 ix86_md_asm_clobbers (tree, tree, tree clobbers)
41621 {
41622 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
41623 clobbers);
41624 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
41625 clobbers);
41626 return clobbers;
41627 }
41628
41629 /* Implements target vector targetm.asm.encode_section_info. */
41630
41631 static void ATTRIBUTE_UNUSED
41632 ix86_encode_section_info (tree decl, rtx rtl, int first)
41633 {
41634 default_encode_section_info (decl, rtl, first);
41635
41636 if (TREE_CODE (decl) == VAR_DECL
41637 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
41638 && ix86_in_large_data_p (decl))
41639 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
41640 }
41641
41642 /* Worker function for REVERSE_CONDITION. */
41643
41644 enum rtx_code
41645 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
41646 {
41647 return (mode != CCFPmode && mode != CCFPUmode
41648 ? reverse_condition (code)
41649 : reverse_condition_maybe_unordered (code));
41650 }
41651
41652 /* Output code to perform an x87 FP register move, from OPERANDS[1]
41653 to OPERANDS[0]. */
41654
41655 const char *
41656 output_387_reg_move (rtx insn, rtx *operands)
41657 {
41658 if (REG_P (operands[0]))
41659 {
41660 if (REG_P (operands[1])
41661 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
41662 {
41663 if (REGNO (operands[0]) == FIRST_STACK_REG)
41664 return output_387_ffreep (operands, 0);
41665 return "fstp\t%y0";
41666 }
41667 if (STACK_TOP_P (operands[0]))
41668 return "fld%Z1\t%y1";
41669 return "fst\t%y0";
41670 }
41671 else if (MEM_P (operands[0]))
41672 {
41673 gcc_assert (REG_P (operands[1]));
41674 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
41675 return "fstp%Z0\t%y0";
41676 else
41677 {
41678 /* There is no non-popping store to memory for XFmode.
41679 So if we need one, follow the store with a load. */
41680 if (GET_MODE (operands[0]) == XFmode)
41681 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
41682 else
41683 return "fst%Z0\t%y0";
41684 }
41685 }
41686 else
41687 gcc_unreachable ();
41688 }
41689
41690 /* Output code to perform a conditional jump to LABEL, if C2 flag in
41691 FP status register is set. */
41692
41693 void
41694 ix86_emit_fp_unordered_jump (rtx label)
41695 {
41696 rtx reg = gen_reg_rtx (HImode);
41697 rtx temp;
41698
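/* Illustrative note (not in the original sources): fnstsw stores the x87
   status word; C2 is bit 10, i.e. bit 2 of the high byte.  SAHF copies
   that byte into EFLAGS (C0 -> CF, C2 -> PF, C3 -> ZF), so the UNORDERED
   test below checks PF, while the non-SAHF path tests 0x04 of the high
   byte directly, roughly:  if ((status >> 8) & 0x04) goto label;  */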
41699 emit_insn (gen_x86_fnstsw_1 (reg));
41700
41701 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
41702 {
41703 emit_insn (gen_x86_sahf_1 (reg));
41704
41705 temp = gen_rtx_REG (CCmode, FLAGS_REG);
41706 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
41707 }
41708 else
41709 {
41710 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
41711
41712 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
41713 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
41714 }
41715
41716 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
41717 gen_rtx_LABEL_REF (VOIDmode, label),
41718 pc_rtx);
41719 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
41720
41721 emit_jump_insn (temp);
41722 predict_jump (REG_BR_PROB_BASE * 10 / 100);
41723 }
41724
41725 /* Output code to perform a log1p XFmode calculation. */
41726
41727 void ix86_emit_i387_log1p (rtx op0, rtx op1)
41728 {
41729 rtx_code_label *label1 = gen_label_rtx ();
41730 rtx_code_label *label2 = gen_label_rtx ();
41731
41732 rtx tmp = gen_reg_rtx (XFmode);
41733 rtx tmp2 = gen_reg_rtx (XFmode);
41734 rtx test;
41735
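/* Illustrative sketch of the math (the i387 insn behavior is standard):
   log1p (x) = ln (1 + x) = ln (2) * log2 (1 + x).  fyl2xp1 computes
   y * log2 (x + 1) accurately only for small |x|
   (|x| < 1 - sqrt (2) / 2 ~= 0.2928932...), so below that threshold we
   load ln (2) with fldln2 and use fyl2xp1; otherwise we form 1 + x
   explicitly and use fyl2x, which computes y * log2 (x).  */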
41736 emit_insn (gen_absxf2 (tmp, op1));
41737 test = gen_rtx_GE (VOIDmode, tmp,
41738 CONST_DOUBLE_FROM_REAL_VALUE (
41739 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
41740 XFmode));
41741 emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1));
41742
41743 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
41744 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
41745 emit_jump (label2);
41746
41747 emit_label (label1);
41748 emit_move_insn (tmp, CONST1_RTX (XFmode));
41749 emit_insn (gen_addxf3 (tmp, op1, tmp));
41750 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
41751 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
41752
41753 emit_label (label2);
41754 }
41755
41756 /* Emit code for round calculation. */
41757 void ix86_emit_i387_round (rtx op0, rtx op1)
41758 {
41759 enum machine_mode inmode = GET_MODE (op1);
41760 enum machine_mode outmode = GET_MODE (op0);
41761 rtx e1, e2, res, tmp, tmp1, half;
41762 rtx scratch = gen_reg_rtx (HImode);
41763 rtx flags = gen_rtx_REG (CCNOmode, FLAGS_REG);
41764 rtx_code_label *jump_label = gen_label_rtx ();
41765 rtx insn;
41766 rtx (*gen_abs) (rtx, rtx);
41767 rtx (*gen_neg) (rtx, rtx);
41768
41769 switch (inmode)
41770 {
41771 case SFmode:
41772 gen_abs = gen_abssf2;
41773 break;
41774 case DFmode:
41775 gen_abs = gen_absdf2;
41776 break;
41777 case XFmode:
41778 gen_abs = gen_absxf2;
41779 break;
41780 default:
41781 gcc_unreachable ();
41782 }
41783
41784 switch (outmode)
41785 {
41786 case SFmode:
41787 gen_neg = gen_negsf2;
41788 break;
41789 case DFmode:
41790 gen_neg = gen_negdf2;
41791 break;
41792 case XFmode:
41793 gen_neg = gen_negxf2;
41794 break;
41795 case HImode:
41796 gen_neg = gen_neghi2;
41797 break;
41798 case SImode:
41799 gen_neg = gen_negsi2;
41800 break;
41801 case DImode:
41802 gen_neg = gen_negdi2;
41803 break;
41804 default:
41805 gcc_unreachable ();
41806 }
41807
41808 e1 = gen_reg_rtx (inmode);
41809 e2 = gen_reg_rtx (inmode);
41810 res = gen_reg_rtx (outmode);
41811
41812 half = CONST_DOUBLE_FROM_REAL_VALUE (dconsthalf, inmode);
41813
41814 /* round(a) = sgn(a) * floor(fabs(a) + 0.5) */
41815
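/* Worked example (illustrative): a = -2.5 gives fabs (a) = 2.5,
   2.5 + 0.5 = 3.0, floor (3.0) = 3.0, and restoring the sign yields
   -3.0, i.e. halfway cases round away from zero as C round () does.  */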
41816 /* scratch = fxam(op1) */
41817 emit_insn (gen_rtx_SET (VOIDmode, scratch,
41818 gen_rtx_UNSPEC (HImode, gen_rtvec (1, op1),
41819 UNSPEC_FXAM)));
41820 /* e1 = fabs(op1) */
41821 emit_insn (gen_abs (e1, op1));
41822
41823 /* e2 = e1 + 0.5 */
41824 half = force_reg (inmode, half);
41825 emit_insn (gen_rtx_SET (VOIDmode, e2,
41826 gen_rtx_PLUS (inmode, e1, half)));
41827
41828 /* res = floor(e2) */
41829 if (inmode != XFmode)
41830 {
41831 tmp1 = gen_reg_rtx (XFmode);
41832
41833 emit_insn (gen_rtx_SET (VOIDmode, tmp1,
41834 gen_rtx_FLOAT_EXTEND (XFmode, e2)));
41835 }
41836 else
41837 tmp1 = e2;
41838
41839 switch (outmode)
41840 {
41841 case SFmode:
41842 case DFmode:
41843 {
41844 rtx tmp0 = gen_reg_rtx (XFmode);
41845
41846 emit_insn (gen_frndintxf2_floor (tmp0, tmp1));
41847
41848 emit_insn (gen_rtx_SET (VOIDmode, res,
41849 gen_rtx_UNSPEC (outmode, gen_rtvec (1, tmp0),
41850 UNSPEC_TRUNC_NOOP)));
41851 }
41852 break;
41853 case XFmode:
41854 emit_insn (gen_frndintxf2_floor (res, tmp1));
41855 break;
41856 case HImode:
41857 emit_insn (gen_lfloorxfhi2 (res, tmp1));
41858 break;
41859 case SImode:
41860 emit_insn (gen_lfloorxfsi2 (res, tmp1));
41861 break;
41862 case DImode:
41863 emit_insn (gen_lfloorxfdi2 (res, tmp1));
41864 break;
41865 default:
41866 gcc_unreachable ();
41867 }
41868
41869 /* flags = signbit(a) */
41870 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x02)));
41871
41872 /* if (flags) then res = -res */
41873 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode,
41874 gen_rtx_EQ (VOIDmode, flags, const0_rtx),
41875 gen_rtx_LABEL_REF (VOIDmode, jump_label),
41876 pc_rtx);
41877 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
41878 predict_jump (REG_BR_PROB_BASE * 50 / 100);
41879 JUMP_LABEL (insn) = jump_label;
41880
41881 emit_insn (gen_neg (res, res));
41882
41883 emit_label (jump_label);
41884 LABEL_NUSES (jump_label) = 1;
41885
41886 emit_move_insn (op0, res);
41887 }
41888
41889 /* Output code to perform a Newton-Raphson approximation of a single precision
41890 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
41891
41892 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
41893 {
41894 rtx x0, x1, e0, e1;
41895
41896 x0 = gen_reg_rtx (mode);
41897 e0 = gen_reg_rtx (mode);
41898 e1 = gen_reg_rtx (mode);
41899 x1 = gen_reg_rtx (mode);
41900
41901 /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */
41902
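/* Derivation (illustrative sketch): with x0 = rcp (b) ~= 1/b, one
   Newton-Raphson step for 1/b is x1 = x0 * (2 - b * x0)
   = (x0 + x0) - (b * x0 * x0), which is exactly the e1 - e0 computed
   below; the relative error of the estimate is roughly squared, so the
   ~12-bit rcpps (or 14-bit rcp14) estimate reaches nearly full single
   precision after this one step.  */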
41903 b = force_reg (mode, b);
41904
41905 /* x0 = rcp(b) estimate */
41906 if (mode == V16SFmode || mode == V8DFmode)
41907 emit_insn (gen_rtx_SET (VOIDmode, x0,
41908 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
41909 UNSPEC_RCP14)));
41910 else
41911 emit_insn (gen_rtx_SET (VOIDmode, x0,
41912 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
41913 UNSPEC_RCP)));
41914
41915 /* e0 = x0 * b */
41916 emit_insn (gen_rtx_SET (VOIDmode, e0,
41917 gen_rtx_MULT (mode, x0, b)));
41918
41919 /* e0 = x0 * e0 */
41920 emit_insn (gen_rtx_SET (VOIDmode, e0,
41921 gen_rtx_MULT (mode, x0, e0)));
41922
41923 /* e1 = x0 + x0 */
41924 emit_insn (gen_rtx_SET (VOIDmode, e1,
41925 gen_rtx_PLUS (mode, x0, x0)));
41926
41927 /* x1 = e1 - e0 */
41928 emit_insn (gen_rtx_SET (VOIDmode, x1,
41929 gen_rtx_MINUS (mode, e1, e0)));
41930
41931 /* res = a * x1 */
41932 emit_insn (gen_rtx_SET (VOIDmode, res,
41933 gen_rtx_MULT (mode, a, x1)));
41934 }
41935
41936 /* Output code to perform a Newton-Raphson approximation of a
41937 single precision floating point [reciprocal] square root. */
41938
41939 void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
41940 bool recip)
41941 {
41942 rtx x0, e0, e1, e2, e3, mthree, mhalf;
41943 REAL_VALUE_TYPE r;
41944 int unspec;
41945
41946 x0 = gen_reg_rtx (mode);
41947 e0 = gen_reg_rtx (mode);
41948 e1 = gen_reg_rtx (mode);
41949 e2 = gen_reg_rtx (mode);
41950 e3 = gen_reg_rtx (mode);
41951
41952 real_from_integer (&r, VOIDmode, -3, SIGNED);
41953 mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
41954
41955 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
41956 mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
41957 unspec = UNSPEC_RSQRT;
41958
41959 if (VECTOR_MODE_P (mode))
41960 {
41961 mthree = ix86_build_const_vector (mode, true, mthree);
41962 mhalf = ix86_build_const_vector (mode, true, mhalf);
41963 /* There is no 512-bit rsqrt. There is however rsqrt14. */
41964 if (GET_MODE_SIZE (mode) == 64)
41965 unspec = UNSPEC_RSQRT14;
41966 }
41967
41968 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
41969 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
41970
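/* Derivation (illustrative sketch): with x0 = rsqrt (a), one
   Newton-Raphson step for 1/sqrt (a) is
   x1 = x0 * (1.5 - 0.5 * a * x0 * x0) = -0.5 * x0 * (a * x0 * x0 - 3.0),
   which is the e0..e3 sequence below.  For sqrt the extra factor of a
   (sqrt (a) = a * rsqrt (a)) is folded in by using e0 = a * x0 instead
   of x0 in the -0.5 term.  */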
41971 a = force_reg (mode, a);
41972
41973 /* x0 = rsqrt(a) estimate */
41974 emit_insn (gen_rtx_SET (VOIDmode, x0,
41975 gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
41976 unspec)));
41977
41978 /* If a == 0.0, filter out the infinity produced by rsqrt (0.0) to prevent a NaN result for sqrt (0.0). */
41979 if (!recip)
41980 {
41981 rtx zero, mask;
41982
41983 zero = gen_reg_rtx (mode);
41984 mask = gen_reg_rtx (mode);
41985
41986 zero = force_reg (mode, CONST0_RTX(mode));
41987
41988 /* Handle masked compare. */
41989 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
41990 {
41991 mask = gen_reg_rtx (HImode);
41992 /* Imm value 0x4 corresponds to not-equal comparison. */
41993 emit_insn (gen_avx512f_cmpv16sf3 (mask, zero, a, GEN_INT (0x4)));
41994 emit_insn (gen_avx512f_blendmv16sf (x0, zero, x0, mask));
41995 }
41996 else
41997 {
41998 emit_insn (gen_rtx_SET (VOIDmode, mask,
41999 gen_rtx_NE (mode, zero, a)));
42000
42001 emit_insn (gen_rtx_SET (VOIDmode, x0,
42002 gen_rtx_AND (mode, x0, mask)));
42003 }
42004 }
42005
42006 /* e0 = x0 * a */
42007 emit_insn (gen_rtx_SET (VOIDmode, e0,
42008 gen_rtx_MULT (mode, x0, a)));
42009 /* e1 = e0 * x0 */
42010 emit_insn (gen_rtx_SET (VOIDmode, e1,
42011 gen_rtx_MULT (mode, e0, x0)));
42012
42013 /* e2 = e1 - 3. */
42014 mthree = force_reg (mode, mthree);
42015 emit_insn (gen_rtx_SET (VOIDmode, e2,
42016 gen_rtx_PLUS (mode, e1, mthree)));
42017
42018 mhalf = force_reg (mode, mhalf);
42019 if (recip)
42020 /* e3 = -.5 * x0 */
42021 emit_insn (gen_rtx_SET (VOIDmode, e3,
42022 gen_rtx_MULT (mode, x0, mhalf)));
42023 else
42024 /* e3 = -.5 * e0 */
42025 emit_insn (gen_rtx_SET (VOIDmode, e3,
42026 gen_rtx_MULT (mode, e0, mhalf)));
42027 /* ret = e2 * e3 */
42028 emit_insn (gen_rtx_SET (VOIDmode, res,
42029 gen_rtx_MULT (mode, e2, e3)));
42030 }
42031
42032 #ifdef TARGET_SOLARIS
42033 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
42034
42035 static void
42036 i386_solaris_elf_named_section (const char *name, unsigned int flags,
42037 tree decl)
42038 {
42039 /* With Binutils 2.15, the "@unwind" marker must be specified on
42040 every occurrence of the ".eh_frame" section, not just the first
42041 one. */
42042 if (TARGET_64BIT
42043 && strcmp (name, ".eh_frame") == 0)
42044 {
42045 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
42046 flags & SECTION_WRITE ? "aw" : "a");
42047 return;
42048 }
42049
42050 #ifndef USE_GAS
42051 if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
42052 {
42053 solaris_elf_asm_comdat_section (name, flags, decl);
42054 return;
42055 }
42056 #endif
42057
42058 default_elf_asm_named_section (name, flags, decl);
42059 }
42060 #endif /* TARGET_SOLARIS */
42061
42062 /* Return the mangling of TYPE if it is an extended fundamental type. */
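/* For example (illustrative, following the Itanium C++ ABI):
   void f (__float128) mangles as _Z1fg and void f (long double) as
   _Z1fe, matching the "g" and "e" codes returned below.  */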
42063
42064 static const char *
42065 ix86_mangle_type (const_tree type)
42066 {
42067 type = TYPE_MAIN_VARIANT (type);
42068
42069 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
42070 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
42071 return NULL;
42072
42073 switch (TYPE_MODE (type))
42074 {
42075 case TFmode:
42076 /* __float128 is "g". */
42077 return "g";
42078 case XFmode:
42079 /* "long double" or __float80 is "e". */
42080 return "e";
42081 default:
42082 return NULL;
42083 }
42084 }
42085
42086 /* For 32-bit code we can save PIC register setup by using
42087 __stack_chk_fail_local hidden function instead of calling
42088 __stack_chk_fail directly. 64-bit code doesn't need to set up any PIC
42089 register, so it is better to call __stack_chk_fail directly. */
42090
42091 static tree ATTRIBUTE_UNUSED
42092 ix86_stack_protect_fail (void)
42093 {
42094 return TARGET_64BIT
42095 ? default_external_stack_protect_fail ()
42096 : default_hidden_stack_protect_fail ();
42097 }
42098
42099 /* Select a format to encode pointers in exception handling data. CODE
42100 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
42101 true if the symbol may be affected by dynamic relocations.
42102
42103 ??? All x86 object file formats are capable of representing this.
42104 After all, the relocation needed is the same as for the call insn.
42105 Whether a particular assembler actually lets us emit such a
42106 relocation remains to be seen. */
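/* Illustrative examples of the returned byte (encodings from dwarf2.h):
   PIC, global symbol, small PIC model:
     DW_EH_PE_indirect | DW_EH_PE_pcrel | DW_EH_PE_sdata4 = 0x9b;
   non-PIC small model: DW_EH_PE_udata4 = 0x03;
   otherwise:           DW_EH_PE_absptr = 0x00.  */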
42107 int
42108 asm_preferred_eh_data_format (int code, int global)
42109 {
42110 if (flag_pic)
42111 {
42112 int type = DW_EH_PE_sdata8;
42113 if (!TARGET_64BIT
42114 || ix86_cmodel == CM_SMALL_PIC
42115 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
42116 type = DW_EH_PE_sdata4;
42117 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
42118 }
42119 if (ix86_cmodel == CM_SMALL
42120 || (ix86_cmodel == CM_MEDIUM && code))
42121 return DW_EH_PE_udata4;
42122 return DW_EH_PE_absptr;
42123 }
42124 \f
42125 /* Expand copysign from SIGN to the positive value ABS_VALUE,
42126 storing the result in RESULT. If MASK is non-null, it is the mask
42127 used to mask out the sign bit. */
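/* Bit-level sketch (illustrative): for SFmode the sign-bit mask is
   0x80000000, so the sequence below computes
     result = abs_value | (sign & 0x80000000);
   i.e. it ORs the sign bit of SIGN onto the already-positive value.  */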
42128 static void
42129 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
42130 {
42131 enum machine_mode mode = GET_MODE (sign);
42132 rtx sgn = gen_reg_rtx (mode);
42133 if (mask == NULL_RTX)
42134 {
42135 enum machine_mode vmode;
42136
42137 if (mode == SFmode)
42138 vmode = V4SFmode;
42139 else if (mode == DFmode)
42140 vmode = V2DFmode;
42141 else
42142 vmode = mode;
42143
42144 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), false);
42145 if (!VECTOR_MODE_P (mode))
42146 {
42147 /* We need to generate a scalar mode mask in this case. */
42148 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
42149 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
42150 mask = gen_reg_rtx (mode);
42151 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
42152 }
42153 }
42154 else
42155 mask = gen_rtx_NOT (mode, mask);
42156 emit_insn (gen_rtx_SET (VOIDmode, sgn,
42157 gen_rtx_AND (mode, mask, sign)));
42158 emit_insn (gen_rtx_SET (VOIDmode, result,
42159 gen_rtx_IOR (mode, abs_value, sgn)));
42160 }
42161
42162 /* Expand fabs (OP0) and return a new rtx that holds the result. The
42163 mask for masking out the sign-bit is stored in *SMASK, if that is
42164 non-null. */
42165 static rtx
42166 ix86_expand_sse_fabs (rtx op0, rtx *smask)
42167 {
42168 enum machine_mode vmode, mode = GET_MODE (op0);
42169 rtx xa, mask;
42170
42171 xa = gen_reg_rtx (mode);
42172 if (mode == SFmode)
42173 vmode = V4SFmode;
42174 else if (mode == DFmode)
42175 vmode = V2DFmode;
42176 else
42177 vmode = mode;
42178 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), true);
42179 if (!VECTOR_MODE_P (mode))
42180 {
42181 /* We need to generate a scalar mode mask in this case. */
42182 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
42183 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
42184 mask = gen_reg_rtx (mode);
42185 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
42186 }
42187 emit_insn (gen_rtx_SET (VOIDmode, xa,
42188 gen_rtx_AND (mode, op0, mask)));
42189
42190 if (smask)
42191 *smask = mask;
42192
42193 return xa;
42194 }
42195
42196 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
42197 swapping the operands if SWAP_OPERANDS is true. The expanded
42198 code is a forward jump to a newly created label in case the
42199 comparison is true. The generated label rtx is returned. */
42200 static rtx_code_label *
42201 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
42202 bool swap_operands)
42203 {
42204 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
42205 rtx_code_label *label;
42206 rtx tmp;
42207
42208 if (swap_operands)
42209 {
42210 tmp = op0;
42211 op0 = op1;
42212 op1 = tmp;
42213 }
42214
42215 label = gen_label_rtx ();
42216 tmp = gen_rtx_REG (fpcmp_mode, FLAGS_REG);
42217 emit_insn (gen_rtx_SET (VOIDmode, tmp,
42218 gen_rtx_COMPARE (fpcmp_mode, op0, op1)));
42219 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
42220 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
42221 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
42222 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
42223 JUMP_LABEL (tmp) = label;
42224
42225 return label;
42226 }
42227
42228 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
42229 using comparison code CODE. Operands are swapped for the comparison if
42230 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
42231 static rtx
42232 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
42233 bool swap_operands)
42234 {
42235 rtx (*insn)(rtx, rtx, rtx, rtx);
42236 enum machine_mode mode = GET_MODE (op0);
42237 rtx mask = gen_reg_rtx (mode);
42238
42239 if (swap_operands)
42240 {
42241 rtx tmp = op0;
42242 op0 = op1;
42243 op1 = tmp;
42244 }
42245
42246 insn = mode == DFmode ? gen_setcc_df_sse : gen_setcc_sf_sse;
42247
42248 emit_insn (insn (mask, op0, op1,
42249 gen_rtx_fmt_ee (code, mode, op0, op1)));
42250 return mask;
42251 }
42252
42253 /* Generate and return a rtx of mode MODE for 2**n where n is the number
42254 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
42255 static rtx
42256 ix86_gen_TWO52 (enum machine_mode mode)
42257 {
42258 REAL_VALUE_TYPE TWO52r;
42259 rtx TWO52;
42260
42261 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
42262 TWO52 = const_double_from_real_value (TWO52r, mode);
42263 TWO52 = force_reg (mode, TWO52);
42264
42265 return TWO52;
42266 }
42267
42268 /* Expand SSE sequence for computing lround from OP1 storing
42269 into OP0. */
42270 void
42271 ix86_expand_lround (rtx op0, rtx op1)
42272 {
42273 /* C code for the stuff we're doing below:
42274 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
42275 return (long)tmp;
42276 */
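/* Why nextafter (0.5, 0.0) rather than 0.5 (illustrative, assuming
   round-to-nearest-even): for the largest double below 0.5, adding an
   exact 0.5 would round up to 1.0 and the conversion would yield 1,
   although lround of that value is 0; adding nextafter (0.5, 0.0)
   instead gives 1 - 2**-53, which is representable and converts to 0.  */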
42277 enum machine_mode mode = GET_MODE (op1);
42278 const struct real_format *fmt;
42279 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
42280 rtx adj;
42281
42282 /* load nextafter (0.5, 0.0) */
42283 fmt = REAL_MODE_FORMAT (mode);
42284 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
42285 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
42286
42287 /* adj = copysign (0.5, op1) */
42288 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
42289 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
42290
42291 /* adj = op1 + adj */
42292 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
42293
42294 /* op0 = (imode)adj */
42295 expand_fix (op0, adj, 0);
42296 }
42297
42298 /* Expand SSE2 sequence for computing lfloor or lceil (depending on
42299 DO_FLOOR) from OP1, storing into OP0. */
42300 void
42301 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
42302 {
42303 /* C code for the stuff we're doing below (for do_floor):
42304 xi = (long)op1;
42305 xi -= (double)xi > op1 ? 1 : 0;
42306 return xi;
42307 */
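/* Worked example (illustrative, floor case): op1 = -2.3 gives
   xi = (long) -2.3 = -2 (truncation toward zero), (double) -2 = -2.0
   compares greater than -2.3, so xi is decremented to -3 = floor (-2.3).  */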
42308 enum machine_mode fmode = GET_MODE (op1);
42309 enum machine_mode imode = GET_MODE (op0);
42310 rtx ireg, freg, tmp;
42311 rtx_code_label *label;
42312
42313 /* reg = (long)op1 */
42314 ireg = gen_reg_rtx (imode);
42315 expand_fix (ireg, op1, 0);
42316
42317 /* freg = (double)reg */
42318 freg = gen_reg_rtx (fmode);
42319 expand_float (freg, ireg, 0);
42320
42321 /* ireg = (freg > op1) ? ireg - 1 : ireg */
42322 label = ix86_expand_sse_compare_and_jump (UNLE,
42323 freg, op1, !do_floor);
42324 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
42325 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
42326 emit_move_insn (ireg, tmp);
42327
42328 emit_label (label);
42329 LABEL_NUSES (label) = 1;
42330
42331 emit_move_insn (op0, ireg);
42332 }
42333
42334 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
42335 result in OPERAND0. */
42336 void
42337 ix86_expand_rint (rtx operand0, rtx operand1)
42338 {
42339 /* C code for the stuff we're doing below:
42340 xa = fabs (operand1);
42341 if (!isless (xa, 2**52))
42342 return operand1;
42343 xa = xa + 2**52 - 2**52;
42344 return copysign (xa, operand1);
42345 */
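/* Why adding and subtracting 2**52 works (illustrative, assuming
   round-to-nearest): every double with magnitude >= 2**52 is an integer
   (its ulp is >= 1), so for |xa| < 2**52 the addition rounds away the
   fraction and the subtraction recovers rint (xa); e.g. 3.7 + 2**52
   rounds to 2**52 + 4, giving 4.0 after the subtraction.  */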
42346 enum machine_mode mode = GET_MODE (operand0);
42347 rtx res, xa, TWO52, mask;
42348 rtx_code_label *label;
42349
42350 res = gen_reg_rtx (mode);
42351 emit_move_insn (res, operand1);
42352
42353 /* xa = abs (operand1) */
42354 xa = ix86_expand_sse_fabs (res, &mask);
42355
42356 /* if (!isless (xa, TWO52)) goto label; */
42357 TWO52 = ix86_gen_TWO52 (mode);
42358 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
42359
42360 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
42361 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
42362
42363 ix86_sse_copysign_to_positive (res, xa, res, mask);
42364
42365 emit_label (label);
42366 LABEL_NUSES (label) = 1;
42367
42368 emit_move_insn (operand0, res);
42369 }
42370
42371 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
42372 into OPERAND0. */
42373 void
42374 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
42375 {
42376 /* C code for the stuff we expand below.
42377 double xa = fabs (x), x2;
42378 if (!isless (xa, TWO52))
42379 return x;
42380 xa = xa + TWO52 - TWO52;
42381 x2 = copysign (xa, x);
42382 Compensate. Floor:
42383 if (x2 > x)
42384 x2 -= 1;
42385 Compensate. Ceil:
42386 if (x2 < x)
42387 x2 -= -1;
42388 return x2;
42389 */
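/* Worked example (illustrative, floor case): x = -1.25 gives xa = 1.25,
   xa + 2**52 - 2**52 = 1.0, x2 = copysign (1.0, -1.25) = -1.0; since
   -1.0 > -1.25 the compensation subtracts 1.0, yielding -2.0
   = floor (-1.25), all without any DImode conversion.  */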
42390 enum machine_mode mode = GET_MODE (operand0);
42391 rtx xa, TWO52, tmp, one, res, mask;
42392 rtx_code_label *label;
42393
42394 TWO52 = ix86_gen_TWO52 (mode);
42395
42396 /* Temporary for holding the result, initialized to the input
42397 operand to ease control flow. */
42398 res = gen_reg_rtx (mode);
42399 emit_move_insn (res, operand1);
42400
42401 /* xa = abs (operand1) */
42402 xa = ix86_expand_sse_fabs (res, &mask);
42403
42404 /* if (!isless (xa, TWO52)) goto label; */
42405 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
42406
42407 /* xa = xa + TWO52 - TWO52; */
42408 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
42409 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
42410
42411 /* xa = copysign (xa, operand1) */
42412 ix86_sse_copysign_to_positive (xa, xa, res, mask);
42413
42414 /* generate 1.0 or -1.0 */
42415 one = force_reg (mode,
42416 const_double_from_real_value (do_floor
42417 ? dconst1 : dconstm1, mode));
42418
42419 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
42420 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
42421 emit_insn (gen_rtx_SET (VOIDmode, tmp,
42422 gen_rtx_AND (mode, one, tmp)));
42423 /* We always need to subtract here to preserve signed zero. */
42424 tmp = expand_simple_binop (mode, MINUS,
42425 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
42426 emit_move_insn (res, tmp);
42427
42428 emit_label (label);
42429 LABEL_NUSES (label) = 1;
42430
42431 emit_move_insn (operand0, res);
42432 }
42433
42434 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
42435 into OPERAND0. */
42436 void
42437 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
42438 {
42439 /* C code for the stuff we expand below.
42440 double xa = fabs (x), x2;
42441 if (!isless (xa, TWO52))
42442 return x;
42443 x2 = (double)(long)x;
42444 Compensate. Floor:
42445 if (x2 > x)
42446 x2 -= 1;
42447 Compensate. Ceil:
42448 if (x2 < x)
42449 x2 += 1;
42450 if (HONOR_SIGNED_ZEROS (mode))
42451 return copysign (x2, x);
42452 return x2;
42453 */
42454 enum machine_mode mode = GET_MODE (operand0);
42455 rtx xa, xi, TWO52, tmp, one, res, mask;
42456 rtx_code_label *label;
42457
42458 TWO52 = ix86_gen_TWO52 (mode);
42459
42460 /* Temporary for holding the result, initialized to the input
42461 operand to ease control flow. */
42462 res = gen_reg_rtx (mode);
42463 emit_move_insn (res, operand1);
42464
42465 /* xa = abs (operand1) */
42466 xa = ix86_expand_sse_fabs (res, &mask);
42467
42468 /* if (!isless (xa, TWO52)) goto label; */
42469 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
42470
42471 /* xa = (double)(long)x */
42472 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
42473 expand_fix (xi, res, 0);
42474 expand_float (xa, xi, 0);
42475
42476 /* generate 1.0 */
42477 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
42478
42479 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
42480 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
42481 emit_insn (gen_rtx_SET (VOIDmode, tmp,
42482 gen_rtx_AND (mode, one, tmp)));
42483 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
42484 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
42485 emit_move_insn (res, tmp);
42486
42487 if (HONOR_SIGNED_ZEROS (mode))
42488 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
42489
42490 emit_label (label);
42491 LABEL_NUSES (label) = 1;
42492
42493 emit_move_insn (operand0, res);
42494 }
42495
42496 /* Expand SSE sequence for computing round from OPERAND1 storing
42497 into OPERAND0. The sequence works without relying on DImode truncation
42498 via cvttsd2siq, which is only available on 64-bit targets. */
42499 void
42500 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
42501 {
42502 /* C code for the stuff we expand below.
42503 double xa = fabs (x), xa2, x2;
42504 if (!isless (xa, TWO52))
42505 return x;
42506 Using the absolute value and copying back sign makes
42507 -0.0 -> -0.0 correct.
42508 xa2 = xa + TWO52 - TWO52;
42509 Compensate.
42510 dxa = xa2 - xa;
42511 if (dxa <= -0.5)
42512 xa2 += 1;
42513 else if (dxa > 0.5)
42514 xa2 -= 1;
42515 x2 = copysign (xa2, x);
42516 return x2;
42517 */
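/* Worked example (illustrative, round-to-nearest-even hardware rounding):
   x = 2.5 gives xa2 = 2.5 + 2**52 - 2**52 = 2.0 (ties to even),
   dxa = -0.5, and the dxa <= -0.5 compensation bumps xa2 to 3.0, so the
   result is 3.0 = round (2.5), i.e. halfway cases go away from zero
   unlike plain rint.  */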
42518 enum machine_mode mode = GET_MODE (operand0);
42519 rtx xa, xa2, dxa, TWO52, tmp, half, mhalf, one, res, mask;
42520 rtx_code_label *label;
42521
42522 TWO52 = ix86_gen_TWO52 (mode);
42523
42524 /* Temporary for holding the result, initialized to the input
42525 operand to ease control flow. */
42526 res = gen_reg_rtx (mode);
42527 emit_move_insn (res, operand1);
42528
42529 /* xa = abs (operand1) */
42530 xa = ix86_expand_sse_fabs (res, &mask);
42531
42532 /* if (!isless (xa, TWO52)) goto label; */
42533 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
42534
42535 /* xa2 = xa + TWO52 - TWO52; */
42536 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
42537 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
42538
42539 /* dxa = xa2 - xa; */
42540 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
42541
42542 /* generate 0.5, 1.0 and -0.5 */
42543 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
42544 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
42545 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
42546 0, OPTAB_DIRECT);
42547
42548 /* Compensate. */
42549 tmp = gen_reg_rtx (mode);
42550 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
42551 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
42552 emit_insn (gen_rtx_SET (VOIDmode, tmp,
42553 gen_rtx_AND (mode, one, tmp)));
42554 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
42555 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
42556 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
42557 emit_insn (gen_rtx_SET (VOIDmode, tmp,
42558 gen_rtx_AND (mode, one, tmp)));
42559 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
42560
42561 /* res = copysign (xa2, operand1) */
42562 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
42563
42564 emit_label (label);
42565 LABEL_NUSES (label) = 1;
42566
42567 emit_move_insn (operand0, res);
42568 }
42569
42570 /* Expand SSE sequence for computing trunc from OPERAND1 storing
42571 into OPERAND0. */
42572 void
42573 ix86_expand_trunc (rtx operand0, rtx operand1)
42574 {
42575 /* C code for SSE variant we expand below.
42576 double xa = fabs (x), x2;
42577 if (!isless (xa, TWO52))
42578 return x;
42579 x2 = (double)(long)x;
42580 if (HONOR_SIGNED_ZEROS (mode))
42581 return copysign (x2, x);
42582 return x2;
42583 */
42584 enum machine_mode mode = GET_MODE (operand0);
42585 rtx xa, xi, TWO52, res, mask;
42586 rtx_code_label *label;
42587
42588 TWO52 = ix86_gen_TWO52 (mode);
42589
42590 /* Temporary for holding the result, initialized to the input
42591 operand to ease control flow. */
42592 res = gen_reg_rtx (mode);
42593 emit_move_insn (res, operand1);
42594
42595 /* xa = abs (operand1) */
42596 xa = ix86_expand_sse_fabs (res, &mask);
42597
42598 /* if (!isless (xa, TWO52)) goto label; */
42599 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
42600
42601 /* x = (double)(long)x */
42602 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
42603 expand_fix (xi, res, 0);
42604 expand_float (res, xi, 0);
42605
42606 if (HONOR_SIGNED_ZEROS (mode))
42607 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
42608
42609 emit_label (label);
42610 LABEL_NUSES (label) = 1;
42611
42612 emit_move_insn (operand0, res);
42613 }
42614
42615 /* Expand SSE sequence for computing trunc from OPERAND1 storing
42616 into OPERAND0, without relying on 64-bit-only DImode truncation. */
42617 void
42618 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
42619 {
42620 enum machine_mode mode = GET_MODE (operand0);
42621 rtx xa, mask, TWO52, one, res, smask, tmp;
42622 rtx_code_label *label;
42623
42624 /* C code for SSE variant we expand below.
42625 double xa = fabs (x), x2;
42626 if (!isless (xa, TWO52))
42627 return x;
42628 xa2 = xa + TWO52 - TWO52;
42629 Compensate:
42630 if (xa2 > xa)
42631 xa2 -= 1.0;
42632 x2 = copysign (xa2, x);
42633 return x2;
42634 */
42635
42636 TWO52 = ix86_gen_TWO52 (mode);
42637
42638 /* Temporary for holding the result, initialized to the input
42639 operand to ease control flow. */
42640 res = gen_reg_rtx (mode);
42641 emit_move_insn (res, operand1);
42642
42643 /* xa = abs (operand1) */
42644 xa = ix86_expand_sse_fabs (res, &smask);
42645
42646 /* if (!isless (xa, TWO52)) goto label; */
42647 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
42648
42649 /* res = xa + TWO52 - TWO52; */
42650 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
42651 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
42652 emit_move_insn (res, tmp);
42653
42654 /* generate 1.0 */
42655 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
42656
42657 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
42658 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
42659 emit_insn (gen_rtx_SET (VOIDmode, mask,
42660 gen_rtx_AND (mode, mask, one)));
42661 tmp = expand_simple_binop (mode, MINUS,
42662 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
42663 emit_move_insn (res, tmp);
42664
42665 /* res = copysign (res, operand1) */
42666 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
42667
42668 emit_label (label);
42669 LABEL_NUSES (label) = 1;
42670
42671 emit_move_insn (operand0, res);
42672 }
42673
42674 /* Expand SSE sequence for computing round from OPERAND1 storing
42675 into OPERAND0. */
42676 void
42677 ix86_expand_round (rtx operand0, rtx operand1)
42678 {
42679 /* C code for the stuff we're doing below:
42680 double xa = fabs (x);
42681 if (!isless (xa, TWO52))
42682 return x;
42683 xa = (double)(long)(xa + nextafter (0.5, 0.0));
42684 return copysign (xa, x);
42685 */
42686 enum machine_mode mode = GET_MODE (operand0);
42687 rtx res, TWO52, xa, xi, half, mask;
42688 rtx_code_label *label;
42689 const struct real_format *fmt;
42690 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
42691
42692 /* Temporary for holding the result, initialized to the input
42693 operand to ease control flow. */
42694 res = gen_reg_rtx (mode);
42695 emit_move_insn (res, operand1);
42696
42697 TWO52 = ix86_gen_TWO52 (mode);
42698 xa = ix86_expand_sse_fabs (res, &mask);
42699 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
42700
42701 /* load nextafter (0.5, 0.0) */
42702 fmt = REAL_MODE_FORMAT (mode);
42703 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
42704 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
42705
42706 /* xa = xa + 0.5 */
42707 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
42708 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
42709
42710 /* xa = (double)(int64_t)xa */
42711 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
42712 expand_fix (xi, xa, 0);
42713 expand_float (xa, xi, 0);
42714
42715 /* res = copysign (xa, operand1) */
42716 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
42717
42718 emit_label (label);
42719 LABEL_NUSES (label) = 1;
42720
42721 emit_move_insn (operand0, res);
42722 }
42723
42724 /* Expand SSE sequence for computing round
42725 from OP1 storing into OP0 using sse4 round insn. */
42726 void
42727 ix86_expand_round_sse4 (rtx op0, rtx op1)
42728 {
42729 enum machine_mode mode = GET_MODE (op0);
42730 rtx e1, e2, res, half;
42731 const struct real_format *fmt;
42732 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
42733 rtx (*gen_copysign) (rtx, rtx, rtx);
42734 rtx (*gen_round) (rtx, rtx, rtx);
42735
42736 switch (mode)
42737 {
42738 case SFmode:
42739 gen_copysign = gen_copysignsf3;
42740 gen_round = gen_sse4_1_roundsf2;
42741 break;
42742 case DFmode:
42743 gen_copysign = gen_copysigndf3;
42744 gen_round = gen_sse4_1_rounddf2;
42745 break;
42746 default:
42747 gcc_unreachable ();
42748 }
42749
42750 /* round (a) = trunc (a + copysign (0.5, a)) */
42751
42752 /* load nextafter (0.5, 0.0) */
42753 fmt = REAL_MODE_FORMAT (mode);
42754 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
42755 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
42756 half = const_double_from_real_value (pred_half, mode);
42757
42758 /* e1 = copysign (0.5, op1) */
42759 e1 = gen_reg_rtx (mode);
42760 emit_insn (gen_copysign (e1, half, op1));
42761
42762 /* e2 = op1 + e1 */
42763 e2 = expand_simple_binop (mode, PLUS, op1, e1, NULL_RTX, 0, OPTAB_DIRECT);
42764
42765 /* res = trunc (e2) */
42766 res = gen_reg_rtx (mode);
42767 emit_insn (gen_round (res, e2, GEN_INT (ROUND_TRUNC)));
42768
42769 emit_move_insn (op0, res);
42770 }
42771 \f
42772
42773 /* Table of valid machine attributes. */
42774 static const struct attribute_spec ix86_attribute_table[] =
42775 {
42776 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
42777 affects_type_identity } */
42778 /* Stdcall attribute says callee is responsible for popping arguments
42779 if they are not variable. */
42780 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
42781 true },
42782 /* Fastcall attribute says callee is responsible for popping arguments
42783 if they are not variable. */
42784 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
42785 true },
42786 /* Thiscall attribute says callee is responsible for popping arguments
42787 if they are not variable. */
42788 { "thiscall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
42789 true },
42790 /* Cdecl attribute says the callee is a normal C declaration. */
42791 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute,
42792 true },
42793 /* Regparm attribute specifies how many integer arguments are to be
42794 passed in registers. */
42795 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute,
42796 true },
42797 /* Sseregparm attribute says we are using x86_64 calling conventions
42798 for FP arguments. */
42799 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute,
42800 true },
42801 /* The transactional memory builtins are implicitly regparm or fastcall
42802 depending on the ABI. Override the generic do-nothing attribute that
42803 these builtins were declared with. */
42804 { "*tm regparm", 0, 0, false, true, true, ix86_handle_tm_regparm_attribute,
42805 true },
42806 /* force_align_arg_pointer says this function realigns the stack at entry. */
42807 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
42808 false, true, true, ix86_handle_cconv_attribute, false },
42809 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
42810 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
42811 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
42812 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute,
42813 false },
42814 #endif
42815 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
42816 false },
42817 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
42818 false },
42819 #ifdef SUBTARGET_ATTRIBUTE_TABLE
42820 SUBTARGET_ATTRIBUTE_TABLE,
42821 #endif
42822 /* ms_abi and sysv_abi calling convention function attributes. */
42823 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
42824 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
42825 { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute,
42826 false },
42827 { "callee_pop_aggregate_return", 1, 1, false, true, true,
42828 ix86_handle_callee_pop_aggregate_return, true },
42829 /* End element. */
42830 { NULL, 0, 0, false, false, false, NULL, false }
42831 };
42832
42833 /* Implement targetm.vectorize.builtin_vectorization_cost. */
42834 static int
42835 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
42836 tree vectype, int)
42837 {
42838 unsigned elements;
42839
42840 switch (type_of_cost)
42841 {
42842 case scalar_stmt:
42843 return ix86_cost->scalar_stmt_cost;
42844
42845 case scalar_load:
42846 return ix86_cost->scalar_load_cost;
42847
42848 case scalar_store:
42849 return ix86_cost->scalar_store_cost;
42850
42851 case vector_stmt:
42852 return ix86_cost->vec_stmt_cost;
42853
42854 case vector_load:
42855 return ix86_cost->vec_align_load_cost;
42856
42857 case vector_store:
42858 return ix86_cost->vec_store_cost;
42859
42860 case vec_to_scalar:
42861 return ix86_cost->vec_to_scalar_cost;
42862
42863 case scalar_to_vec:
42864 return ix86_cost->scalar_to_vec_cost;
42865
42866 case unaligned_load:
42867 case unaligned_store:
42868 return ix86_cost->vec_unalign_load_cost;
42869
42870 case cond_branch_taken:
42871 return ix86_cost->cond_taken_branch_cost;
42872
42873 case cond_branch_not_taken:
42874 return ix86_cost->cond_not_taken_branch_cost;
42875
42876 case vec_perm:
42877 case vec_promote_demote:
42878 return ix86_cost->vec_stmt_cost;
42879
42880 case vec_construct:
42881 elements = TYPE_VECTOR_SUBPARTS (vectype);
42882 return elements / 2 + 1;
42883
42884 default:
42885 gcc_unreachable ();
42886 }
42887 }
42888
42889 /* A cached (set (nil) (vselect (vconcat (nil) (nil)) (parallel [])))
42890 insn, so that expand_vselect{,_vconcat} doesn't have to create a fresh
42891 insn every time. */
42892
42893 static GTY(()) rtx_insn *vselect_insn;
42894
42895 /* Initialize vselect_insn. */
42896
42897 static void
42898 init_vselect_insn (void)
42899 {
42900 unsigned i;
42901 rtx x;
42902
42903 x = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (MAX_VECT_LEN));
42904 for (i = 0; i < MAX_VECT_LEN; ++i)
42905 XVECEXP (x, 0, i) = const0_rtx;
42906 x = gen_rtx_VEC_SELECT (V2DFmode, gen_rtx_VEC_CONCAT (V4DFmode, const0_rtx,
42907 const0_rtx), x);
42908 x = gen_rtx_SET (VOIDmode, const0_rtx, x);
42909 start_sequence ();
42910 vselect_insn = emit_insn (x);
42911 end_sequence ();
42912 }
42913
42914 /* Construct (set target (vec_select op0 (parallel perm))) and
42915 return true if that's a valid instruction in the active ISA. */
42916
42917 static bool
42918 expand_vselect (rtx target, rtx op0, const unsigned char *perm,
42919 unsigned nelt, bool testing_p)
42920 {
42921 unsigned int i;
42922 rtx x, save_vconcat;
42923 int icode;
42924
42925 if (vselect_insn == NULL_RTX)
42926 init_vselect_insn ();
42927
42928 x = XEXP (SET_SRC (PATTERN (vselect_insn)), 1);
42929 PUT_NUM_ELEM (XVEC (x, 0), nelt);
42930 for (i = 0; i < nelt; ++i)
42931 XVECEXP (x, 0, i) = GEN_INT (perm[i]);
42932 save_vconcat = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
42933 XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = op0;
42934 PUT_MODE (SET_SRC (PATTERN (vselect_insn)), GET_MODE (target));
42935 SET_DEST (PATTERN (vselect_insn)) = target;
42936 icode = recog_memoized (vselect_insn);
42937
42938 if (icode >= 0 && !testing_p)
42939 emit_insn (copy_rtx (PATTERN (vselect_insn)));
42940
42941 SET_DEST (PATTERN (vselect_insn)) = const0_rtx;
42942 XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = save_vconcat;
42943 INSN_CODE (vselect_insn) = -1;
42944
42945 return icode >= 0;
42946 }
42947
42948 /* Similar, but generate a vec_concat from op0 and op1 as well. */
42949
42950 static bool
42951 expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
42952 const unsigned char *perm, unsigned nelt,
42953 bool testing_p)
42954 {
42955 enum machine_mode v2mode;
42956 rtx x;
42957 bool ok;
42958
42959 if (vselect_insn == NULL_RTX)
42960 init_vselect_insn ();
42961
42962 v2mode = GET_MODE_2XWIDER_MODE (GET_MODE (op0));
42963 x = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
42964 PUT_MODE (x, v2mode);
42965 XEXP (x, 0) = op0;
42966 XEXP (x, 1) = op1;
42967 ok = expand_vselect (target, x, perm, nelt, testing_p);
42968 XEXP (x, 0) = const0_rtx;
42969 XEXP (x, 1) = const0_rtx;
42970 return ok;
42971 }
42972
42973 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
42974 in terms of blendp[sd] / pblendw / pblendvb / vpblendd. */
42975
42976 static bool
42977 expand_vec_perm_blend (struct expand_vec_perm_d *d)
42978 {
42979 enum machine_mode vmode = d->vmode;
42980 unsigned i, mask, nelt = d->nelt;
42981 rtx target, op0, op1, x;
42982 rtx rperm[32], vperm;
42983
42984 if (d->one_operand_p)
42985 return false;
42986 if (TARGET_AVX512F && GET_MODE_SIZE (vmode) == 64
42987 && GET_MODE_SIZE (GET_MODE_INNER (vmode)) >= 4)
42988 ;
42989 else if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
42990 ;
42991 else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
42992 ;
42993 else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
42994 ;
42995 else
42996 return false;
42997
42998 /* This is a blend, not a permute. Elements must stay in their
42999 respective lanes. */
43000 for (i = 0; i < nelt; ++i)
43001 {
43002 unsigned e = d->perm[i];
43003 if (!(e == i || e == i + nelt))
43004 return false;
43005 }
43006
43007 if (d->testing_p)
43008 return true;
43009
43010 /* ??? Without SSE4.1, we could implement this with and/andn/or. This
43011 decision should be extracted elsewhere, so that we only try that
43012 sequence once all budget==3 options have been tried. */
43013 target = d->target;
43014 op0 = d->op0;
43015 op1 = d->op1;
43016 mask = 0;
43017
43018 switch (vmode)
43019 {
43020 case V8DFmode:
43021 case V16SFmode:
43022 case V4DFmode:
43023 case V8SFmode:
43024 case V2DFmode:
43025 case V4SFmode:
43026 case V8HImode:
43027 case V8SImode:
43028 case V32HImode:
43029 case V64QImode:
43030 case V16SImode:
43031 case V8DImode:
43032 for (i = 0; i < nelt; ++i)
43033 mask |= (d->perm[i] >= nelt) << i;
43034 break;
43035
43036 case V2DImode:
43037 for (i = 0; i < 2; ++i)
43038 mask |= (d->perm[i] >= 2 ? 15 : 0) << (i * 4);
43039 vmode = V8HImode;
43040 goto do_subreg;
43041
43042 case V4SImode:
43043 for (i = 0; i < 4; ++i)
43044 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
43045 vmode = V8HImode;
43046 goto do_subreg;
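/* Illustrative example: a V4SI blend taking elements { 0, 5, 2, 7 }
   (op1 supplies elements 1 and 3) builds mask 0xcc here and is emitted
   as a V8HI pblendw with that immediate via do_subreg.  */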
43047
43048 case V16QImode:
43049 /* See if bytes move in pairs so we can use pblendw with
43050 an immediate argument, rather than pblendvb with a vector
43051 argument. */
43052 for (i = 0; i < 16; i += 2)
43053 if (d->perm[i] + 1 != d->perm[i + 1])
43054 {
43055 use_pblendvb:
43056 for (i = 0; i < nelt; ++i)
43057 rperm[i] = (d->perm[i] < nelt ? const0_rtx : constm1_rtx);
43058
43059 finish_pblendvb:
43060 vperm = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
43061 vperm = force_reg (vmode, vperm);
43062
43063 if (GET_MODE_SIZE (vmode) == 16)
43064 emit_insn (gen_sse4_1_pblendvb (target, op0, op1, vperm));
43065 else
43066 emit_insn (gen_avx2_pblendvb (target, op0, op1, vperm));
43067 if (target != d->target)
43068 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
43069 return true;
43070 }
43071
43072 for (i = 0; i < 8; ++i)
43073 mask |= (d->perm[i * 2] >= 16) << i;
43074 vmode = V8HImode;
43075 /* FALLTHRU */
43076
43077 do_subreg:
43078 target = gen_reg_rtx (vmode);
43079 op0 = gen_lowpart (vmode, op0);
43080 op1 = gen_lowpart (vmode, op1);
43081 break;
43082
43083 case V32QImode:
43084 /* See if bytes move in pairs. If not, vpblendvb must be used. */
43085 for (i = 0; i < 32; i += 2)
43086 if (d->perm[i] + 1 != d->perm[i + 1])
43087 goto use_pblendvb;
43088 /* See if bytes move in quadruplets. If yes, vpblendd
43089 with immediate can be used. */
43090 for (i = 0; i < 32; i += 4)
43091 if (d->perm[i] + 2 != d->perm[i + 2])
43092 break;
43093 if (i < 32)
43094 {
43095 /* See if bytes move the same in both lanes. If yes,
43096 vpblendw with immediate can be used. */
43097 for (i = 0; i < 16; i += 2)
43098 if (d->perm[i] + 16 != d->perm[i + 16])
43099 goto use_pblendvb;
43100
43101 /* Use vpblendw. */
43102 for (i = 0; i < 16; ++i)
43103 mask |= (d->perm[i * 2] >= 32) << i;
43104 vmode = V16HImode;
43105 goto do_subreg;
43106 }
43107
43108 /* Use vpblendd. */
43109 for (i = 0; i < 8; ++i)
43110 mask |= (d->perm[i * 4] >= 32) << i;
43111 vmode = V8SImode;
43112 goto do_subreg;
43113
43114 case V16HImode:
43115 /* See if words move in pairs. If yes, vpblendd can be used. */
43116 for (i = 0; i < 16; i += 2)
43117 if (d->perm[i] + 1 != d->perm[i + 1])
43118 break;
43119 if (i < 16)
43120 {
43121 /* See if words move the same in both lanes. If not,
43122 vpblendvb must be used. */
43123 for (i = 0; i < 8; i++)
43124 if (d->perm[i] + 8 != d->perm[i + 8])
43125 {
43126 /* Use vpblendvb. */
43127 for (i = 0; i < 32; ++i)
43128 rperm[i] = (d->perm[i / 2] < 16 ? const0_rtx : constm1_rtx);
43129
43130 vmode = V32QImode;
43131 nelt = 32;
43132 target = gen_reg_rtx (vmode);
43133 op0 = gen_lowpart (vmode, op0);
43134 op1 = gen_lowpart (vmode, op1);
43135 goto finish_pblendvb;
43136 }
43137
43138 /* Use vpblendw. */
43139 for (i = 0; i < 16; ++i)
43140 mask |= (d->perm[i] >= 16) << i;
43141 break;
43142 }
43143
43144 /* Use vpblendd. */
43145 for (i = 0; i < 8; ++i)
43146 mask |= (d->perm[i * 2] >= 16) << i;
43147 vmode = V8SImode;
43148 goto do_subreg;
43149
43150 case V4DImode:
43151 /* Use vpblendd. */
43152 for (i = 0; i < 4; ++i)
43153 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
43154 vmode = V8SImode;
43155 goto do_subreg;
43156
43157 default:
43158 gcc_unreachable ();
43159 }
43160
43161 /* This matches five different patterns with the different modes. */
43162 x = gen_rtx_VEC_MERGE (vmode, op1, op0, GEN_INT (mask));
43163 x = gen_rtx_SET (VOIDmode, target, x);
43164 emit_insn (x);
43165 if (target != d->target)
43166 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
43167
43168 return true;
43169 }
43170
43171 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
43172 in terms of the variable form of vpermilps.
43173
43174 Note that we will have already failed the immediate input vpermilps,
43175 which requires that the high and low part shuffle be identical; the
43176 variable form doesn't require that. */
43177
43178 static bool
43179 expand_vec_perm_vpermil (struct expand_vec_perm_d *d)
43180 {
43181 rtx rperm[8], vperm;
43182 unsigned i;
43183
43184 if (!TARGET_AVX || d->vmode != V8SFmode || !d->one_operand_p)
43185 return false;
43186
43187 /* We can only permute within the 128-bit lane. */
43188 for (i = 0; i < 8; ++i)
43189 {
43190 unsigned e = d->perm[i];
43191 if (i < 4 ? e >= 4 : e < 4)
43192 return false;
43193 }
43194
43195 if (d->testing_p)
43196 return true;
43197
43198 for (i = 0; i < 8; ++i)
43199 {
43200 unsigned e = d->perm[i];
43201
43202 /* Within each 128-bit lane, the elements of op0 are numbered
43203 from 0 and the elements of op1 are numbered from 4. */
43204 if (e >= 8 + 4)
43205 e -= 8;
43206 else if (e >= 4)
43207 e -= 4;
43208
43209 rperm[i] = GEN_INT (e);
43210 }
43211
43212 vperm = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, rperm));
43213 vperm = force_reg (V8SImode, vperm);
43214 emit_insn (gen_avx_vpermilvarv8sf3 (d->target, d->op0, vperm));
43215
43216 return true;
43217 }
43218
43219 /* Return true if permutation D can be performed as VMODE permutation
43220 instead. */
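/* For example (illustrative): the V16QI permutation
   { 4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11 } moves whole 4-byte
   chunks, so it is also valid as the V4SI permutation { 1, 0, 3, 2 }.  */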
43221
43222 static bool
43223 valid_perm_using_mode_p (enum machine_mode vmode, struct expand_vec_perm_d *d)
43224 {
43225 unsigned int i, j, chunk;
43226
43227 if (GET_MODE_CLASS (vmode) != MODE_VECTOR_INT
43228 || GET_MODE_CLASS (d->vmode) != MODE_VECTOR_INT
43229 || GET_MODE_SIZE (vmode) != GET_MODE_SIZE (d->vmode))
43230 return false;
43231
43232 if (GET_MODE_NUNITS (vmode) >= d->nelt)
43233 return true;
43234
43235 chunk = d->nelt / GET_MODE_NUNITS (vmode);
43236 for (i = 0; i < d->nelt; i += chunk)
43237 if (d->perm[i] & (chunk - 1))
43238 return false;
43239 else
43240 for (j = 1; j < chunk; ++j)
43241 if (d->perm[i] + j != d->perm[i + j])
43242 return false;
43243
43244 return true;
43245 }
43246
43247 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
43248 in terms of pshufb, vpperm, vpermq, vpermd, vpermps or vperm2i128. */
43249
43250 static bool
43251 expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
43252 {
43253 unsigned i, nelt, eltsz, mask;
43254 unsigned char perm[64];
43255 enum machine_mode vmode = V16QImode;
43256 rtx rperm[64], vperm, target, op0, op1;
43257
43258 nelt = d->nelt;
43259
43260 if (!d->one_operand_p)
43261 {
43262 if (!TARGET_XOP || GET_MODE_SIZE (d->vmode) != 16)
43263 {
43264 if (TARGET_AVX2
43265 && valid_perm_using_mode_p (V2TImode, d))
43266 {
43267 if (d->testing_p)
43268 return true;
43269
43270 /* Use vperm2i128 insn. The pattern uses
43271 V4DImode instead of V2TImode. */
43272 target = d->target;
43273 if (d->vmode != V4DImode)
43274 target = gen_reg_rtx (V4DImode);
43275 op0 = gen_lowpart (V4DImode, d->op0);
43276 op1 = gen_lowpart (V4DImode, d->op1);
43277 rperm[0]
43278 = GEN_INT ((d->perm[0] / (nelt / 2))
43279 | ((d->perm[nelt / 2] / (nelt / 2)) * 16));
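/* Illustrative example (assuming the usual vperm2i128 selector encoding,
   where bits 1:0 pick the source lane for the destination's low half and
   bits 5:4 for its high half): a V32QI permutation whose low half selects
   the high 128-bit lane of op0 and whose high half selects the low lane
   of op1 yields 1 | (2 * 16) = 0x21 here.  */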
43280 emit_insn (gen_avx2_permv2ti (target, op0, op1, rperm[0]));
43281 if (target != d->target)
43282 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
43283 return true;
43284 }
43285 return false;
43286 }
43287 }
43288 else
43289 {
43290 if (GET_MODE_SIZE (d->vmode) == 16)
43291 {
43292 if (!TARGET_SSSE3)
43293 return false;
43294 }
43295 else if (GET_MODE_SIZE (d->vmode) == 32)
43296 {
43297 if (!TARGET_AVX2)
43298 return false;
43299
43300 /* V4DImode should be already handled through
43301 expand_vselect by vpermq instruction. */
43302 gcc_assert (d->vmode != V4DImode);
43303
43304 vmode = V32QImode;
43305 if (d->vmode == V8SImode
43306 || d->vmode == V16HImode
43307 || d->vmode == V32QImode)
43308 {
43309 /* First see if vpermq can be used for
43310 V8SImode/V16HImode/V32QImode. */
43311 if (valid_perm_using_mode_p (V4DImode, d))
43312 {
43313 for (i = 0; i < 4; i++)
43314 perm[i] = (d->perm[i * nelt / 4] * 4 / nelt) & 3;
43315 if (d->testing_p)
43316 return true;
43317 target = gen_reg_rtx (V4DImode);
43318 if (expand_vselect (target, gen_lowpart (V4DImode, d->op0),
43319 perm, 4, false))
43320 {
43321 emit_move_insn (d->target,
43322 gen_lowpart (d->vmode, target));
43323 return true;
43324 }
43325 return false;
43326 }
43327
43328 /* Next see if vpermd can be used. */
43329 if (valid_perm_using_mode_p (V8SImode, d))
43330 vmode = V8SImode;
43331 }
43332 /* Or if vpermps can be used. */
43333 else if (d->vmode == V8SFmode)
43334 vmode = V8SImode;
43335
43336 if (vmode == V32QImode)
43337 {
43338 /* vpshufb only works intra lanes; it is not
43339 possible to shuffle bytes between the lanes. */
43340 for (i = 0; i < nelt; ++i)
43341 if ((d->perm[i] ^ i) & (nelt / 2))
43342 return false;
43343 }
43344 }
43345 else if (GET_MODE_SIZE (d->vmode) == 64)
43346 {
43347 if (!TARGET_AVX512BW)
43348 return false;
43349 if (vmode == V64QImode)
43350 {
43351 /* vpshufb only works intra lanes; it is not
43352 possible to shuffle bytes between the lanes. */
43353 for (i = 0; i < nelt; ++i)
43354 if ((d->perm[i] ^ i) & (nelt / 4))
43355 return false;
43356 }
43357 }
43358 else
43359 return false;
43360 }
43361
43362 if (d->testing_p)
43363 return true;
43364
43365 if (vmode == V8SImode)
43366 for (i = 0; i < 8; ++i)
43367 rperm[i] = GEN_INT ((d->perm[i * nelt / 8] * 8 / nelt) & 7);
43368 else
43369 {
43370 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
43371 if (!d->one_operand_p)
43372 mask = 2 * nelt - 1;
43373 else if (vmode == V16QImode)
43374 mask = nelt - 1;
43375 else if (vmode == V64QImode)
43376 mask = nelt / 4 - 1;
43377 else
43378 mask = nelt / 2 - 1;
43379
43380 for (i = 0; i < nelt; ++i)
43381 {
43382 unsigned j, e = d->perm[i] & mask;
43383 for (j = 0; j < eltsz; ++j)
43384 rperm[i * eltsz + j] = GEN_INT (e * eltsz + j);
43385 }
43386 }
43387
43388 vperm = gen_rtx_CONST_VECTOR (vmode,
43389 gen_rtvec_v (GET_MODE_NUNITS (vmode), rperm));
43390 vperm = force_reg (vmode, vperm);
43391
43392 target = d->target;
43393 if (d->vmode != vmode)
43394 target = gen_reg_rtx (vmode);
43395 op0 = gen_lowpart (vmode, d->op0);
43396 if (d->one_operand_p)
43397 {
43398 if (vmode == V16QImode)
43399 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
43400 else if (vmode == V32QImode)
43401 emit_insn (gen_avx2_pshufbv32qi3 (target, op0, vperm));
43402 else if (vmode == V64QImode)
43403 emit_insn (gen_avx512bw_pshufbv64qi3 (target, op0, vperm));
43404 else if (vmode == V8SFmode)
43405 emit_insn (gen_avx2_permvarv8sf (target, op0, vperm));
43406 else
43407 emit_insn (gen_avx2_permvarv8si (target, op0, vperm));
43408 }
43409 else
43410 {
43411 op1 = gen_lowpart (vmode, d->op1);
43412 emit_insn (gen_xop_pperm (target, op0, op1, vperm));
43413 }
43414 if (target != d->target)
43415 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
43416
43417 return true;
43418 }
43419
43420 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to instantiate D
43421 in a single instruction. */
43422
43423 static bool
43424 expand_vec_perm_1 (struct expand_vec_perm_d *d)
43425 {
43426 unsigned i, nelt = d->nelt;
43427 unsigned char perm2[MAX_VECT_LEN];
43428
43429 /* Check plain VEC_SELECT first, because AVX has instructions that could
43430 match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
43431 input where SEL+CONCAT may not. */
43432 if (d->one_operand_p)
43433 {
43434 int mask = nelt - 1;
43435 bool identity_perm = true;
43436 bool broadcast_perm = true;
43437
43438 for (i = 0; i < nelt; i++)
43439 {
43440 perm2[i] = d->perm[i] & mask;
43441 if (perm2[i] != i)
43442 identity_perm = false;
43443 if (perm2[i])
43444 broadcast_perm = false;
43445 }
43446
43447 if (identity_perm)
43448 {
43449 if (!d->testing_p)
43450 emit_move_insn (d->target, d->op0);
43451 return true;
43452 }
43453 else if (broadcast_perm && TARGET_AVX2)
43454 {
43455 /* Use vpbroadcast{b,w,d}. */
43456 rtx (*gen) (rtx, rtx) = NULL;
43457 switch (d->vmode)
43458 {
43459 case V64QImode:
43460 if (TARGET_AVX512BW)
43461 gen = gen_avx512bw_vec_dupv64qi;
43462 break;
43463 case V32QImode:
43464 gen = gen_avx2_pbroadcastv32qi_1;
43465 break;
43466 case V32HImode:
43467 if (TARGET_AVX512BW)
43468 gen = gen_avx512bw_vec_dupv32hi;
43469 break;
43470 case V16HImode:
43471 gen = gen_avx2_pbroadcastv16hi_1;
43472 break;
43473 case V16SImode:
43474 if (TARGET_AVX512F)
43475 gen = gen_avx512f_vec_dupv16si;
43476 break;
43477 case V8SImode:
43478 gen = gen_avx2_pbroadcastv8si_1;
43479 break;
43480 case V16QImode:
43481 gen = gen_avx2_pbroadcastv16qi;
43482 break;
43483 case V8HImode:
43484 gen = gen_avx2_pbroadcastv8hi;
43485 break;
43486 case V16SFmode:
43487 if (TARGET_AVX512F)
43488 gen = gen_avx512f_vec_dupv16sf;
43489 break;
43490 case V8SFmode:
43491 gen = gen_avx2_vec_dupv8sf_1;
43492 break;
43493 case V8DFmode:
43494 if (TARGET_AVX512F)
43495 gen = gen_avx512f_vec_dupv8df;
43496 break;
43497 case V8DImode:
43498 if (TARGET_AVX512F)
43499 gen = gen_avx512f_vec_dupv8di;
43500 break;
43501 /* For other modes prefer other shuffles this function creates. */
43502 default: break;
43503 }
43504 if (gen != NULL)
43505 {
43506 if (!d->testing_p)
43507 emit_insn (gen (d->target, d->op0));
43508 return true;
43509 }
43510 }
43511
43512 if (expand_vselect (d->target, d->op0, perm2, nelt, d->testing_p))
43513 return true;
43514
43515 /* There are plenty of patterns in sse.md that are written for
43516 SEL+CONCAT and are not replicated for a single op. Perhaps
43517 that should be changed, to avoid the nastiness here. */
43518
43519 /* Recognize interleave style patterns, which means incrementing
43520 every other permutation operand. */
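/* For instance, a hypothetical V4SImode duplication permutation
   {0, 0, 1, 1} becomes perm2 = {0, 0 + nelt, 1, 1 + nelt}, i.e. the
   interleave-low pattern applied to (op0, op0). */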
43521 for (i = 0; i < nelt; i += 2)
43522 {
43523 perm2[i] = d->perm[i] & mask;
43524 perm2[i + 1] = (d->perm[i + 1] & mask) + nelt;
43525 }
43526 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt,
43527 d->testing_p))
43528 return true;
43529
43530 /* Recognize shufps, which means adding {0, 0, nelt, nelt}. */
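/* For instance, a hypothetical V4SFmode permutation {3, 1, 2, 0}
   becomes perm2 = {3, 1, 2 + nelt, 0 + nelt}, which matches a shufps
   of op0 with itself. */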
43531 if (nelt >= 4)
43532 {
43533 for (i = 0; i < nelt; i += 4)
43534 {
43535 perm2[i + 0] = d->perm[i + 0] & mask;
43536 perm2[i + 1] = d->perm[i + 1] & mask;
43537 perm2[i + 2] = (d->perm[i + 2] & mask) + nelt;
43538 perm2[i + 3] = (d->perm[i + 3] & mask) + nelt;
43539 }
43540
43541 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt,
43542 d->testing_p))
43543 return true;
43544 }
43545 }
43546
43547 /* Finally, try the fully general two operand permute. */
43548 if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt,
43549 d->testing_p))
43550 return true;
43551
43552 /* Recognize interleave style patterns with reversed operands. */
43553 if (!d->one_operand_p)
43554 {
43555 for (i = 0; i < nelt; ++i)
43556 {
43557 unsigned e = d->perm[i];
43558 if (e >= nelt)
43559 e -= nelt;
43560 else
43561 e += nelt;
43562 perm2[i] = e;
43563 }
43564
43565 if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt,
43566 d->testing_p))
43567 return true;
43568 }
43569
43570 /* Try the SSE4.1 blend variable merge instructions. */
43571 if (expand_vec_perm_blend (d))
43572 return true;
43573
43574 /* Try one of the AVX vpermil variable permutations. */
43575 if (expand_vec_perm_vpermil (d))
43576 return true;
43577
43578 /* Try the SSSE3 pshufb or XOP vpperm or AVX2 vperm2i128,
43579 vpshufb, vpermd, vpermps or vpermq variable permutation. */
43580 if (expand_vec_perm_pshufb (d))
43581 return true;
43582
43583 /* Try the AVX2 vpalignr instruction. */
43584 if (expand_vec_perm_palignr (d, true))
43585 return true;
43586
43587 /* Try the AVX512F vpermi2 instructions. */
43588 if (ix86_expand_vec_perm_vpermi2 (NULL_RTX, NULL_RTX, NULL_RTX, NULL_RTX, d))
43589 return true;
43590
43591 return false;
43592 }
43593
43594 /* A subroutine of ix86_expand_vec_perm_const_1. Try to implement D
43595 in terms of a pair of pshuflw + pshufhw instructions. */
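/* For instance, the V8HImode permutation {2, 0, 3, 1, 5, 7, 4, 6} keeps
   the low four indices below 4 and the high four at or above 4, so it
   can be done as a pshuflw followed by a pshufhw. */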
43596
43597 static bool
43598 expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d)
43599 {
43600 unsigned char perm2[MAX_VECT_LEN];
43601 unsigned i;
43602 bool ok;
43603
43604 if (d->vmode != V8HImode || !d->one_operand_p)
43605 return false;
43606
43607 /* The two permutations only operate in 64-bit lanes. */
43608 for (i = 0; i < 4; ++i)
43609 if (d->perm[i] >= 4)
43610 return false;
43611 for (i = 4; i < 8; ++i)
43612 if (d->perm[i] < 4)
43613 return false;
43614
43615 if (d->testing_p)
43616 return true;
43617
43618 /* Emit the pshuflw. */
43619 memcpy (perm2, d->perm, 4);
43620 for (i = 4; i < 8; ++i)
43621 perm2[i] = i;
43622 ok = expand_vselect (d->target, d->op0, perm2, 8, d->testing_p);
43623 gcc_assert (ok);
43624
43625 /* Emit the pshufhw. */
43626 memcpy (perm2 + 4, d->perm + 4, 4);
43627 for (i = 0; i < 4; ++i)
43628 perm2[i] = i;
43629 ok = expand_vselect (d->target, d->target, perm2, 8, d->testing_p);
43630 gcc_assert (ok);
43631
43632 return true;
43633 }
43634
43635 /* A subroutine of ix86_expand_vec_perm_const_1. Try to simplify
43636 the permutation using the SSSE3 palignr instruction. This succeeds
43637 when all of the elements in PERM fit within one vector and we merely
43638 need to shift them down so that a single vector permutation has a
43639 chance to succeed. If SINGLE_INSN_ONLY_P, succeed if only
43640 the vpalignr instruction itself can perform the requested permutation. */
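/* For instance, a hypothetical V16QImode permutation {5, 6, ..., 19, 20}
   has all of its indices inside one 16-byte window, so a palignr of
   (op1, op0) by 5 bytes leaves the identity as the remaining
   single-vector shuffle. */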
43641
43642 static bool
43643 expand_vec_perm_palignr (struct expand_vec_perm_d *d, bool single_insn_only_p)
43644 {
43645 unsigned i, nelt = d->nelt;
43646 unsigned min, max, minswap, maxswap;
43647 bool in_order, ok, swap = false;
43648 rtx shift, target;
43649 struct expand_vec_perm_d dcopy;
43650
43651 /* Even with AVX, palignr only operates on 128-bit vectors;
43652 with AVX2, palignr operates on both 128-bit lanes in parallel. */
43653 if ((!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
43654 && (!TARGET_AVX2 || GET_MODE_SIZE (d->vmode) != 32))
43655 return false;
43656
43657 min = 2 * nelt;
43658 max = 0;
43659 minswap = 2 * nelt;
43660 maxswap = 0;
43661 for (i = 0; i < nelt; ++i)
43662 {
43663 unsigned e = d->perm[i];
43664 unsigned eswap = d->perm[i] ^ nelt;
43665 if (GET_MODE_SIZE (d->vmode) == 32)
43666 {
43667 e = (e & ((nelt / 2) - 1)) | ((e & nelt) >> 1);
43668 eswap = e ^ (nelt / 2);
43669 }
43670 if (e < min)
43671 min = e;
43672 if (e > max)
43673 max = e;
43674 if (eswap < minswap)
43675 minswap = eswap;
43676 if (eswap > maxswap)
43677 maxswap = eswap;
43678 }
43679 if (min == 0
43680 || max - min >= (GET_MODE_SIZE (d->vmode) == 32 ? nelt / 2 : nelt))
43681 {
43682 if (d->one_operand_p
43683 || minswap == 0
43684 || maxswap - minswap >= (GET_MODE_SIZE (d->vmode) == 32
43685 ? nelt / 2 : nelt))
43686 return false;
43687 swap = true;
43688 min = minswap;
43689 max = maxswap;
43690 }
43691
43692 /* Given that we have SSSE3, we know we'll be able to implement the
43693 single operand permutation after the palignr with pshufb for
43694 128-bit vectors. If SINGLE_INSN_ONLY_P, in_order has to be computed
43695 first. */
43696 if (d->testing_p && GET_MODE_SIZE (d->vmode) == 16 && !single_insn_only_p)
43697 return true;
43698
43699 dcopy = *d;
43700 if (swap)
43701 {
43702 dcopy.op0 = d->op1;
43703 dcopy.op1 = d->op0;
43704 for (i = 0; i < nelt; ++i)
43705 dcopy.perm[i] ^= nelt;
43706 }
43707
43708 in_order = true;
43709 for (i = 0; i < nelt; ++i)
43710 {
43711 unsigned e = dcopy.perm[i];
43712 if (GET_MODE_SIZE (d->vmode) == 32
43713 && e >= nelt
43714 && (e & (nelt / 2 - 1)) < min)
43715 e = e - min - (nelt / 2);
43716 else
43717 e = e - min;
43718 if (e != i)
43719 in_order = false;
43720 dcopy.perm[i] = e;
43721 }
43722 dcopy.one_operand_p = true;
43723
43724 if (single_insn_only_p && !in_order)
43725 return false;
43726
43727 /* For AVX2, test whether we can permute the result in one instruction. */
43728 if (d->testing_p)
43729 {
43730 if (in_order)
43731 return true;
43732 dcopy.op1 = dcopy.op0;
43733 return expand_vec_perm_1 (&dcopy);
43734 }
43735
43736 shift = GEN_INT (min * GET_MODE_BITSIZE (GET_MODE_INNER (d->vmode)));
43737 if (GET_MODE_SIZE (d->vmode) == 16)
43738 {
43739 target = gen_reg_rtx (TImode);
43740 emit_insn (gen_ssse3_palignrti (target, gen_lowpart (TImode, dcopy.op1),
43741 gen_lowpart (TImode, dcopy.op0), shift));
43742 }
43743 else
43744 {
43745 target = gen_reg_rtx (V2TImode);
43746 emit_insn (gen_avx2_palignrv2ti (target,
43747 gen_lowpart (V2TImode, dcopy.op1),
43748 gen_lowpart (V2TImode, dcopy.op0),
43749 shift));
43750 }
43751
43752 dcopy.op0 = dcopy.op1 = gen_lowpart (d->vmode, target);
43753
43754 /* Test for the degenerate case where the alignment by itself
43755 produces the desired permutation. */
43756 if (in_order)
43757 {
43758 emit_move_insn (d->target, dcopy.op0);
43759 return true;
43760 }
43761
43762 ok = expand_vec_perm_1 (&dcopy);
43763 gcc_assert (ok || GET_MODE_SIZE (d->vmode) == 32);
43764
43765 return ok;
43766 }
43767
43768 /* A subroutine of ix86_expand_vec_perm_const_1. Try to simplify
43769 the permutation using the SSE4_1 pblendv instruction. Potentially
43770 reduces the permutation from 2 pshufb insns and an ior to 1 pshufb and a pblendv. */
43771
43772 static bool
43773 expand_vec_perm_pblendv (struct expand_vec_perm_d *d)
43774 {
43775 unsigned i, which, nelt = d->nelt;
43776 struct expand_vec_perm_d dcopy, dcopy1;
43777 enum machine_mode vmode = d->vmode;
43778 bool ok;
43779
43780 /* Use the same checks as in expand_vec_perm_blend. */
43781 if (d->one_operand_p)
43782 return false;
43783 if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
43784 ;
43785 else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
43786 ;
43787 else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
43788 ;
43789 else
43790 return false;
43791
43792 /* Figure out which permutation elements do not stay in their
43793 respective lanes. */
43794 for (i = 0, which = 0; i < nelt; ++i)
43795 {
43796 unsigned e = d->perm[i];
43797 if (e != i)
43798 which |= (e < nelt ? 1 : 2);
43799 }
43800 /* We can pblend the part where elements do not stay in their
43801 respective lanes only when these elements all come from one
43802 half of the permutation.
43803 {0 1 8 3 4 5 9 7} is ok: 8 and 9 are not in their respective
43804 lanes, but both are >= 8.
43805 {0 1 8 3 4 5 2 7} is not ok: 2 and 8 are not in their
43806 respective lanes, and 8 is >= 8 but 2 is not. */
43807 if (which != 1 && which != 2)
43808 return false;
43809 if (d->testing_p && GET_MODE_SIZE (vmode) == 16)
43810 return true;
43811
43812 /* First we apply a one operand permutation to the part where
43813 elements do not stay in their respective lanes. */
43814 dcopy = *d;
43815 if (which == 2)
43816 dcopy.op0 = dcopy.op1 = d->op1;
43817 else
43818 dcopy.op0 = dcopy.op1 = d->op0;
43819 dcopy.one_operand_p = true;
43820
43821 for (i = 0; i < nelt; ++i)
43822 dcopy.perm[i] = d->perm[i] & (nelt - 1);
43823
43824 ok = expand_vec_perm_1 (&dcopy);
43825 if (GET_MODE_SIZE (vmode) != 16 && !ok)
43826 return false;
43827 else
43828 gcc_assert (ok);
43829 if (d->testing_p)
43830 return true;
43831
43832 /* Next we put permuted elements into their positions. */
43833 dcopy1 = *d;
43834 if (which == 2)
43835 dcopy1.op1 = dcopy.target;
43836 else
43837 dcopy1.op0 = dcopy.target;
43838
43839 for (i = 0; i < nelt; ++i)
43840 dcopy1.perm[i] = ((d->perm[i] >= nelt) ? (nelt + i) : i);
43841
43842 ok = expand_vec_perm_blend (&dcopy1);
43843 gcc_assert (ok);
43844
43845 return true;
43846 }
43847
43848 static bool expand_vec_perm_interleave3 (struct expand_vec_perm_d *d);
43849
43850 /* A subroutine of ix86_expand_vec_perm_const_1. Try to simplify
43851 a two vector permutation into a single vector permutation by using
43852 an interleave operation to merge the vectors. */
43853
43854 static bool
43855 expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
43856 {
43857 struct expand_vec_perm_d dremap, dfinal;
43858 unsigned i, nelt = d->nelt, nelt2 = nelt / 2;
43859 unsigned HOST_WIDE_INT contents;
43860 unsigned char remap[2 * MAX_VECT_LEN];
43861 rtx_insn *seq;
43862 bool ok, same_halves = false;
43863
43864 if (GET_MODE_SIZE (d->vmode) == 16)
43865 {
43866 if (d->one_operand_p)
43867 return false;
43868 }
43869 else if (GET_MODE_SIZE (d->vmode) == 32)
43870 {
43871 if (!TARGET_AVX)
43872 return false;
43873 /* For 32-byte modes allow even d->one_operand_p.
43874 The lack of cross-lane shuffling in some instructions
43875 might prevent a single insn shuffle. */
43876 dfinal = *d;
43877 dfinal.testing_p = true;
43878 /* If expand_vec_perm_interleave3 can expand this into
43879 a 3 insn sequence, give up and let it be expanded as
43880 a 3 insn sequence. While that is one insn longer,
43881 it doesn't need a memory operand, and in the common
43882 case where both the interleave low and interleave high
43883 permutations with the same operands are adjacent, only
43884 4 insns are needed for both after CSE. */
43885 if (expand_vec_perm_interleave3 (&dfinal))
43886 return false;
43887 }
43888 else
43889 return false;
43890
43891 /* Examine from whence the elements come. */
43892 contents = 0;
43893 for (i = 0; i < nelt; ++i)
43894 contents |= ((unsigned HOST_WIDE_INT) 1) << d->perm[i];
43895
43896 memset (remap, 0xff, sizeof (remap));
43897 dremap = *d;
43898
43899 if (GET_MODE_SIZE (d->vmode) == 16)
43900 {
43901 unsigned HOST_WIDE_INT h1, h2, h3, h4;
43902
43903 /* Split the two input vectors into 4 halves. */
43904 h1 = (((unsigned HOST_WIDE_INT) 1) << nelt2) - 1;
43905 h2 = h1 << nelt2;
43906 h3 = h2 << nelt2;
43907 h4 = h3 << nelt2;
43908
43909 /* If the elements are from the low halves, use interleave low;
43910 similarly for interleave high. If the elements are from mis-matched
43911 halves, we can use shufps for V4SF/V4SI or do a DImode shuffle. */
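/* For instance, in a hypothetical V4SImode case where every d->perm[i]
   is one of {0, 1, 4, 5}, dremap becomes the interleave-low pattern
   {0, 4, 1, 5} and remap sends 0->0, 4->1, 1->2, 5->3 for the final
   single-operand shuffle. */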
43912 if ((contents & (h1 | h3)) == contents)
43913 {
43914 /* punpckl* */
43915 for (i = 0; i < nelt2; ++i)
43916 {
43917 remap[i] = i * 2;
43918 remap[i + nelt] = i * 2 + 1;
43919 dremap.perm[i * 2] = i;
43920 dremap.perm[i * 2 + 1] = i + nelt;
43921 }
43922 if (!TARGET_SSE2 && d->vmode == V4SImode)
43923 dremap.vmode = V4SFmode;
43924 }
43925 else if ((contents & (h2 | h4)) == contents)
43926 {
43927 /* punpckh* */
43928 for (i = 0; i < nelt2; ++i)
43929 {
43930 remap[i + nelt2] = i * 2;
43931 remap[i + nelt + nelt2] = i * 2 + 1;
43932 dremap.perm[i * 2] = i + nelt2;
43933 dremap.perm[i * 2 + 1] = i + nelt + nelt2;
43934 }
43935 if (!TARGET_SSE2 && d->vmode == V4SImode)
43936 dremap.vmode = V4SFmode;
43937 }
43938 else if ((contents & (h1 | h4)) == contents)
43939 {
43940 /* shufps */
43941 for (i = 0; i < nelt2; ++i)
43942 {
43943 remap[i] = i;
43944 remap[i + nelt + nelt2] = i + nelt2;
43945 dremap.perm[i] = i;
43946 dremap.perm[i + nelt2] = i + nelt + nelt2;
43947 }
43948 if (nelt != 4)
43949 {
43950 /* shufpd */
43951 dremap.vmode = V2DImode;
43952 dremap.nelt = 2;
43953 dremap.perm[0] = 0;
43954 dremap.perm[1] = 3;
43955 }
43956 }
43957 else if ((contents & (h2 | h3)) == contents)
43958 {
43959 /* shufps */
43960 for (i = 0; i < nelt2; ++i)
43961 {
43962 remap[i + nelt2] = i;
43963 remap[i + nelt] = i + nelt2;
43964 dremap.perm[i] = i + nelt2;
43965 dremap.perm[i + nelt2] = i + nelt;
43966 }
43967 if (nelt != 4)
43968 {
43969 /* shufpd */
43970 dremap.vmode = V2DImode;
43971 dremap.nelt = 2;
43972 dremap.perm[0] = 1;
43973 dremap.perm[1] = 2;
43974 }
43975 }
43976 else
43977 return false;
43978 }
43979 else
43980 {
43981 unsigned int nelt4 = nelt / 4, nzcnt = 0;
43982 unsigned HOST_WIDE_INT q[8];
43983 unsigned int nonzero_halves[4];
43984
43985 /* Split the two input vectors into 8 quarters. */
43986 q[0] = (((unsigned HOST_WIDE_INT) 1) << nelt4) - 1;
43987 for (i = 1; i < 8; ++i)
43988 q[i] = q[0] << (nelt4 * i);
43989 for (i = 0; i < 4; ++i)
43990 if (((q[2 * i] | q[2 * i + 1]) & contents) != 0)
43991 {
43992 nonzero_halves[nzcnt] = i;
43993 ++nzcnt;
43994 }
43995
43996 if (nzcnt == 1)
43997 {
43998 gcc_assert (d->one_operand_p);
43999 nonzero_halves[1] = nonzero_halves[0];
44000 same_halves = true;
44001 }
44002 else if (d->one_operand_p)
44003 {
44004 gcc_assert (nonzero_halves[0] == 0);
44005 gcc_assert (nonzero_halves[1] == 1);
44006 }
44007
44008 if (nzcnt <= 2)
44009 {
44010 if (d->perm[0] / nelt2 == nonzero_halves[1])
44011 {
44012 /* Attempt to increase the likelihood that dfinal
44013 shuffle will be intra-lane. */
44014 char tmph = nonzero_halves[0];
44015 nonzero_halves[0] = nonzero_halves[1];
44016 nonzero_halves[1] = tmph;
44017 }
44018
44019 /* vperm2f128 or vperm2i128. */
44020 for (i = 0; i < nelt2; ++i)
44021 {
44022 remap[i + nonzero_halves[1] * nelt2] = i + nelt2;
44023 remap[i + nonzero_halves[0] * nelt2] = i;
44024 dremap.perm[i + nelt2] = i + nonzero_halves[1] * nelt2;
44025 dremap.perm[i] = i + nonzero_halves[0] * nelt2;
44026 }
44027
44028 if (d->vmode != V8SFmode
44029 && d->vmode != V4DFmode
44030 && d->vmode != V8SImode)
44031 {
44032 dremap.vmode = V8SImode;
44033 dremap.nelt = 8;
44034 for (i = 0; i < 4; ++i)
44035 {
44036 dremap.perm[i] = i + nonzero_halves[0] * 4;
44037 dremap.perm[i + 4] = i + nonzero_halves[1] * 4;
44038 }
44039 }
44040 }
44041 else if (d->one_operand_p)
44042 return false;
44043 else if (TARGET_AVX2
44044 && (contents & (q[0] | q[2] | q[4] | q[6])) == contents)
44045 {
44046 /* vpunpckl* */
44047 for (i = 0; i < nelt4; ++i)
44048 {
44049 remap[i] = i * 2;
44050 remap[i + nelt] = i * 2 + 1;
44051 remap[i + nelt2] = i * 2 + nelt2;
44052 remap[i + nelt + nelt2] = i * 2 + nelt2 + 1;
44053 dremap.perm[i * 2] = i;
44054 dremap.perm[i * 2 + 1] = i + nelt;
44055 dremap.perm[i * 2 + nelt2] = i + nelt2;
44056 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2;
44057 }
44058 }
44059 else if (TARGET_AVX2
44060 && (contents & (q[1] | q[3] | q[5] | q[7])) == contents)
44061 {
44062 /* vpunpckh* */
44063 for (i = 0; i < nelt4; ++i)
44064 {
44065 remap[i + nelt4] = i * 2;
44066 remap[i + nelt + nelt4] = i * 2 + 1;
44067 remap[i + nelt2 + nelt4] = i * 2 + nelt2;
44068 remap[i + nelt + nelt2 + nelt4] = i * 2 + nelt2 + 1;
44069 dremap.perm[i * 2] = i + nelt4;
44070 dremap.perm[i * 2 + 1] = i + nelt + nelt4;
44071 dremap.perm[i * 2 + nelt2] = i + nelt2 + nelt4;
44072 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2 + nelt4;
44073 }
44074 }
44075 else
44076 return false;
44077 }
44078
44079 /* Use the remapping array set up above to move the elements from their
44080 swizzled locations into their final destinations. */
44081 dfinal = *d;
44082 for (i = 0; i < nelt; ++i)
44083 {
44084 unsigned e = remap[d->perm[i]];
44085 gcc_assert (e < nelt);
44086 /* If same_halves is true, both halves of the remapped vector are the
44087 same. Avoid cross-lane accesses if possible. */
44088 if (same_halves && i >= nelt2)
44089 {
44090 gcc_assert (e < nelt2);
44091 dfinal.perm[i] = e + nelt2;
44092 }
44093 else
44094 dfinal.perm[i] = e;
44095 }
44096 if (!d->testing_p)
44097 {
44098 dremap.target = gen_reg_rtx (dremap.vmode);
44099 dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
44100 }
44101 dfinal.op1 = dfinal.op0;
44102 dfinal.one_operand_p = true;
44103
44104 /* Test if the final remap can be done with a single insn. For V4SFmode or
44105 V4SImode this *will* succeed. For V8HImode or V16QImode it may not. */
44106 start_sequence ();
44107 ok = expand_vec_perm_1 (&dfinal);
44108 seq = get_insns ();
44109 end_sequence ();
44110
44111 if (!ok)
44112 return false;
44113
44114 if (d->testing_p)
44115 return true;
44116
44117 if (dremap.vmode != dfinal.vmode)
44118 {
44119 dremap.op0 = gen_lowpart (dremap.vmode, dremap.op0);
44120 dremap.op1 = gen_lowpart (dremap.vmode, dremap.op1);
44121 }
44122
44123 ok = expand_vec_perm_1 (&dremap);
44124 gcc_assert (ok);
44125
44126 emit_insn (seq);
44127 return true;
44128 }
44129
44130 /* A subroutine of ix86_expand_vec_perm_const_1. Try to simplify
44131 a single vector cross-lane permutation into vpermq followed
44132 by any of the single insn permutations. */
44133
44134 static bool
44135 expand_vec_perm_vpermq_perm_1 (struct expand_vec_perm_d *d)
44136 {
44137 struct expand_vec_perm_d dremap, dfinal;
44138 unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, nelt4 = nelt / 4;
44139 unsigned contents[2];
44140 bool ok;
44141
44142 if (!(TARGET_AVX2
44143 && (d->vmode == V32QImode || d->vmode == V16HImode)
44144 && d->one_operand_p))
44145 return false;
44146
44147 contents[0] = 0;
44148 contents[1] = 0;
44149 for (i = 0; i < nelt2; ++i)
44150 {
44151 contents[0] |= 1u << (d->perm[i] / nelt4);
44152 contents[1] |= 1u << (d->perm[i + nelt2] / nelt4);
44153 }
44154
44155 for (i = 0; i < 2; ++i)
44156 {
44157 unsigned int cnt = 0;
44158 for (j = 0; j < 4; ++j)
44159 if ((contents[i] & (1u << j)) != 0 && ++cnt > 2)
44160 return false;
44161 }
44162
44163 if (d->testing_p)
44164 return true;
44165
44166 dremap = *d;
44167 dremap.vmode = V4DImode;
44168 dremap.nelt = 4;
44169 dremap.target = gen_reg_rtx (V4DImode);
44170 dremap.op0 = gen_lowpart (V4DImode, d->op0);
44171 dremap.op1 = dremap.op0;
44172 dremap.one_operand_p = true;
44173 for (i = 0; i < 2; ++i)
44174 {
44175 unsigned int cnt = 0;
44176 for (j = 0; j < 4; ++j)
44177 if ((contents[i] & (1u << j)) != 0)
44178 dremap.perm[2 * i + cnt++] = j;
44179 for (; cnt < 2; ++cnt)
44180 dremap.perm[2 * i + cnt] = 0;
44181 }
44182
44183 dfinal = *d;
44184 dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
44185 dfinal.op1 = dfinal.op0;
44186 dfinal.one_operand_p = true;
44187 for (i = 0, j = 0; i < nelt; ++i)
44188 {
44189 if (i == nelt2)
44190 j = 2;
44191 dfinal.perm[i] = (d->perm[i] & (nelt4 - 1)) | (j ? nelt2 : 0);
44192 if ((d->perm[i] / nelt4) == dremap.perm[j])
44193 ;
44194 else if ((d->perm[i] / nelt4) == dremap.perm[j + 1])
44195 dfinal.perm[i] |= nelt4;
44196 else
44197 gcc_unreachable ();
44198 }
44199
44200 ok = expand_vec_perm_1 (&dremap);
44201 gcc_assert (ok);
44202
44203 ok = expand_vec_perm_1 (&dfinal);
44204 gcc_assert (ok);
44205
44206 return true;
44207 }
44208
44209 /* A subroutine of ix86_expand_vec_perm_const_1. Try to expand
44210 a vector permutation using two instructions, vperm2f128 resp.
44211 vperm2i128 followed by any single in-lane permutation. */
44212
44213 static bool
44214 expand_vec_perm_vperm2f128 (struct expand_vec_perm_d *d)
44215 {
44216 struct expand_vec_perm_d dfirst, dsecond;
44217 unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, perm;
44218 bool ok;
44219
44220 if (!TARGET_AVX
44221 || GET_MODE_SIZE (d->vmode) != 32
44222 || (d->vmode != V8SFmode && d->vmode != V4DFmode && !TARGET_AVX2))
44223 return false;
44224
44225 dsecond = *d;
44226 dsecond.one_operand_p = false;
44227 dsecond.testing_p = true;
44228
44229 /* ((perm << 2)|perm) & 0x33 is the vperm2[fi]128
44230 immediate. For perm < 16 the second permutation uses
44231 d->op0 as first operand, for perm >= 16 it uses d->op1
44232 as first operand. The second operand is the result of
44233 vperm2[fi]128. */
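/* For instance, perm == 8 selects lane 0 of d->op0 for the low half and
   lane 0 of d->op1 for the high half, giving the immediate 0x20;
   perm == 13 selects the two high lanes, giving 0x31. */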
44234 for (perm = 0; perm < 32; perm++)
44235 {
44236 /* Ignore permutations which do not move anything cross-lane. */
44237 if (perm < 16)
44238 {
44239 /* The second shuffle for e.g. V4DFmode has
44240 0123 and ABCD operands.
44241 Ignore AB23, as 23 is already in the second lane
44242 of the first operand. */
44243 if ((perm & 0xc) == (1 << 2)) continue;
44244 /* And 01CD, as 01 is in the first lane of the first
44245 operand. */
44246 if ((perm & 3) == 0) continue;
44247 /* And 4567, as then the vperm2[fi]128 doesn't change
44248 anything on the original 4567 second operand. */
44249 if ((perm & 0xf) == ((3 << 2) | 2)) continue;
44250 }
44251 else
44252 {
44253 /* The second shuffle for e.g. V4DFmode has
44254 4567 and ABCD operands.
44255 Ignore AB67, as 67 is already in the second lane
44256 of the first operand. */
44257 if ((perm & 0xc) == (3 << 2)) continue;
44258 /* And 45CD, as 45 is in the first lane of the first
44259 operand. */
44260 if ((perm & 3) == 2) continue;
44261 /* And 0123, as then the vperm2[fi]128 doesn't change
44262 anything on the original 0123 first operand. */
44263 if ((perm & 0xf) == (1 << 2)) continue;
44264 }
44265
44266 for (i = 0; i < nelt; i++)
44267 {
44268 j = d->perm[i] / nelt2;
44269 if (j == ((perm >> (2 * (i >= nelt2))) & 3))
44270 dsecond.perm[i] = nelt + (i & nelt2) + (d->perm[i] & (nelt2 - 1));
44271 else if (j == (unsigned) (i >= nelt2) + 2 * (perm >= 16))
44272 dsecond.perm[i] = d->perm[i] & (nelt - 1);
44273 else
44274 break;
44275 }
44276
44277 if (i == nelt)
44278 {
44279 start_sequence ();
44280 ok = expand_vec_perm_1 (&dsecond);
44281 end_sequence ();
44282 }
44283 else
44284 ok = false;
44285
44286 if (ok)
44287 {
44288 if (d->testing_p)
44289 return true;
44290
44291 /* Found a usable second shuffle. dfirst will be
44292 vperm2f128 on d->op0 and d->op1. */
44293 dsecond.testing_p = false;
44294 dfirst = *d;
44295 dfirst.target = gen_reg_rtx (d->vmode);
44296 for (i = 0; i < nelt; i++)
44297 dfirst.perm[i] = (i & (nelt2 - 1))
44298 + ((perm >> (2 * (i >= nelt2))) & 3) * nelt2;
44299
44300 canonicalize_perm (&dfirst);
44301 ok = expand_vec_perm_1 (&dfirst);
44302 gcc_assert (ok);
44303
44304 /* And dsecond is some single insn shuffle, taking
44305 d->op0 and result of vperm2f128 (if perm < 16) or
44306 d->op1 and result of vperm2f128 (otherwise). */
44307 if (perm >= 16)
44308 dsecond.op0 = dsecond.op1;
44309 dsecond.op1 = dfirst.target;
44310
44311 ok = expand_vec_perm_1 (&dsecond);
44312 gcc_assert (ok);
44313
44314 return true;
44315 }
44316
44317 /* For one operand, the only useful vperm2f128 permutation is 0x01
44318 aka lanes swap. */
44319 if (d->one_operand_p)
44320 return false;
44321 }
44322
44323 return false;
44324 }
44325
44326 /* A subroutine of ix86_expand_vec_perm_const_1. Try to simplify
44327 a two vector permutation using 2 intra-lane interleave insns
44328 and cross-lane shuffle for 32-byte vectors. */
44329
44330 static bool
44331 expand_vec_perm_interleave3 (struct expand_vec_perm_d *d)
44332 {
44333 unsigned i, nelt;
44334 rtx (*gen) (rtx, rtx, rtx);
44335
44336 if (d->one_operand_p)
44337 return false;
44338 if (TARGET_AVX2 && GET_MODE_SIZE (d->vmode) == 32)
44339 ;
44340 else if (TARGET_AVX && (d->vmode == V8SFmode || d->vmode == V4DFmode))
44341 ;
44342 else
44343 return false;
44344
44345 nelt = d->nelt;
44346 if (d->perm[0] != 0 && d->perm[0] != nelt / 2)
44347 return false;
44348 for (i = 0; i < nelt; i += 2)
44349 if (d->perm[i] != d->perm[0] + i / 2
44350 || d->perm[i + 1] != d->perm[0] + i / 2 + nelt)
44351 return false;
44352
44353 if (d->testing_p)
44354 return true;
44355
44356 switch (d->vmode)
44357 {
44358 case V32QImode:
44359 if (d->perm[0])
44360 gen = gen_vec_interleave_highv32qi;
44361 else
44362 gen = gen_vec_interleave_lowv32qi;
44363 break;
44364 case V16HImode:
44365 if (d->perm[0])
44366 gen = gen_vec_interleave_highv16hi;
44367 else
44368 gen = gen_vec_interleave_lowv16hi;
44369 break;
44370 case V8SImode:
44371 if (d->perm[0])
44372 gen = gen_vec_interleave_highv8si;
44373 else
44374 gen = gen_vec_interleave_lowv8si;
44375 break;
44376 case V4DImode:
44377 if (d->perm[0])
44378 gen = gen_vec_interleave_highv4di;
44379 else
44380 gen = gen_vec_interleave_lowv4di;
44381 break;
44382 case V8SFmode:
44383 if (d->perm[0])
44384 gen = gen_vec_interleave_highv8sf;
44385 else
44386 gen = gen_vec_interleave_lowv8sf;
44387 break;
44388 case V4DFmode:
44389 if (d->perm[0])
44390 gen = gen_vec_interleave_highv4df;
44391 else
44392 gen = gen_vec_interleave_lowv4df;
44393 break;
44394 default:
44395 gcc_unreachable ();
44396 }
44397
44398 emit_insn (gen (d->target, d->op0, d->op1));
44399 return true;
44400 }
44401
44402 /* A subroutine of ix86_expand_vec_perm_const_1. Try to implement
44403 a single vector permutation using a single intra-lane vector
44404 permutation, vperm2f128 swapping the lanes and vblend* insn blending
44405 the non-swapped and swapped vectors together. */
44406
44407 static bool
44408 expand_vec_perm_vperm2f128_vblend (struct expand_vec_perm_d *d)
44409 {
44410 struct expand_vec_perm_d dfirst, dsecond;
44411 unsigned i, j, msk, nelt = d->nelt, nelt2 = nelt / 2;
44412 rtx_insn *seq;
44413 bool ok;
44414 rtx (*blend) (rtx, rtx, rtx, rtx) = NULL;
44415
44416 if (!TARGET_AVX
44417 || TARGET_AVX2
44418 || (d->vmode != V8SFmode && d->vmode != V4DFmode)
44419 || !d->one_operand_p)
44420 return false;
44421
44422 dfirst = *d;
44423 for (i = 0; i < nelt; i++)
44424 dfirst.perm[i] = 0xff;
44425 for (i = 0, msk = 0; i < nelt; i++)
44426 {
44427 j = (d->perm[i] & nelt2) ? i | nelt2 : i & ~nelt2;
44428 if (dfirst.perm[j] != 0xff && dfirst.perm[j] != d->perm[i])
44429 return false;
44430 dfirst.perm[j] = d->perm[i];
44431 if (j != i)
44432 msk |= (1 << i);
44433 }
44434 for (i = 0; i < nelt; i++)
44435 if (dfirst.perm[i] == 0xff)
44436 dfirst.perm[i] = i;
44437
44438 if (!d->testing_p)
44439 dfirst.target = gen_reg_rtx (dfirst.vmode);
44440
44441 start_sequence ();
44442 ok = expand_vec_perm_1 (&dfirst);
44443 seq = get_insns ();
44444 end_sequence ();
44445
44446 if (!ok)
44447 return false;
44448
44449 if (d->testing_p)
44450 return true;
44451
44452 emit_insn (seq);
44453
44454 dsecond = *d;
44455 dsecond.op0 = dfirst.target;
44456 dsecond.op1 = dfirst.target;
44457 dsecond.one_operand_p = true;
44458 dsecond.target = gen_reg_rtx (dsecond.vmode);
44459 for (i = 0; i < nelt; i++)
44460 dsecond.perm[i] = i ^ nelt2;
44461
44462 ok = expand_vec_perm_1 (&dsecond);
44463 gcc_assert (ok);
44464
44465 blend = d->vmode == V8SFmode ? gen_avx_blendps256 : gen_avx_blendpd256;
44466 emit_insn (blend (d->target, dfirst.target, dsecond.target, GEN_INT (msk)));
44467 return true;
44468 }
44469
44470 /* A subroutine of ix86_expand_vec_perm_const_1. Implement a V4DF
44471 permutation using two vperm2f128, followed by a vshufpd insn blending
44472 the two vectors together. */
44473
44474 static bool
44475 expand_vec_perm_2vperm2f128_vshuf (struct expand_vec_perm_d *d)
44476 {
44477 struct expand_vec_perm_d dfirst, dsecond, dthird;
44478 bool ok;
44479
44480 if (!TARGET_AVX || (d->vmode != V4DFmode))
44481 return false;
44482
44483 if (d->testing_p)
44484 return true;
44485
44486 dfirst = *d;
44487 dsecond = *d;
44488 dthird = *d;
44489
44490 dfirst.perm[0] = (d->perm[0] & ~1);
44491 dfirst.perm[1] = (d->perm[0] & ~1) + 1;
44492 dfirst.perm[2] = (d->perm[2] & ~1);
44493 dfirst.perm[3] = (d->perm[2] & ~1) + 1;
44494 dsecond.perm[0] = (d->perm[1] & ~1);
44495 dsecond.perm[1] = (d->perm[1] & ~1) + 1;
44496 dsecond.perm[2] = (d->perm[3] & ~1);
44497 dsecond.perm[3] = (d->perm[3] & ~1) + 1;
44498 dthird.perm[0] = (d->perm[0] % 2);
44499 dthird.perm[1] = (d->perm[1] % 2) + 4;
44500 dthird.perm[2] = (d->perm[2] % 2) + 2;
44501 dthird.perm[3] = (d->perm[3] % 2) + 6;
44502
44503 dfirst.target = gen_reg_rtx (dfirst.vmode);
44504 dsecond.target = gen_reg_rtx (dsecond.vmode);
44505 dthird.op0 = dfirst.target;
44506 dthird.op1 = dsecond.target;
44507 dthird.one_operand_p = false;
44508
44509 canonicalize_perm (&dfirst);
44510 canonicalize_perm (&dsecond);
44511
44512 ok = expand_vec_perm_1 (&dfirst)
44513 && expand_vec_perm_1 (&dsecond)
44514 && expand_vec_perm_1 (&dthird);
44515
44516 gcc_assert (ok);
44517
44518 return true;
44519 }
44520
44521 /* A subroutine of expand_vec_perm_even_odd_1. Implement the double-word
44522 permutation with two pshufb insns and an ior. We should have already
44523 failed all two instruction sequences. */
44524
44525 static bool
44526 expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
44527 {
44528 rtx rperm[2][16], vperm, l, h, op, m128;
44529 unsigned int i, nelt, eltsz;
44530
44531 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
44532 return false;
44533 gcc_assert (!d->one_operand_p);
44534
44535 if (d->testing_p)
44536 return true;
44537
44538 nelt = d->nelt;
44539 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
44540
44541 /* Generate two permutation masks. If the required element is within
44542 the given vector it is shuffled into the proper lane. If the required
44543 element is in the other vector, force a zero into the lane by setting
44544 bit 7 in the permutation mask. */
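/* For instance, for a hypothetical V16QImode extract-even permutation
   {0, 2, ..., 30}, byte 8 of the result comes from element 0 of op1, so
   the second mask holds index 0 there while the first mask holds -128,
   and vice versa for bytes taken from op0. */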
44545 m128 = GEN_INT (-128);
44546 for (i = 0; i < nelt; ++i)
44547 {
44548 unsigned j, e = d->perm[i];
44549 unsigned which = (e >= nelt);
44550 if (e >= nelt)
44551 e -= nelt;
44552
44553 for (j = 0; j < eltsz; ++j)
44554 {
44555 rperm[which][i*eltsz + j] = GEN_INT (e*eltsz + j);
44556 rperm[1-which][i*eltsz + j] = m128;
44557 }
44558 }
44559
44560 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[0]));
44561 vperm = force_reg (V16QImode, vperm);
44562
44563 l = gen_reg_rtx (V16QImode);
44564 op = gen_lowpart (V16QImode, d->op0);
44565 emit_insn (gen_ssse3_pshufbv16qi3 (l, op, vperm));
44566
44567 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[1]));
44568 vperm = force_reg (V16QImode, vperm);
44569
44570 h = gen_reg_rtx (V16QImode);
44571 op = gen_lowpart (V16QImode, d->op1);
44572 emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm));
44573
44574 op = d->target;
44575 if (d->vmode != V16QImode)
44576 op = gen_reg_rtx (V16QImode);
44577 emit_insn (gen_iorv16qi3 (op, l, h));
44578 if (op != d->target)
44579 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
44580
44581 return true;
44582 }
44583
44584 /* Implement arbitrary permutation of one V32QImode or V16HImode operand
44585 with two vpshufb insns, vpermq and vpor. We should have already failed
44586 all two or three instruction sequences. */
44587
44588 static bool
44589 expand_vec_perm_vpshufb2_vpermq (struct expand_vec_perm_d *d)
44590 {
44591 rtx rperm[2][32], vperm, l, h, hp, op, m128;
44592 unsigned int i, nelt, eltsz;
44593
44594 if (!TARGET_AVX2
44595 || !d->one_operand_p
44596 || (d->vmode != V32QImode && d->vmode != V16HImode))
44597 return false;
44598
44599 if (d->testing_p)
44600 return true;
44601
44602 nelt = d->nelt;
44603 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
44604
44605 /* Generate two permutation masks. If the required element is within
44606 the same lane, it is shuffled in. If the required element is from the
44607 other lane, force a zero by setting bit 7 in the permutation mask.
44608 The other mask has non-negative elements where the element is
44609 requested from the other lane; that element is also moved to the
44610 other lane, so that the result of vpshufb can have its two V2TImode
44611 halves swapped. */
44612 m128 = GEN_INT (-128);
44613 for (i = 0; i < nelt; ++i)
44614 {
44615 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
44616 unsigned which = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
44617
44618 for (j = 0; j < eltsz; ++j)
44619 {
44620 rperm[!!which][(i * eltsz + j) ^ which] = GEN_INT (e * eltsz + j);
44621 rperm[!which][(i * eltsz + j) ^ (which ^ 16)] = m128;
44622 }
44623 }
44624
44625 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
44626 vperm = force_reg (V32QImode, vperm);
44627
44628 h = gen_reg_rtx (V32QImode);
44629 op = gen_lowpart (V32QImode, d->op0);
44630 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
44631
44632 /* Swap the 128-bit lanes of h into hp. */
44633 hp = gen_reg_rtx (V4DImode);
44634 op = gen_lowpart (V4DImode, h);
44635 emit_insn (gen_avx2_permv4di_1 (hp, op, const2_rtx, GEN_INT (3), const0_rtx,
44636 const1_rtx));
44637
44638 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
44639 vperm = force_reg (V32QImode, vperm);
44640
44641 l = gen_reg_rtx (V32QImode);
44642 op = gen_lowpart (V32QImode, d->op0);
44643 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
44644
44645 op = d->target;
44646 if (d->vmode != V32QImode)
44647 op = gen_reg_rtx (V32QImode);
44648 emit_insn (gen_iorv32qi3 (op, l, gen_lowpart (V32QImode, hp)));
44649 if (op != d->target)
44650 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
44651
44652 return true;
44653 }
44654
44655 /* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even
44656 and extract-odd permutations of two V32QImode or V16HImode operands
44657 with two vpshufb insns, vpor and vpermq. We should have already
44658 failed all two or three instruction sequences. */
44659
44660 static bool
44661 expand_vec_perm_vpshufb2_vpermq_even_odd (struct expand_vec_perm_d *d)
44662 {
44663 rtx rperm[2][32], vperm, l, h, ior, op, m128;
44664 unsigned int i, nelt, eltsz;
44665
44666 if (!TARGET_AVX2
44667 || d->one_operand_p
44668 || (d->vmode != V32QImode && d->vmode != V16HImode))
44669 return false;
44670
44671 for (i = 0; i < d->nelt; ++i)
44672 if ((d->perm[i] ^ (i * 2)) & (3 * d->nelt / 2))
44673 return false;
44674
44675 if (d->testing_p)
44676 return true;
44677
44678 nelt = d->nelt;
44679 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
44680
44681 /* Generate two permutation masks. In the first permutation mask
44682 the first quarter will contain indexes for the first half
44683 of the op0, the second quarter will contain bit 7 set, third quarter
44684 will contain indexes for the second half of the op0 and the
44685 last quarter bit 7 set. In the second permutation mask
44686 the first quarter will contain bit 7 set, the second quarter
44687 indexes for the first half of the op1, the third quarter bit 7 set
44688 and last quarter indexes for the second half of the op1.
44689 I.e. the first mask e.g. for V32QImode extract even will be:
44690 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128
44691 (all values masked with 0xf except for -128) and second mask
44692 for extract even will be
44693 -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe. */
44694 m128 = GEN_INT (-128);
44695 for (i = 0; i < nelt; ++i)
44696 {
44697 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
44698 unsigned which = d->perm[i] >= nelt;
44699 unsigned xorv = (i >= nelt / 4 && i < 3 * nelt / 4) ? 24 : 0;
44700
44701 for (j = 0; j < eltsz; ++j)
44702 {
44703 rperm[which][(i * eltsz + j) ^ xorv] = GEN_INT (e * eltsz + j);
44704 rperm[1 - which][(i * eltsz + j) ^ xorv] = m128;
44705 }
44706 }
44707
44708 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
44709 vperm = force_reg (V32QImode, vperm);
44710
44711 l = gen_reg_rtx (V32QImode);
44712 op = gen_lowpart (V32QImode, d->op0);
44713 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
44714
44715 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
44716 vperm = force_reg (V32QImode, vperm);
44717
44718 h = gen_reg_rtx (V32QImode);
44719 op = gen_lowpart (V32QImode, d->op1);
44720 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
44721
44722 ior = gen_reg_rtx (V32QImode);
44723 emit_insn (gen_iorv32qi3 (ior, l, h));
44724
44725 /* Permute the V4DImode quarters using { 0, 2, 1, 3 } permutation. */
44726 op = gen_reg_rtx (V4DImode);
44727 ior = gen_lowpart (V4DImode, ior);
44728 emit_insn (gen_avx2_permv4di_1 (op, ior, const0_rtx, const2_rtx,
44729 const1_rtx, GEN_INT (3)));
44730 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
44731
44732 return true;
44733 }
44734
44735 /* A subroutine of ix86_expand_vec_perm_const_1. Implement extract-even
44736 and extract-odd permutations. */
44737
44738 static bool
44739 expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
44740 {
44741 rtx t1, t2, t3, t4, t5;
44742
44743 switch (d->vmode)
44744 {
44745 case V4DFmode:
44746 if (d->testing_p)
44747 break;
44748 t1 = gen_reg_rtx (V4DFmode);
44749 t2 = gen_reg_rtx (V4DFmode);
44750
44751 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
44752 emit_insn (gen_avx_vperm2f128v4df3 (t1, d->op0, d->op1, GEN_INT (0x20)));
44753 emit_insn (gen_avx_vperm2f128v4df3 (t2, d->op0, d->op1, GEN_INT (0x31)));
44754
44755 /* Now an unpck[lh]pd will produce the result required. */
44756 if (odd)
44757 t3 = gen_avx_unpckhpd256 (d->target, t1, t2);
44758 else
44759 t3 = gen_avx_unpcklpd256 (d->target, t1, t2);
44760 emit_insn (t3);
44761 break;
44762
44763 case V8SFmode:
44764 {
44765 int mask = odd ? 0xdd : 0x88;
44766
44767 if (d->testing_p)
44768 break;
44769 t1 = gen_reg_rtx (V8SFmode);
44770 t2 = gen_reg_rtx (V8SFmode);
44771 t3 = gen_reg_rtx (V8SFmode);
44772
44773 /* Shuffle within the 128-bit lanes to produce:
44774 { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }. */
44775 emit_insn (gen_avx_shufps256 (t1, d->op0, d->op1,
44776 GEN_INT (mask)));
44777
44778 /* Shuffle the lanes around to produce:
44779 { 4 6 c e 0 2 8 a } and { 5 7 d f 1 3 9 b }. */
44780 emit_insn (gen_avx_vperm2f128v8sf3 (t2, t1, t1,
44781 GEN_INT (0x3)));
44782
44783 /* Shuffle within the 128-bit lanes to produce:
44784 { 0 2 4 6 4 6 0 2 } | { 1 3 5 7 5 7 1 3 }. */
44785 emit_insn (gen_avx_shufps256 (t3, t1, t2, GEN_INT (0x44)));
44786
44787 /* Shuffle within the 128-bit lanes to produce:
44788 { 8 a c e c e 8 a } | { 9 b d f d f 9 b }. */
44789 emit_insn (gen_avx_shufps256 (t2, t1, t2, GEN_INT (0xee)));
44790
44791 /* Shuffle the lanes around to produce:
44792 { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }. */
44793 emit_insn (gen_avx_vperm2f128v8sf3 (d->target, t3, t2,
44794 GEN_INT (0x20)));
44795 }
44796 break;
44797
44798 case V2DFmode:
44799 case V4SFmode:
44800 case V2DImode:
44801 case V4SImode:
44802 /* These are always directly implementable by expand_vec_perm_1. */
44803 gcc_unreachable ();
44804
44805 case V8HImode:
44806 if (TARGET_SSSE3 && !TARGET_SLOW_PSHUFB)
44807 return expand_vec_perm_pshufb2 (d);
44808 else
44809 {
44810 if (d->testing_p)
44811 break;
44812 /* We need 2*log2(N)-1 operations to achieve odd/even
44813 with interleave. */
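/* For V8HImode that is the five interleave insns emitted below
   (2 * log2 (8) - 1 == 5). */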
44814 t1 = gen_reg_rtx (V8HImode);
44815 t2 = gen_reg_rtx (V8HImode);
44816 emit_insn (gen_vec_interleave_highv8hi (t1, d->op0, d->op1));
44817 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->op0, d->op1));
44818 emit_insn (gen_vec_interleave_highv8hi (t2, d->target, t1));
44819 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->target, t1));
44820 if (odd)
44821 t3 = gen_vec_interleave_highv8hi (d->target, d->target, t2);
44822 else
44823 t3 = gen_vec_interleave_lowv8hi (d->target, d->target, t2);
44824 emit_insn (t3);
44825 }
44826 break;
44827
44828 case V16QImode:
44829 if (TARGET_SSSE3 && !TARGET_SLOW_PSHUFB)
44830 return expand_vec_perm_pshufb2 (d);
44831 else
44832 {
44833 if (d->testing_p)
44834 break;
44835 t1 = gen_reg_rtx (V16QImode);
44836 t2 = gen_reg_rtx (V16QImode);
44837 t3 = gen_reg_rtx (V16QImode);
44838 emit_insn (gen_vec_interleave_highv16qi (t1, d->op0, d->op1));
44839 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->op0, d->op1));
44840 emit_insn (gen_vec_interleave_highv16qi (t2, d->target, t1));
44841 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t1));
44842 emit_insn (gen_vec_interleave_highv16qi (t3, d->target, t2));
44843 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t2));
44844 if (odd)
44845 t3 = gen_vec_interleave_highv16qi (d->target, d->target, t3);
44846 else
44847 t3 = gen_vec_interleave_lowv16qi (d->target, d->target, t3);
44848 emit_insn (t3);
44849 }
44850 break;
44851
44852 case V16HImode:
44853 case V32QImode:
44854 return expand_vec_perm_vpshufb2_vpermq_even_odd (d);
44855
44856 case V4DImode:
44857 if (!TARGET_AVX2)
44858 {
44859 struct expand_vec_perm_d d_copy = *d;
44860 d_copy.vmode = V4DFmode;
44861 if (d->testing_p)
44862 d_copy.target = gen_lowpart (V4DFmode, d->target);
44863 else
44864 d_copy.target = gen_reg_rtx (V4DFmode);
44865 d_copy.op0 = gen_lowpart (V4DFmode, d->op0);
44866 d_copy.op1 = gen_lowpart (V4DFmode, d->op1);
44867 if (expand_vec_perm_even_odd_1 (&d_copy, odd))
44868 {
44869 if (!d->testing_p)
44870 emit_move_insn (d->target,
44871 gen_lowpart (V4DImode, d_copy.target));
44872 return true;
44873 }
44874 return false;
44875 }
44876
44877 if (d->testing_p)
44878 break;
44879
44880 t1 = gen_reg_rtx (V4DImode);
44881 t2 = gen_reg_rtx (V4DImode);
44882
44883 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
44884 emit_insn (gen_avx2_permv2ti (t1, d->op0, d->op1, GEN_INT (0x20)));
44885 emit_insn (gen_avx2_permv2ti (t2, d->op0, d->op1, GEN_INT (0x31)));
44886
44887 /* Now a vpunpck[lh]qdq will produce the result required. */
44888 if (odd)
44889 t3 = gen_avx2_interleave_highv4di (d->target, t1, t2);
44890 else
44891 t3 = gen_avx2_interleave_lowv4di (d->target, t1, t2);
44892 emit_insn (t3);
44893 break;
44894
44895 case V8SImode:
44896 if (!TARGET_AVX2)
44897 {
44898 struct expand_vec_perm_d d_copy = *d;
44899 d_copy.vmode = V8SFmode;
44900 if (d->testing_p)
44901 d_copy.target = gen_lowpart (V8SFmode, d->target);
44902 else
44903 d_copy.target = gen_reg_rtx (V8SFmode);
44904 d_copy.op0 = gen_lowpart (V8SFmode, d->op0);
44905 d_copy.op1 = gen_lowpart (V8SFmode, d->op1);
44906 if (expand_vec_perm_even_odd_1 (&d_copy, odd))
44907 {
44908 if (!d->testing_p)
44909 emit_move_insn (d->target,
44910 gen_lowpart (V8SImode, d_copy.target));
44911 return true;
44912 }
44913 return false;
44914 }
44915
44916 if (d->testing_p)
44917 break;
44918
44919 t1 = gen_reg_rtx (V8SImode);
44920 t2 = gen_reg_rtx (V8SImode);
44921 t3 = gen_reg_rtx (V4DImode);
44922 t4 = gen_reg_rtx (V4DImode);
44923 t5 = gen_reg_rtx (V4DImode);
44924
44925 /* Shuffle the lanes around into
44926 { 0 1 2 3 8 9 a b } and { 4 5 6 7 c d e f }. */
44927 emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, d->op0),
44928 gen_lowpart (V4DImode, d->op1),
44929 GEN_INT (0x20)));
44930 emit_insn (gen_avx2_permv2ti (t4, gen_lowpart (V4DImode, d->op0),
44931 gen_lowpart (V4DImode, d->op1),
44932 GEN_INT (0x31)));
44933
44934 /* Swap the 2nd and 3rd position in each lane into
44935 { 0 2 1 3 8 a 9 b } and { 4 6 5 7 c e d f }. */
44936 emit_insn (gen_avx2_pshufdv3 (t1, gen_lowpart (V8SImode, t3),
44937 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
44938 emit_insn (gen_avx2_pshufdv3 (t2, gen_lowpart (V8SImode, t4),
44939 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
44940
44941 /* Now a vpunpck[lh]qdq will produce
44942 { 0 2 4 6 8 a c e } resp. { 1 3 5 7 9 b d f }. */
44943 if (odd)
44944 t3 = gen_avx2_interleave_highv4di (t5, gen_lowpart (V4DImode, t1),
44945 gen_lowpart (V4DImode, t2));
44946 else
44947 t3 = gen_avx2_interleave_lowv4di (t5, gen_lowpart (V4DImode, t1),
44948 gen_lowpart (V4DImode, t2));
44949 emit_insn (t3);
44950 emit_move_insn (d->target, gen_lowpart (V8SImode, t5));
44951 break;
44952
44953 default:
44954 gcc_unreachable ();
44955 }
44956
44957 return true;
44958 }
44959
44960 /* A subroutine of ix86_expand_vec_perm_const_1. Pattern match
44961 extract-even and extract-odd permutations. */
44962
44963 static bool
44964 expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
44965 {
44966 unsigned i, odd, nelt = d->nelt;
44967
44968 odd = d->perm[0];
44969 if (odd != 0 && odd != 1)
44970 return false;
44971
44972 for (i = 1; i < nelt; ++i)
44973 if (d->perm[i] != 2 * i + odd)
44974 return false;
44975
44976 return expand_vec_perm_even_odd_1 (d, odd);
44977 }
44978
44979 /* A subroutine of ix86_expand_vec_perm_const_1. Implement broadcast
44980 permutations. We assume that expand_vec_perm_1 has already failed. */
44981
44982 static bool
44983 expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
44984 {
44985 unsigned elt = d->perm[0], nelt2 = d->nelt / 2;
44986 enum machine_mode vmode = d->vmode;
44987 unsigned char perm2[4];
44988 rtx op0 = d->op0, dest;
44989 bool ok;
44990
44991 switch (vmode)
44992 {
44993 case V4DFmode:
44994 case V8SFmode:
44995 /* These are special-cased in sse.md so that we can optionally
44996 use the vbroadcast instruction. They expand to two insns
44997 if the input happens to be in a register. */
44998 gcc_unreachable ();
44999
45000 case V2DFmode:
45001 case V2DImode:
45002 case V4SFmode:
45003 case V4SImode:
45004 /* These are always implementable using standard shuffle patterns. */
45005 gcc_unreachable ();
45006
45007 case V8HImode:
45008 case V16QImode:
45009 /* These can be implemented via interleave. We save one insn by
45010 stopping once we have promoted to V4SImode and then use pshufd. */
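/* E.g. a V16QImode broadcast takes two interleave steps to reach
   V4SImode and then one pshufd to replicate the 32-bit element
   containing the original byte; a V8HImode broadcast needs one
   interleave step plus the pshufd. */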
45011 if (d->testing_p)
45012 return true;
45013 do
45014 {
45015 rtx dest;
45016 rtx (*gen) (rtx, rtx, rtx)
45017 = vmode == V16QImode ? gen_vec_interleave_lowv16qi
45018 : gen_vec_interleave_lowv8hi;
45019
45020 if (elt >= nelt2)
45021 {
45022 gen = vmode == V16QImode ? gen_vec_interleave_highv16qi
45023 : gen_vec_interleave_highv8hi;
45024 elt -= nelt2;
45025 }
45026 nelt2 /= 2;
45027
45028 dest = gen_reg_rtx (vmode);
45029 emit_insn (gen (dest, op0, op0));
45030 vmode = get_mode_wider_vector (vmode);
45031 op0 = gen_lowpart (vmode, dest);
45032 }
45033 while (vmode != V4SImode);
45034
45035 memset (perm2, elt, 4);
45036 dest = gen_reg_rtx (V4SImode);
45037 ok = expand_vselect (dest, op0, perm2, 4, d->testing_p);
45038 gcc_assert (ok);
45039 if (!d->testing_p)
45040 emit_move_insn (d->target, gen_lowpart (d->vmode, dest));
45041 return true;
45042
45043 case V32QImode:
45044 case V16HImode:
45045 case V8SImode:
45046 case V4DImode:
45047 /* For AVX2 broadcasts of the first element vpbroadcast* or
45048 vpermq should be used by expand_vec_perm_1. */
45049 gcc_assert (!TARGET_AVX2 || d->perm[0]);
45050 return false;
45051
45052 default:
45053 gcc_unreachable ();
45054 }
45055 }
45056
45057 /* A subroutine of ix86_expand_vec_perm_const_1. Pattern match
45058 broadcast permutations. */
45059
45060 static bool
45061 expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
45062 {
45063 unsigned i, elt, nelt = d->nelt;
45064
45065 if (!d->one_operand_p)
45066 return false;
45067
45068 elt = d->perm[0];
45069 for (i = 1; i < nelt; ++i)
45070 if (d->perm[i] != elt)
45071 return false;
45072
45073 return expand_vec_perm_broadcast_1 (d);
45074 }
45075
45076 /* Implement arbitrary permutation of two V32QImode or V16HImode operands
45077 with 4 vpshufb insns, 2 vpermq and 3 vpor. We should have already failed
45078 all the shorter instruction sequences. */
45079
45080 static bool
45081 expand_vec_perm_vpshufb4_vpermq2 (struct expand_vec_perm_d *d)
45082 {
45083 rtx rperm[4][32], vperm, l[2], h[2], op, m128;
45084 unsigned int i, nelt, eltsz;
45085 bool used[4];
45086
45087 if (!TARGET_AVX2
45088 || d->one_operand_p
45089 || (d->vmode != V32QImode && d->vmode != V16HImode))
45090 return false;
45091
45092 if (d->testing_p)
45093 return true;
45094
45095 nelt = d->nelt;
45096 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
45097
45098 /* Generate 4 permutation masks. If the required element is within
45099 the same lane, it is shuffled in. If the required element is from the
45100 other lane, force a zero by setting bit 7 in the permutation mask.
45101 The other mask has non-negative elements where the element is
45102 requested from the other lane; that element is also moved to the
45103 other lane, so that the result of vpshufb can have its two V2TImode
45104 halves swapped. */
45105 m128 = GEN_INT (-128);
45106 for (i = 0; i < 32; ++i)
45107 {
45108 rperm[0][i] = m128;
45109 rperm[1][i] = m128;
45110 rperm[2][i] = m128;
45111 rperm[3][i] = m128;
45112 }
45113 used[0] = false;
45114 used[1] = false;
45115 used[2] = false;
45116 used[3] = false;
45117 for (i = 0; i < nelt; ++i)
45118 {
45119 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
45120 unsigned xlane = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
45121 unsigned int which = ((d->perm[i] & nelt) ? 2 : 0) + (xlane ? 1 : 0);
45122
45123 for (j = 0; j < eltsz; ++j)
45124 rperm[which][(i * eltsz + j) ^ xlane] = GEN_INT (e * eltsz + j);
45125 used[which] = true;
45126 }
45127
45128 for (i = 0; i < 2; ++i)
45129 {
45130 if (!used[2 * i + 1])
45131 {
45132 h[i] = NULL_RTX;
45133 continue;
45134 }
45135 vperm = gen_rtx_CONST_VECTOR (V32QImode,
45136 gen_rtvec_v (32, rperm[2 * i + 1]));
45137 vperm = force_reg (V32QImode, vperm);
45138 h[i] = gen_reg_rtx (V32QImode);
45139 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
45140 emit_insn (gen_avx2_pshufbv32qi3 (h[i], op, vperm));
45141 }
45142
45143 /* Swap the 128-bit lanes of h[X]. */
45144 for (i = 0; i < 2; ++i)
45145 {
45146 if (h[i] == NULL_RTX)
45147 continue;
45148 op = gen_reg_rtx (V4DImode);
45149 emit_insn (gen_avx2_permv4di_1 (op, gen_lowpart (V4DImode, h[i]),
45150 const2_rtx, GEN_INT (3), const0_rtx,
45151 const1_rtx));
45152 h[i] = gen_lowpart (V32QImode, op);
45153 }
45154
45155 for (i = 0; i < 2; ++i)
45156 {
45157 if (!used[2 * i])
45158 {
45159 l[i] = NULL_RTX;
45160 continue;
45161 }
45162 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[2 * i]));
45163 vperm = force_reg (V32QImode, vperm);
45164 l[i] = gen_reg_rtx (V32QImode);
45165 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
45166 emit_insn (gen_avx2_pshufbv32qi3 (l[i], op, vperm));
45167 }
45168
45169 for (i = 0; i < 2; ++i)
45170 {
45171 if (h[i] && l[i])
45172 {
45173 op = gen_reg_rtx (V32QImode);
45174 emit_insn (gen_iorv32qi3 (op, l[i], h[i]));
45175 l[i] = op;
45176 }
45177 else if (h[i])
45178 l[i] = h[i];
45179 }
45180
45181 gcc_assert (l[0] && l[1]);
45182 op = d->target;
45183 if (d->vmode != V32QImode)
45184 op = gen_reg_rtx (V32QImode);
45185 emit_insn (gen_iorv32qi3 (op, l[0], l[1]));
45186 if (op != d->target)
45187 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
45188 return true;
45189 }
45190
45191 /* The guts of ix86_expand_vec_perm_const, also used by the ok hook.
45192 With all of the interface bits taken care of, perform the expansion
45193 in D and return true on success. */
45194
45195 static bool
45196 ix86_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
45197 {
45198 /* Try a single instruction expansion. */
45199 if (expand_vec_perm_1 (d))
45200 return true;
45201
45202 /* Try sequences of two instructions. */
45203
45204 if (expand_vec_perm_pshuflw_pshufhw (d))
45205 return true;
45206
45207 if (expand_vec_perm_palignr (d, false))
45208 return true;
45209
45210 if (expand_vec_perm_interleave2 (d))
45211 return true;
45212
45213 if (expand_vec_perm_broadcast (d))
45214 return true;
45215
45216 if (expand_vec_perm_vpermq_perm_1 (d))
45217 return true;
45218
45219 if (expand_vec_perm_vperm2f128 (d))
45220 return true;
45221
45222 if (expand_vec_perm_pblendv (d))
45223 return true;
45224
45225 /* Try sequences of three instructions. */
45226
45227 if (expand_vec_perm_2vperm2f128_vshuf (d))
45228 return true;
45229
45230 if (expand_vec_perm_pshufb2 (d))
45231 return true;
45232
45233 if (expand_vec_perm_interleave3 (d))
45234 return true;
45235
45236 if (expand_vec_perm_vperm2f128_vblend (d))
45237 return true;
45238
45239 /* Try sequences of four instructions. */
45240
45241 if (expand_vec_perm_vpshufb2_vpermq (d))
45242 return true;
45243
45244 if (expand_vec_perm_vpshufb2_vpermq_even_odd (d))
45245 return true;
45246
45247 /* ??? Look for narrow permutations whose element orderings would
45248 allow the promotion to a wider mode. */
45249
45250 /* ??? Look for sequences of interleave or a wider permute that place
45251 the data into the correct lanes for a half-vector shuffle like
45252 pshuf[lh]w or vpermilps. */
45253
45254 /* ??? Look for sequences of interleave that produce the desired results.
45255 The combinatorics of punpck[lh] get pretty ugly... */
45256
45257 if (expand_vec_perm_even_odd (d))
45258 return true;
45259
45260 /* Even longer sequences. */
45261 if (expand_vec_perm_vpshufb4_vpermq2 (d))
45262 return true;
45263
45264 return false;
45265 }
45266
45267 /* If a permutation only uses one operand, make it clear. Returns true
45268 if the permutation references both operands. */
45269
45270 static bool
45271 canonicalize_perm (struct expand_vec_perm_d *d)
45272 {
45273 int i, which, nelt = d->nelt;
45274
45275 for (i = which = 0; i < nelt; ++i)
45276 which |= (d->perm[i] < nelt ? 1 : 2);
45277
45278 d->one_operand_p = true;
45279 switch (which)
45280 {
45281 default:
45282 gcc_unreachable();
45283
45284 case 3:
45285 if (!rtx_equal_p (d->op0, d->op1))
45286 {
45287 d->one_operand_p = false;
45288 break;
45289 }
45290 /* The elements of PERM do not suggest that only the first operand
45291 is used, but both operands are identical. Allow easier matching
45292 of the permutation by folding the permutation into the single
45293 input vector. */
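/* E.g. with identical operands, a V4SImode selector {5, 0, 7, 2} is
   folded below to {1, 0, 3, 2} on the single input. */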
45294 /* FALLTHRU */
45295
45296 case 2:
45297 for (i = 0; i < nelt; ++i)
45298 d->perm[i] &= nelt - 1;
45299 d->op0 = d->op1;
45300 break;
45301
45302 case 1:
45303 d->op1 = d->op0;
45304 break;
45305 }
45306
45307 return (which == 3);
45308 }
45309
45310 bool
45311 ix86_expand_vec_perm_const (rtx operands[4])
45312 {
45313 struct expand_vec_perm_d d;
45314 unsigned char perm[MAX_VECT_LEN];
45315 int i, nelt;
45316 bool two_args;
45317 rtx sel;
45318
45319 d.target = operands[0];
45320 d.op0 = operands[1];
45321 d.op1 = operands[2];
45322 sel = operands[3];
45323
45324 d.vmode = GET_MODE (d.target);
45325 gcc_assert (VECTOR_MODE_P (d.vmode));
45326 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
45327 d.testing_p = false;
45328
45329 gcc_assert (GET_CODE (sel) == CONST_VECTOR);
45330 gcc_assert (XVECLEN (sel, 0) == nelt);
45331 gcc_checking_assert (sizeof (d.perm) == sizeof (perm));
45332
45333 for (i = 0; i < nelt; ++i)
45334 {
45335 rtx e = XVECEXP (sel, 0, i);
45336 int ei = INTVAL (e) & (2 * nelt - 1);
45337 d.perm[i] = ei;
45338 perm[i] = ei;
45339 }
45340
45341 two_args = canonicalize_perm (&d);
45342
45343 if (ix86_expand_vec_perm_const_1 (&d))
45344 return true;
45345
45346 /* If the selector says both arguments are needed, but the operands are the
45347 same, the above tried to expand with one_operand_p and flattened selector.
45348 If that didn't work, retry without one_operand_p; we succeeded with that
45349 during testing. */
45350 if (two_args && d.one_operand_p)
45351 {
45352 d.one_operand_p = false;
45353 memcpy (d.perm, perm, sizeof (perm));
45354 return ix86_expand_vec_perm_const_1 (&d);
45355 }
45356
45357 return false;
45358 }
45359
45360 /* Implement targetm.vectorize.vec_perm_const_ok. */
45361
45362 static bool
45363 ix86_vectorize_vec_perm_const_ok (enum machine_mode vmode,
45364 const unsigned char *sel)
45365 {
45366 struct expand_vec_perm_d d;
45367 unsigned int i, nelt, which;
45368 bool ret;
45369
45370 d.vmode = vmode;
45371 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
45372 d.testing_p = true;
45373
45374 /* Given sufficient ISA support we can just return true here
45375 for selected vector modes. */
45376 switch (d.vmode)
45377 {
45378 case V16SFmode:
45379 case V16SImode:
45380 case V8DImode:
45381 case V8DFmode:
45382 if (TARGET_AVX512F)
45383 /* All implementable with a single vpermi2 insn. */
45384 return true;
45385 break;
45386 case V32HImode:
45387 if (TARGET_AVX512BW)
45388 /* All implementable with a single vpermi2 insn. */
45389 return true;
45390 break;
45391 case V8SImode:
45392 case V8SFmode:
45393 case V4DFmode:
45394 case V4DImode:
45395 if (TARGET_AVX512VL)
45396 /* All implementable with a single vpermi2 insn. */
45397 return true;
45398 break;
45399 case V16HImode:
45400 if (TARGET_AVX2)
45401 /* Implementable with 4 vpshufb insns, 2 vpermq and 3 vpor insns. */
45402 return true;
45403 break;
45404 case V32QImode:
45405 if (TARGET_AVX2)
45406 /* Implementable with 4 vpshufb insns, 2 vpermq and 3 vpor insns. */
45407 return true;
45408 break;
45409 case V4SImode:
45410 case V4SFmode:
45411 case V8HImode:
45412 case V16QImode:
45413 /* All implementable with a single vpperm insn. */
45414 if (TARGET_XOP)
45415 return true;
45416 /* All implementable with 2 pshufb + 1 ior. */
45417 if (TARGET_SSSE3)
45418 return true;
45419 break;
45420 case V2DImode:
45421 case V2DFmode:
45422 /* All implementable with shufpd or unpck[lh]pd. */
45423 return true;
45424 default:
45425 return false;
45426 }
45427
45428 /* Extract the values from the vector CST into the permutation
45429 array in D. */
45430 memcpy (d.perm, sel, nelt);
45431 for (i = which = 0; i < nelt; ++i)
45432 {
45433 unsigned char e = d.perm[i];
45434 gcc_assert (e < 2 * nelt);
45435 which |= (e < nelt ? 1 : 2);
45436 }
45437
45438 /* If all elements are from the second vector, fold them to the first. */
45439 if (which == 2)
45440 for (i = 0; i < nelt; ++i)
45441 d.perm[i] -= nelt;
45442
45443 /* Check whether the mask can be applied to the vector type. */
45444 d.one_operand_p = (which != 3);
45445
45446 /* Implementable with shufps or pshufd. */
45447 if (d.one_operand_p && (d.vmode == V4SFmode || d.vmode == V4SImode))
45448 return true;
45449
45450 /* Otherwise we have to go through the motions and see if we can
45451 figure out how to generate the requested permutation. */
45452 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
45453 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
45454 if (!d.one_operand_p)
45455 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
45456
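  /* The raw registers above are only placeholders; any insns generated
     while testing are collected into a throw-away sequence, so nothing is
     emitted into the current function.  */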
45457 start_sequence ();
45458 ret = ix86_expand_vec_perm_const_1 (&d);
45459 end_sequence ();
45460
45461 return ret;
45462 }
45463
45464 void
45465 ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd)
45466 {
45467 struct expand_vec_perm_d d;
45468 unsigned i, nelt;
45469
45470 d.target = targ;
45471 d.op0 = op0;
45472 d.op1 = op1;
45473 d.vmode = GET_MODE (targ);
45474 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
45475 d.one_operand_p = false;
45476 d.testing_p = false;
45477
45478 for (i = 0; i < nelt; ++i)
45479 d.perm[i] = i * 2 + odd;
45480
45481 /* We'll either be able to implement the permutation directly... */
45482 if (expand_vec_perm_1 (&d))
45483 return;
45484
45485 /* ... or we use the special-case patterns. */
45486 expand_vec_perm_even_odd_1 (&d, odd);
45487 }
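/* As an illustration, for V4SI operands { a0 a1 a2 a3 } and { b0 b1 b2 b3 },
   ODD == 0 selects { a0 a2 b0 b2 } and ODD == 1 selects { a1 a3 b1 b3 }.  */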
45488
45489 static void
45490 ix86_expand_vec_interleave (rtx targ, rtx op0, rtx op1, bool high_p)
45491 {
45492 struct expand_vec_perm_d d;
45493 unsigned i, nelt, base;
45494 bool ok;
45495
45496 d.target = targ;
45497 d.op0 = op0;
45498 d.op1 = op1;
45499 d.vmode = GET_MODE (targ);
45500 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
45501 d.one_operand_p = false;
45502 d.testing_p = false;
45503
45504 base = high_p ? nelt / 2 : 0;
45505 for (i = 0; i < nelt / 2; ++i)
45506 {
45507 d.perm[i * 2] = i + base;
45508 d.perm[i * 2 + 1] = i + base + nelt;
45509 }
45510
45511 /* Note that for AVX this isn't one instruction. */
45512 ok = ix86_expand_vec_perm_const_1 (&d);
45513 gcc_assert (ok);
45514 }
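/* As an illustration, for V4SI operands { a0 a1 a2 a3 } and { b0 b1 b2 b3 }
   this produces { a0 b0 a1 b1 } when HIGH_P is false and { a2 b2 a3 b3 }
   when HIGH_P is true.  */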
45515
45516
45517 /* Expand a vector operation CODE for a V*QImode in terms of the
45518 same operation on V*HImode. */
45519
45520 void
45521 ix86_expand_vecop_qihi (enum rtx_code code, rtx dest, rtx op1, rtx op2)
45522 {
45523 enum machine_mode qimode = GET_MODE (dest);
45524 enum machine_mode himode;
45525 rtx (*gen_il) (rtx, rtx, rtx);
45526 rtx (*gen_ih) (rtx, rtx, rtx);
45527 rtx op1_l, op1_h, op2_l, op2_h, res_l, res_h;
45528 struct expand_vec_perm_d d;
45529 bool ok, full_interleave;
45530 bool uns_p = false;
45531 int i;
45532
45533 switch (qimode)
45534 {
45535 case V16QImode:
45536 himode = V8HImode;
45537 gen_il = gen_vec_interleave_lowv16qi;
45538 gen_ih = gen_vec_interleave_highv16qi;
45539 break;
45540 case V32QImode:
45541 himode = V16HImode;
45542 gen_il = gen_avx2_interleave_lowv32qi;
45543 gen_ih = gen_avx2_interleave_highv32qi;
45544 break;
45545 case V64QImode:
45546 himode = V32HImode;
45547 gen_il = gen_avx512bw_interleave_lowv64qi;
45548 gen_ih = gen_avx512bw_interleave_highv64qi;
45549 break;
45550 default:
45551 gcc_unreachable ();
45552 }
45553
45554 op2_l = op2_h = op2;
45555 switch (code)
45556 {
45557 case MULT:
45558 /* Unpack data such that we've got a source byte in each low byte of
45559 each word. We don't care what goes into the high byte of each word.
45560 Rather than trying to get zero in there, most convenient is to let
45561 it be a copy of the low byte. */
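      /* As an illustration: after the interleaves, both bytes of word I hold
	 b_I (resp. a_I), so the HImode multiply leaves a_I * b_I (mod 256)
	 in the low byte of word I, which is all the final extraction keeps.  */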
45562 op2_l = gen_reg_rtx (qimode);
45563 op2_h = gen_reg_rtx (qimode);
45564 emit_insn (gen_il (op2_l, op2, op2));
45565 emit_insn (gen_ih (op2_h, op2, op2));
45566 /* FALLTHRU */
45567
45568 op1_l = gen_reg_rtx (qimode);
45569 op1_h = gen_reg_rtx (qimode);
45570 emit_insn (gen_il (op1_l, op1, op1));
45571 emit_insn (gen_ih (op1_h, op1, op1));
45572 full_interleave = qimode == V16QImode;
45573 break;
45574
45575 case ASHIFT:
45576 case LSHIFTRT:
45577 uns_p = true;
45578 /* FALLTHRU */
45579 case ASHIFTRT:
45580 op1_l = gen_reg_rtx (himode);
45581 op1_h = gen_reg_rtx (himode);
45582 ix86_expand_sse_unpack (op1_l, op1, uns_p, false);
45583 ix86_expand_sse_unpack (op1_h, op1, uns_p, true);
45584 full_interleave = true;
45585 break;
45586 default:
45587 gcc_unreachable ();
45588 }
45589
45590 /* Perform the operation. */
45591 res_l = expand_simple_binop (himode, code, op1_l, op2_l, NULL_RTX,
45592 1, OPTAB_DIRECT);
45593 res_h = expand_simple_binop (himode, code, op1_h, op2_h, NULL_RTX,
45594 1, OPTAB_DIRECT);
45595 gcc_assert (res_l && res_h);
45596
45597 /* Merge the data back into the right place. */
45598 d.target = dest;
45599 d.op0 = gen_lowpart (qimode, res_l);
45600 d.op1 = gen_lowpart (qimode, res_h);
45601 d.vmode = qimode;
45602 d.nelt = GET_MODE_NUNITS (qimode);
45603 d.one_operand_p = false;
45604 d.testing_p = false;
45605
45606 if (full_interleave)
45607 {
45608 /* For SSE2, we used a full interleave, so the desired
45609 results are in the even elements. */
45610 for (i = 0; i < 64; ++i)
45611 d.perm[i] = i * 2;
45612 }
45613 else
45614 {
45615 /* For AVX, the interleave used above was not cross-lane. So the
45616 extraction takes the even elements, but with the second and third quarters
45617 swapped. Happily, that is even one insn shorter than even extraction. */
45618 for (i = 0; i < 64; ++i)
45619 d.perm[i] = i * 2 + ((i & 24) == 8 ? 16 : (i & 24) == 16 ? -16 : 0);
45620 }
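/* As an illustration for V32QI: the within-lane interleaves leave the
   products for elements 0-7 and 16-23 in RES_L and those for 8-15 and
   24-31 in RES_H, so besides taking every second byte the selector must
   swap the second and third groups of eight (the +16 / -16 terms above)
   to restore the original order.  */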
45621
45622 ok = ix86_expand_vec_perm_const_1 (&d);
45623 gcc_assert (ok);
45624
45625 set_unique_reg_note (get_last_insn (), REG_EQUAL,
45626 gen_rtx_fmt_ee (code, qimode, op1, op2));
45627 }
45628
45629 /* Helper function of ix86_expand_mul_widen_evenodd. Return true
45630 if op is CONST_VECTOR with all odd elements equal to their
45631 preceding element. */
45632
45633 static bool
45634 const_vector_equal_evenodd_p (rtx op)
45635 {
45636 enum machine_mode mode = GET_MODE (op);
45637 int i, nunits = GET_MODE_NUNITS (mode);
45638 if (GET_CODE (op) != CONST_VECTOR
45639 || nunits != CONST_VECTOR_NUNITS (op))
45640 return false;
45641 for (i = 0; i < nunits; i += 2)
45642 if (CONST_VECTOR_ELT (op, i) != CONST_VECTOR_ELT (op, i + 1))
45643 return false;
45644 return true;
45645 }
45646
45647 void
45648 ix86_expand_mul_widen_evenodd (rtx dest, rtx op1, rtx op2,
45649 bool uns_p, bool odd_p)
45650 {
45651 enum machine_mode mode = GET_MODE (op1);
45652 enum machine_mode wmode = GET_MODE (dest);
45653 rtx x;
45654 rtx orig_op1 = op1, orig_op2 = op2;
45655
45656 if (!nonimmediate_operand (op1, mode))
45657 op1 = force_reg (mode, op1);
45658 if (!nonimmediate_operand (op2, mode))
45659 op2 = force_reg (mode, op2);
45660
45661 /* We only play even/odd games with vectors of SImode. */
45662 gcc_assert (mode == V4SImode || mode == V8SImode || mode == V16SImode);
45663
45664 /* If we're looking for the odd results, shift those members down to
45665 the even slots. For some cpus this is faster than a PSHUFD. */
45666 if (odd_p)
45667 {
45668 /* For XOP use vpmacsdqh, but only for smult, as it is only
45669 signed. */
45670 if (TARGET_XOP && mode == V4SImode && !uns_p)
45671 {
45672 x = force_reg (wmode, CONST0_RTX (wmode));
45673 emit_insn (gen_xop_pmacsdqh (dest, op1, op2, x));
45674 return;
45675 }
45676
45677 x = GEN_INT (GET_MODE_UNIT_BITSIZE (mode));
45678 if (!const_vector_equal_evenodd_p (orig_op1))
45679 op1 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op1),
45680 x, NULL, 1, OPTAB_DIRECT);
45681 if (!const_vector_equal_evenodd_p (orig_op2))
45682 op2 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op2),
45683 x, NULL, 1, OPTAB_DIRECT);
45684 op1 = gen_lowpart (mode, op1);
45685 op2 = gen_lowpart (mode, op2);
45686 }
45687
45688 if (mode == V16SImode)
45689 {
45690 if (uns_p)
45691 x = gen_vec_widen_umult_even_v16si (dest, op1, op2);
45692 else
45693 x = gen_vec_widen_smult_even_v16si (dest, op1, op2);
45694 }
45695 else if (mode == V8SImode)
45696 {
45697 if (uns_p)
45698 x = gen_vec_widen_umult_even_v8si (dest, op1, op2);
45699 else
45700 x = gen_vec_widen_smult_even_v8si (dest, op1, op2);
45701 }
45702 else if (uns_p)
45703 x = gen_vec_widen_umult_even_v4si (dest, op1, op2);
45704 else if (TARGET_SSE4_1)
45705 x = gen_sse4_1_mulv2siv2di3 (dest, op1, op2);
45706 else
45707 {
45708 rtx s1, s2, t0, t1, t2;
45709
45710 /* The easiest way to implement this without PMULDQ is to go through
45711 the motions as if we are performing a full 64-bit multiply, except
45712 that we need to do less shuffling of the elements. */
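  /* Concretely (illustrative algebra, not generated code): writing the
     sign-extended inputs as A = a_lo + a_hi * 2^32 and B = b_lo + b_hi * 2^32,
     where a_hi and b_hi are 0 or all-ones,
       A * B mod 2^64 = a_lo * b_lo + ((a_hi * b_lo + a_lo * b_hi) << 32),
     which is exactly the three unsigned even multiplies emitted below.  */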
45713
45714 /* Compute the sign-extension, aka highparts, of the two operands. */
45715 s1 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
45716 op1, pc_rtx, pc_rtx);
45717 s2 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
45718 op2, pc_rtx, pc_rtx);
45719
45720 /* Multiply LO(A) * HI(B), and vice-versa. */
45721 t1 = gen_reg_rtx (wmode);
45722 t2 = gen_reg_rtx (wmode);
45723 emit_insn (gen_vec_widen_umult_even_v4si (t1, s1, op2));
45724 emit_insn (gen_vec_widen_umult_even_v4si (t2, s2, op1));
45725
45726 /* Multiply LO(A) * LO(B). */
45727 t0 = gen_reg_rtx (wmode);
45728 emit_insn (gen_vec_widen_umult_even_v4si (t0, op1, op2));
45729
45730 /* Combine and shift the highparts into place. */
45731 t1 = expand_binop (wmode, add_optab, t1, t2, t1, 1, OPTAB_DIRECT);
45732 t1 = expand_binop (wmode, ashl_optab, t1, GEN_INT (32), t1,
45733 1, OPTAB_DIRECT);
45734
45735 /* Combine high and low parts. */
45736 force_expand_binop (wmode, add_optab, t0, t1, dest, 1, OPTAB_DIRECT);
45737 return;
45738 }
45739 emit_insn (x);
45740 }
45741
45742 void
45743 ix86_expand_mul_widen_hilo (rtx dest, rtx op1, rtx op2,
45744 bool uns_p, bool high_p)
45745 {
45746 enum machine_mode wmode = GET_MODE (dest);
45747 enum machine_mode mode = GET_MODE (op1);
45748 rtx t1, t2, t3, t4, mask;
45749
45750 switch (mode)
45751 {
45752 case V4SImode:
45753 t1 = gen_reg_rtx (mode);
45754 t2 = gen_reg_rtx (mode);
45755 if (TARGET_XOP && !uns_p)
45756 {
45757 /* With XOP, we have pmacsdqh, aka mul_widen_odd. In this case,
45758 shuffle the elements once so that all elements are in the right
45759 place for immediate use: { A C B D }. */
45760 emit_insn (gen_sse2_pshufd_1 (t1, op1, const0_rtx, const2_rtx,
45761 const1_rtx, GEN_INT (3)));
45762 emit_insn (gen_sse2_pshufd_1 (t2, op2, const0_rtx, const2_rtx,
45763 const1_rtx, GEN_INT (3)));
45764 }
45765 else
45766 {
45767 /* Put the elements into place for the multiply. */
45768 ix86_expand_vec_interleave (t1, op1, op1, high_p);
45769 ix86_expand_vec_interleave (t2, op2, op2, high_p);
45770 high_p = false;
45771 }
45772 ix86_expand_mul_widen_evenodd (dest, t1, t2, uns_p, high_p);
45773 break;
45774
45775 case V8SImode:
45776 /* Shuffle the elements between the lanes. After this we
45777 have { A B E F | C D G H } for each operand. */
45778 t1 = gen_reg_rtx (V4DImode);
45779 t2 = gen_reg_rtx (V4DImode);
45780 emit_insn (gen_avx2_permv4di_1 (t1, gen_lowpart (V4DImode, op1),
45781 const0_rtx, const2_rtx,
45782 const1_rtx, GEN_INT (3)));
45783 emit_insn (gen_avx2_permv4di_1 (t2, gen_lowpart (V4DImode, op2),
45784 const0_rtx, const2_rtx,
45785 const1_rtx, GEN_INT (3)));
45786
45787 /* Shuffle the elements within the lanes. After this we
45788 have { A A B B | C C D D } or { E E F F | G G H H }. */
45789 t3 = gen_reg_rtx (V8SImode);
45790 t4 = gen_reg_rtx (V8SImode);
45791 mask = GEN_INT (high_p
45792 ? 2 + (2 << 2) + (3 << 4) + (3 << 6)
45793 : 0 + (0 << 2) + (1 << 4) + (1 << 6));
45794 emit_insn (gen_avx2_pshufdv3 (t3, gen_lowpart (V8SImode, t1), mask));
45795 emit_insn (gen_avx2_pshufdv3 (t4, gen_lowpart (V8SImode, t2), mask));
45796
45797 ix86_expand_mul_widen_evenodd (dest, t3, t4, uns_p, false);
45798 break;
45799
45800 case V8HImode:
45801 case V16HImode:
45802 t1 = expand_binop (mode, smul_optab, op1, op2, NULL_RTX,
45803 uns_p, OPTAB_DIRECT);
45804 t2 = expand_binop (mode,
45805 uns_p ? umul_highpart_optab : smul_highpart_optab,
45806 op1, op2, NULL_RTX, uns_p, OPTAB_DIRECT);
45807 gcc_assert (t1 && t2);
45808
45809 t3 = gen_reg_rtx (mode);
45810 ix86_expand_vec_interleave (t3, t1, t2, high_p);
45811 emit_move_insn (dest, gen_lowpart (wmode, t3));
45812 break;
45813
45814 case V16QImode:
45815 case V32QImode:
45816 case V32HImode:
45817 case V16SImode:
45818 case V64QImode:
45819 t1 = gen_reg_rtx (wmode);
45820 t2 = gen_reg_rtx (wmode);
45821 ix86_expand_sse_unpack (t1, op1, uns_p, high_p);
45822 ix86_expand_sse_unpack (t2, op2, uns_p, high_p);
45823
45824 emit_insn (gen_rtx_SET (VOIDmode, dest, gen_rtx_MULT (wmode, t1, t2)));
45825 break;
45826
45827 default:
45828 gcc_unreachable ();
45829 }
45830 }
45831
45832 void
45833 ix86_expand_sse2_mulv4si3 (rtx op0, rtx op1, rtx op2)
45834 {
45835 rtx res_1, res_2, res_3, res_4;
45836
45837 res_1 = gen_reg_rtx (V4SImode);
45838 res_2 = gen_reg_rtx (V4SImode);
45839 res_3 = gen_reg_rtx (V2DImode);
45840 res_4 = gen_reg_rtx (V2DImode);
45841 ix86_expand_mul_widen_evenodd (res_3, op1, op2, true, false);
45842 ix86_expand_mul_widen_evenodd (res_4, op1, op2, true, true);
45843
45844 /* Move the results in element 2 down to element 1; we don't care
45845 what goes in elements 2 and 3. Then we can merge the parts
45846 back together with an interleave.
45847
45848 Note that two other sequences were tried:
45849 (1) Use interleaves at the start instead of psrldq, which allows
45850 us to use a single shufps to merge things back at the end.
45851 (2) Use shufps here to combine the two vectors, then pshufd to
45852 put the elements in the correct order.
45853 In both cases the cost of the reformatting stall was too high
45854 and the overall sequence slower. */
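  /* Concretely: RES_3 holds { a0*b0, a2*b2 } and RES_4 holds { a1*b1, a3*b3 }
     as V2DI; the two pshufd insns move the low 32 bits of each product into
     elements 0 and 1, and the final interleave yields
     { a0*b0, a1*b1, a2*b2, a3*b3 } truncated to 32 bits.  */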
45855
45856 emit_insn (gen_sse2_pshufd_1 (res_1, gen_lowpart (V4SImode, res_3),
45857 const0_rtx, const2_rtx,
45858 const0_rtx, const0_rtx));
45859 emit_insn (gen_sse2_pshufd_1 (res_2, gen_lowpart (V4SImode, res_4),
45860 const0_rtx, const2_rtx,
45861 const0_rtx, const0_rtx));
45862 res_1 = emit_insn (gen_vec_interleave_lowv4si (op0, res_1, res_2));
45863
45864 set_unique_reg_note (res_1, REG_EQUAL, gen_rtx_MULT (V4SImode, op1, op2));
45865 }
45866
45867 void
45868 ix86_expand_sse2_mulvxdi3 (rtx op0, rtx op1, rtx op2)
45869 {
45870 enum machine_mode mode = GET_MODE (op0);
45871 rtx t1, t2, t3, t4, t5, t6;
45872
45873 if (TARGET_AVX512DQ && mode == V8DImode)
45874 emit_insn (gen_avx512dq_mulv8di3 (op0, op1, op2));
45875 else if (TARGET_AVX512DQ && TARGET_AVX512VL && mode == V4DImode)
45876 emit_insn (gen_avx512dq_mulv4di3 (op0, op1, op2));
45877 else if (TARGET_AVX512DQ && TARGET_AVX512VL && mode == V2DImode)
45878 emit_insn (gen_avx512dq_mulv2di3 (op0, op1, op2));
45879 else if (TARGET_XOP && mode == V2DImode)
45880 {
45881 /* op1: A,B,C,D, op2: E,F,G,H */
45882 op1 = gen_lowpart (V4SImode, op1);
45883 op2 = gen_lowpart (V4SImode, op2);
45884
45885 t1 = gen_reg_rtx (V4SImode);
45886 t2 = gen_reg_rtx (V4SImode);
45887 t3 = gen_reg_rtx (V2DImode);
45888 t4 = gen_reg_rtx (V2DImode);
45889
45890 /* t1: B,A,D,C */
45891 emit_insn (gen_sse2_pshufd_1 (t1, op1,
45892 GEN_INT (1),
45893 GEN_INT (0),
45894 GEN_INT (3),
45895 GEN_INT (2)));
45896
45897 /* t2: (B*E),(A*F),(D*G),(C*H) */
45898 emit_insn (gen_mulv4si3 (t2, t1, op2));
45899
45900 /* t3: (B*E)+(A*F), (D*G)+(C*H) */
45901 emit_insn (gen_xop_phadddq (t3, t2));
45902
45903 /* t4: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
45904 emit_insn (gen_ashlv2di3 (t4, t3, GEN_INT (32)));
45905
45906 /* Multiply lower parts and add all. */
45907 t5 = gen_reg_rtx (V2DImode);
45908 emit_insn (gen_vec_widen_umult_even_v4si (t5,
45909 gen_lowpart (V4SImode, op1),
45910 gen_lowpart (V4SImode, op2)));
45911 op0 = expand_binop (mode, add_optab, t5, t4, op0, 1, OPTAB_DIRECT);
45912
45913 }
45914 else
45915 {
45916 enum machine_mode nmode;
45917 rtx (*umul) (rtx, rtx, rtx);
45918
45919 if (mode == V2DImode)
45920 {
45921 umul = gen_vec_widen_umult_even_v4si;
45922 nmode = V4SImode;
45923 }
45924 else if (mode == V4DImode)
45925 {
45926 umul = gen_vec_widen_umult_even_v8si;
45927 nmode = V8SImode;
45928 }
45929 else if (mode == V8DImode)
45930 {
45931 umul = gen_vec_widen_umult_even_v16si;
45932 nmode = V16SImode;
45933 }
45934 else
45935 gcc_unreachable ();
45936
45937
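      /* Illustrative algebra: writing each 64-bit lane as hi * 2^32 + lo,
	   A * B mod 2^64 = lo(A)*lo(B) + ((hi(A)*lo(B) + lo(A)*hi(B)) << 32),
	 which is what the three widening even multiplies below compute; the
	 hi(A)*hi(B) term would only affect bits above 2^64.  */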
45938 /* Multiply low parts. */
45939 t1 = gen_reg_rtx (mode);
45940 emit_insn (umul (t1, gen_lowpart (nmode, op1), gen_lowpart (nmode, op2)));
45941
45942 /* Shift input vectors right 32 bits so we can multiply high parts. */
45943 t6 = GEN_INT (32);
45944 t2 = expand_binop (mode, lshr_optab, op1, t6, NULL, 1, OPTAB_DIRECT);
45945 t3 = expand_binop (mode, lshr_optab, op2, t6, NULL, 1, OPTAB_DIRECT);
45946
45947 /* Multiply high parts by low parts. */
45948 t4 = gen_reg_rtx (mode);
45949 t5 = gen_reg_rtx (mode);
45950 emit_insn (umul (t4, gen_lowpart (nmode, t2), gen_lowpart (nmode, op2)));
45951 emit_insn (umul (t5, gen_lowpart (nmode, t3), gen_lowpart (nmode, op1)));
45952
45953 /* Combine and shift the highparts back. */
45954 t4 = expand_binop (mode, add_optab, t4, t5, t4, 1, OPTAB_DIRECT);
45955 t4 = expand_binop (mode, ashl_optab, t4, t6, t4, 1, OPTAB_DIRECT);
45956
45957 /* Combine high and low parts. */
45958 force_expand_binop (mode, add_optab, t1, t4, op0, 1, OPTAB_DIRECT);
45959 }
45960
45961 set_unique_reg_note (get_last_insn (), REG_EQUAL,
45962 gen_rtx_MULT (mode, op1, op2));
45963 }
45964
45965 /* Calculate integer abs() using only SSE2 instructions. */
45966
45967 void
45968 ix86_expand_sse2_abs (rtx target, rtx input)
45969 {
45970 enum machine_mode mode = GET_MODE (target);
45971 rtx tmp0, tmp1, x;
45972
45973 switch (mode)
45974 {
45975 /* For 32-bit signed integer X, the best way to calculate the absolute
45976 value of X is (((signed) X >> (W-1)) ^ X) - ((signed) X >> (W-1)). */
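      /* E.g. (illustrative): for X = -5 the arithmetic shift gives -1, so
	 (-1 ^ -5) - (-1) = 4 + 1 = 5; for non-negative X the shift gives 0
	 and X is unchanged.  */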
45977 case V4SImode:
45978 tmp0 = expand_simple_binop (mode, ASHIFTRT, input,
45979 GEN_INT (GET_MODE_BITSIZE
45980 (GET_MODE_INNER (mode)) - 1),
45981 NULL, 0, OPTAB_DIRECT);
45982 tmp1 = expand_simple_binop (mode, XOR, tmp0, input,
45983 NULL, 0, OPTAB_DIRECT);
45984 x = expand_simple_binop (mode, MINUS, tmp1, tmp0,
45985 target, 0, OPTAB_DIRECT);
45986 break;
45987
45988 /* For 16-bit signed integer X, the best way to calculate the absolute
45989 value of X is max (X, -X), as SSE2 provides the PMAXSW insn. */
45990 case V8HImode:
45991 tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
45992
45993 x = expand_simple_binop (mode, SMAX, tmp0, input,
45994 target, 0, OPTAB_DIRECT);
45995 break;
45996
45997 /* For 8-bit signed integer X, the best way to calculate the absolute
45998 value of X is min ((unsigned char) X, (unsigned char) (-X)),
45999 as SSE2 provides the PMINUB insn. */
46000 case V16QImode:
46001 tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
46002
46003 x = expand_simple_binop (V16QImode, UMIN, tmp0, input,
46004 target, 0, OPTAB_DIRECT);
46005 break;
46006
46007 default:
46008 gcc_unreachable ();
46009 }
46010
46011 if (x != target)
46012 emit_move_insn (target, x);
46013 }
46014
46015 /* Expand an insert into a vector register through pinsr insn.
46016 Return true if successful. */
46017
46018 bool
46019 ix86_expand_pinsr (rtx *operands)
46020 {
46021 rtx dst = operands[0];
46022 rtx src = operands[3];
46023
46024 unsigned int size = INTVAL (operands[1]);
46025 unsigned int pos = INTVAL (operands[2]);
46026
46027 if (GET_CODE (dst) == SUBREG)
46028 {
46029 pos += SUBREG_BYTE (dst) * BITS_PER_UNIT;
46030 dst = SUBREG_REG (dst);
46031 }
46032
46033 if (GET_CODE (src) == SUBREG)
46034 src = SUBREG_REG (src);
46035
46036 switch (GET_MODE (dst))
46037 {
46038 case V16QImode:
46039 case V8HImode:
46040 case V4SImode:
46041 case V2DImode:
46042 {
46043 enum machine_mode srcmode, dstmode;
46044 rtx (*pinsr)(rtx, rtx, rtx, rtx);
46045
46046 srcmode = mode_for_size (size, MODE_INT, 0);
46047
46048 switch (srcmode)
46049 {
46050 case QImode:
46051 if (!TARGET_SSE4_1)
46052 return false;
46053 dstmode = V16QImode;
46054 pinsr = gen_sse4_1_pinsrb;
46055 break;
46056
46057 case HImode:
46058 if (!TARGET_SSE2)
46059 return false;
46060 dstmode = V8HImode;
46061 pinsr = gen_sse2_pinsrw;
46062 break;
46063
46064 case SImode:
46065 if (!TARGET_SSE4_1)
46066 return false;
46067 dstmode = V4SImode;
46068 pinsr = gen_sse4_1_pinsrd;
46069 break;
46070
46071 case DImode:
46072 gcc_assert (TARGET_64BIT);
46073 if (!TARGET_SSE4_1)
46074 return false;
46075 dstmode = V2DImode;
46076 pinsr = gen_sse4_1_pinsrq;
46077 break;
46078
46079 default:
46080 return false;
46081 }
46082
46083 rtx d = dst;
46084 if (GET_MODE (dst) != dstmode)
46085 d = gen_reg_rtx (dstmode);
46086 src = gen_lowpart (srcmode, src);
46087
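	/* Both POS and SIZE are in bits; convert POS to an element index so
	   that 1 << POS below forms the one-hot merge mask expected by the
	   pinsr patterns.  */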
46088 pos /= size;
46089
46090 emit_insn (pinsr (d, gen_lowpart (dstmode, dst), src,
46091 GEN_INT (1 << pos)));
46092 if (d != dst)
46093 emit_move_insn (dst, gen_lowpart (GET_MODE (dst), d));
46094 return true;
46095 }
46096
46097 default:
46098 return false;
46099 }
46100 }
46101 \f
46102 /* This function returns the calling abi specific va_list type node.
46103 It returns the FNDECL specific va_list type. */
46104
46105 static tree
46106 ix86_fn_abi_va_list (tree fndecl)
46107 {
46108 if (!TARGET_64BIT)
46109 return va_list_type_node;
46110 gcc_assert (fndecl != NULL_TREE);
46111
46112 if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
46113 return ms_va_list_type_node;
46114 else
46115 return sysv_va_list_type_node;
46116 }
46117
46118 /* Returns the canonical va_list type specified by TYPE. If there
46119 is no valid TYPE provided, it returns NULL_TREE. */
46120
46121 static tree
46122 ix86_canonical_va_list_type (tree type)
46123 {
46124 tree wtype, htype;
46125
46126 /* Resolve references and pointers to va_list type. */
46127 if (TREE_CODE (type) == MEM_REF)
46128 type = TREE_TYPE (type);
46129 else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE(type)))
46130 type = TREE_TYPE (type);
46131 else if (POINTER_TYPE_P (type) && TREE_CODE (TREE_TYPE (type)) == ARRAY_TYPE)
46132 type = TREE_TYPE (type);
46133
46134 if (TARGET_64BIT && va_list_type_node != NULL_TREE)
46135 {
46136 wtype = va_list_type_node;
46137 gcc_assert (wtype != NULL_TREE);
46138 htype = type;
46139 if (TREE_CODE (wtype) == ARRAY_TYPE)
46140 {
46141 /* If va_list is an array type, the argument may have decayed
46142 to a pointer type, e.g. by being passed to another function.
46143 In that case, unwrap both types so that we can compare the
46144 underlying records. */
46145 if (TREE_CODE (htype) == ARRAY_TYPE
46146 || POINTER_TYPE_P (htype))
46147 {
46148 wtype = TREE_TYPE (wtype);
46149 htype = TREE_TYPE (htype);
46150 }
46151 }
46152 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
46153 return va_list_type_node;
46154 wtype = sysv_va_list_type_node;
46155 gcc_assert (wtype != NULL_TREE);
46156 htype = type;
46157 if (TREE_CODE (wtype) == ARRAY_TYPE)
46158 {
46159 /* If va_list is an array type, the argument may have decayed
46160 to a pointer type, e.g. by being passed to another function.
46161 In that case, unwrap both types so that we can compare the
46162 underlying records. */
46163 if (TREE_CODE (htype) == ARRAY_TYPE
46164 || POINTER_TYPE_P (htype))
46165 {
46166 wtype = TREE_TYPE (wtype);
46167 htype = TREE_TYPE (htype);
46168 }
46169 }
46170 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
46171 return sysv_va_list_type_node;
46172 wtype = ms_va_list_type_node;
46173 gcc_assert (wtype != NULL_TREE);
46174 htype = type;
46175 if (TREE_CODE (wtype) == ARRAY_TYPE)
46176 {
46177 /* If va_list is an array type, the argument may have decayed
46178 to a pointer type, e.g. by being passed to another function.
46179 In that case, unwrap both types so that we can compare the
46180 underlying records. */
46181 if (TREE_CODE (htype) == ARRAY_TYPE
46182 || POINTER_TYPE_P (htype))
46183 {
46184 wtype = TREE_TYPE (wtype);
46185 htype = TREE_TYPE (htype);
46186 }
46187 }
46188 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
46189 return ms_va_list_type_node;
46190 return NULL_TREE;
46191 }
46192 return std_canonical_va_list_type (type);
46193 }
46194
46195 /* Iterate through the target-specific builtin types for va_list.
46196 IDX denotes the iterator, *PTREE is set to the result type of
46197 the va_list builtin, and *PNAME to its builtin name.
46198 Returns zero if there is no element for this index, otherwise
46199 IDX should be increased upon the next call.
46200 Note, do not iterate a base builtin's name like __builtin_va_list.
46201 Used from c_common_nodes_and_builtins. */
46202
46203 static int
46204 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
46205 {
46206 if (TARGET_64BIT)
46207 {
46208 switch (idx)
46209 {
46210 default:
46211 break;
46212
46213 case 0:
46214 *ptree = ms_va_list_type_node;
46215 *pname = "__builtin_ms_va_list";
46216 return 1;
46217
46218 case 1:
46219 *ptree = sysv_va_list_type_node;
46220 *pname = "__builtin_sysv_va_list";
46221 return 1;
46222 }
46223 }
46224
46225 return 0;
46226 }
46227
46228 #undef TARGET_SCHED_DISPATCH
46229 #define TARGET_SCHED_DISPATCH has_dispatch
46230 #undef TARGET_SCHED_DISPATCH_DO
46231 #define TARGET_SCHED_DISPATCH_DO do_dispatch
46232 #undef TARGET_SCHED_REASSOCIATION_WIDTH
46233 #define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width
46234 #undef TARGET_SCHED_REORDER
46235 #define TARGET_SCHED_REORDER ix86_sched_reorder
46236 #undef TARGET_SCHED_ADJUST_PRIORITY
46237 #define TARGET_SCHED_ADJUST_PRIORITY ix86_adjust_priority
46238 #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
46239 #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK \
46240 ix86_dependencies_evaluation_hook
46241
46242 /* The size of the dispatch window is the total number of bytes of
46243 object code allowed in a window. */
46244 #define DISPATCH_WINDOW_SIZE 16
46245
46246 /* Number of dispatch windows considered for scheduling. */
46247 #define MAX_DISPATCH_WINDOWS 3
46248
46249 /* Maximum number of instructions in a window. */
46250 #define MAX_INSN 4
46251
46252 /* Maximum number of immediate operands in a window. */
46253 #define MAX_IMM 4
46254
46255 /* Maximum number of immediate bits allowed in a window. */
46256 #define MAX_IMM_SIZE 128
46257
46258 /* Maximum number of 32 bit immediates allowed in a window. */
46259 #define MAX_IMM_32 4
46260
46261 /* Maximum number of 64 bit immediates allowed in a window. */
46262 #define MAX_IMM_64 2
46263
46264 /* Maximum total of loads or prefetches allowed in a window. */
46265 #define MAX_LOAD 2
46266
46267 /* Maximum total of stores allowed in a window. */
46268 #define MAX_STORE 1
46269
46270 #undef BIG
46271 #define BIG 100
46272
46273
46274 /* Dispatch groups. Instructions that affect the mix in a dispatch window. */
46275 enum dispatch_group {
46276 disp_no_group = 0,
46277 disp_load,
46278 disp_store,
46279 disp_load_store,
46280 disp_prefetch,
46281 disp_imm,
46282 disp_imm_32,
46283 disp_imm_64,
46284 disp_branch,
46285 disp_cmp,
46286 disp_jcc,
46287 disp_last
46288 };
46289
46290 /* Number of allowable groups in a dispatch window. It is an array
46291 indexed by the dispatch_group enum. 100 is used as a big number,
46292 because the number of these kinds of operations does not have any
46293 effect in the dispatch window, but we need them for other reasons in
46294 the table. */
46295 static unsigned int num_allowable_groups[disp_last] = {
46296 0, 2, 1, 1, 2, 4, 4, 2, 1, BIG, BIG
46297 };
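/* I.e. (reading the initializer against the enum above): at most 2 loads,
   1 store, 1 load-store, 2 prefetches, 4 immediate operands of which at
   most 4 may be 32-bit and 2 may be 64-bit, and 1 branch per window;
   disp_cmp and disp_jcc are effectively unlimited here.  */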
46298
46299 char group_name[disp_last + 1][16] = {
46300 "disp_no_group", "disp_load", "disp_store", "disp_load_store",
46301 "disp_prefetch", "disp_imm", "disp_imm_32", "disp_imm_64",
46302 "disp_branch", "disp_cmp", "disp_jcc", "disp_last"
46303 };
46304
46305 /* Instruction path. */
46306 enum insn_path {
46307 no_path = 0,
46308 path_single, /* Single micro op. */
46309 path_double, /* Double micro op. */
46310 path_multi, /* Instructions with more than 2 micro ops. */
46311 last_path
46312 };
46313
46314 /* sched_insn_info describes one instruction scheduled into a dispatch
46315 window: the insn itself, its dispatch group and micro-op path, and
46316 its byte and immediate sizes.
46317
46318 Windows are allocated for each basic block and are linked
46319 together. */
46320 typedef struct sched_insn_info_s {
46321 rtx insn;
46322 enum dispatch_group group;
46323 enum insn_path path;
46324 int byte_len;
46325 int imm_bytes;
46326 } sched_insn_info;
46327
46328 /* Linked list of dispatch windows. This is a two way list of
46329 dispatch windows of a basic block. It contains information about
46330 the number of uops in the window and the total number of
46331 instructions and of bytes in the object code for this dispatch
46332 window. */
46333 typedef struct dispatch_windows_s {
46334 int num_insn; /* Number of insn in the window. */
46335 int num_uops; /* Number of uops in the window. */
46336 int window_size; /* Number of bytes in the window. */
46337 int window_num; /* Window number, either 0 or 1. */
46338 int num_imm; /* Number of immediates in an insn. */
46339 int num_imm_32; /* Number of 32 bit immediates in an insn. */
46340 int num_imm_64; /* Number of 64 bit immediates in an insn. */
46341 int imm_size; /* Total immediates in the window. */
46342 int num_loads; /* Total memory loads in the window. */
46343 int num_stores; /* Total memory stores in the window. */
46344 int violation; /* Violation exists in window. */
46345 sched_insn_info *window; /* Pointer to the window. */
46346 struct dispatch_windows_s *next;
46347 struct dispatch_windows_s *prev;
46348 } dispatch_windows;
46349
46350 /* Immediate values used in an insn. */
46351 typedef struct imm_info_s
46352 {
46353 int imm;
46354 int imm32;
46355 int imm64;
46356 } imm_info;
46357
46358 static dispatch_windows *dispatch_window_list;
46359 static dispatch_windows *dispatch_window_list1;
46360
46361 /* Get dispatch group of insn. */
46362
46363 static enum dispatch_group
46364 get_mem_group (rtx_insn *insn)
46365 {
46366 enum attr_memory memory;
46367
46368 if (INSN_CODE (insn) < 0)
46369 return disp_no_group;
46370 memory = get_attr_memory (insn);
46371 if (memory == MEMORY_STORE)
46372 return disp_store;
46373
46374 if (memory == MEMORY_LOAD)
46375 return disp_load;
46376
46377 if (memory == MEMORY_BOTH)
46378 return disp_load_store;
46379
46380 return disp_no_group;
46381 }
46382
46383 /* Return true if insn is a compare instruction. */
46384
46385 static bool
46386 is_cmp (rtx_insn *insn)
46387 {
46388 enum attr_type type;
46389
46390 type = get_attr_type (insn);
46391 return (type == TYPE_TEST
46392 || type == TYPE_ICMP
46393 || type == TYPE_FCMP
46394 || GET_CODE (PATTERN (insn)) == COMPARE);
46395 }
46396
46397 /* Return true if a dispatch violation was encountered. */
46398
46399 static bool
46400 dispatch_violation (void)
46401 {
46402 if (dispatch_window_list->next)
46403 return dispatch_window_list->next->violation;
46404 return dispatch_window_list->violation;
46405 }
46406
46407 /* Return true if insn is a branch instruction. */
46408
46409 static bool
46410 is_branch (rtx insn)
46411 {
46412 return (CALL_P (insn) || JUMP_P (insn));
46413 }
46414
46415 /* Return true if insn is a prefetch instruction. */
46416
46417 static bool
46418 is_prefetch (rtx insn)
46419 {
46420 return NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == PREFETCH;
46421 }
46422
46423 /* This function initializes a dispatch window and the list container holding a
46424 pointer to the window. */
46425
46426 static void
46427 init_window (int window_num)
46428 {
46429 int i;
46430 dispatch_windows *new_list;
46431
46432 if (window_num == 0)
46433 new_list = dispatch_window_list;
46434 else
46435 new_list = dispatch_window_list1;
46436
46437 new_list->num_insn = 0;
46438 new_list->num_uops = 0;
46439 new_list->window_size = 0;
46440 new_list->next = NULL;
46441 new_list->prev = NULL;
46442 new_list->window_num = window_num;
46443 new_list->num_imm = 0;
46444 new_list->num_imm_32 = 0;
46445 new_list->num_imm_64 = 0;
46446 new_list->imm_size = 0;
46447 new_list->num_loads = 0;
46448 new_list->num_stores = 0;
46449 new_list->violation = false;
46450
46451 for (i = 0; i < MAX_INSN; i++)
46452 {
46453 new_list->window[i].insn = NULL;
46454 new_list->window[i].group = disp_no_group;
46455 new_list->window[i].path = no_path;
46456 new_list->window[i].byte_len = 0;
46457 new_list->window[i].imm_bytes = 0;
46458 }
46459 return;
46460 }
46461
46462 /* This function allocates and initializes a dispatch window and the
46463 list container holding a pointer to the window. */
46464
46465 static dispatch_windows *
46466 allocate_window (void)
46467 {
46468 dispatch_windows *new_list = XNEW (struct dispatch_windows_s);
46469 new_list->window = XNEWVEC (struct sched_insn_info_s, MAX_INSN + 1);
46470
46471 return new_list;
46472 }
46473
46474 /* This routine initializes the dispatch scheduling information. It
46475 initiates building dispatch scheduler tables and constructs the
46476 first dispatch window. */
46477
46478 static void
46479 init_dispatch_sched (void)
46480 {
46481 /* Allocate a dispatch list and a window. */
46482 dispatch_window_list = allocate_window ();
46483 dispatch_window_list1 = allocate_window ();
46484 init_window (0);
46485 init_window (1);
46486 }
46487
46488 /* This function returns true if a branch is detected. End of a basic block
46489 does not have to be a branch, but here we assume only branches end a
46490 window. */
46491
46492 static bool
46493 is_end_basic_block (enum dispatch_group group)
46494 {
46495 return group == disp_branch;
46496 }
46497
46498 /* This function is called when the processing of a window is finished. */
46499
46500 static void
46501 process_end_window (void)
46502 {
46503 gcc_assert (dispatch_window_list->num_insn <= MAX_INSN);
46504 if (dispatch_window_list->next)
46505 {
46506 gcc_assert (dispatch_window_list1->num_insn <= MAX_INSN);
46507 gcc_assert (dispatch_window_list->window_size
46508 + dispatch_window_list1->window_size <= 48);
46509 init_window (1);
46510 }
46511 init_window (0);
46512 }
46513
46514 /* Allocates a new dispatch window and adds it to WINDOW_LIST.
46515 WINDOW_NUM is either 0 or 1. A maximum of two windows are generated
46516 for 48 bytes of instructions. Note that these windows are not dispatch
46517 windows whose sizes are DISPATCH_WINDOW_SIZE. */
46518
46519 static dispatch_windows *
46520 allocate_next_window (int window_num)
46521 {
46522 if (window_num == 0)
46523 {
46524 if (dispatch_window_list->next)
46525 init_window (1);
46526 init_window (0);
46527 return dispatch_window_list;
46528 }
46529
46530 dispatch_window_list->next = dispatch_window_list1;
46531 dispatch_window_list1->prev = dispatch_window_list;
46532
46533 return dispatch_window_list1;
46534 }
46535
46536 /* Increment the number of immediate operands of an instruction. */
46537
46538 static int
46539 find_constant_1 (rtx *in_rtx, imm_info *imm_values)
46540 {
46541 if (*in_rtx == 0)
46542 return 0;
46543
46544 switch (GET_CODE (*in_rtx))
46545 {
46546 case CONST:
46547 case SYMBOL_REF:
46548 case CONST_INT:
46549 (imm_values->imm)++;
46550 if (x86_64_immediate_operand (*in_rtx, SImode))
46551 (imm_values->imm32)++;
46552 else
46553 (imm_values->imm64)++;
46554 break;
46555
46556 case CONST_DOUBLE:
46557 (imm_values->imm)++;
46558 (imm_values->imm64)++;
46559 break;
46560
46561 case CODE_LABEL:
46562 if (LABEL_KIND (*in_rtx) == LABEL_NORMAL)
46563 {
46564 (imm_values->imm)++;
46565 (imm_values->imm32)++;
46566 }
46567 break;
46568
46569 default:
46570 break;
46571 }
46572
46573 return 0;
46574 }
46575
46576 /* Compute number of immediate operands of an instruction. */
46577
46578 static void
46579 find_constant (rtx in_rtx, imm_info *imm_values)
46580 {
46581 for_each_rtx (INSN_P (in_rtx) ? &PATTERN (in_rtx) : &in_rtx,
46582 (rtx_function) find_constant_1, (void *) imm_values);
46583 }
46584
46585 /* Return total size of immediate operands of an instruction along with number
46586 of corresponding immediate-operands. It initializes its parameters to zero
46587 before calling FIND_CONSTANT.
46588 INSN is the input instruction. IMM is the total of immediates.
46589 IMM32 is the number of 32 bit immediates. IMM64 is the number of 64
46590 bit immediates. */
46591
46592 static int
46593 get_num_immediates (rtx insn, int *imm, int *imm32, int *imm64)
46594 {
46595 imm_info imm_values = {0, 0, 0};
46596
46597 find_constant (insn, &imm_values);
46598 *imm = imm_values.imm;
46599 *imm32 = imm_values.imm32;
46600 *imm64 = imm_values.imm64;
46601 return imm_values.imm32 * 4 + imm_values.imm64 * 8;
46602 }
46603
46604 /* This function indicates whether an instruction has any immediate
46605 operand. */
46606
46607 static bool
46608 has_immediate (rtx insn)
46609 {
46610 int num_imm_operand;
46611 int num_imm32_operand;
46612 int num_imm64_operand;
46613
46614 if (insn)
46615 return get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
46616 &num_imm64_operand);
46617 return false;
46618 }
46619
46620 /* Return single or double path for instructions. */
46621
46622 static enum insn_path
46623 get_insn_path (rtx_insn *insn)
46624 {
46625 enum attr_amdfam10_decode path = get_attr_amdfam10_decode (insn);
46626
46627 if ((int)path == 0)
46628 return path_single;
46629
46630 if ((int)path == 1)
46631 return path_double;
46632
46633 return path_multi;
46634 }
46635
46636 /* Return insn dispatch group. */
46637
46638 static enum dispatch_group
46639 get_insn_group (rtx_insn *insn)
46640 {
46641 enum dispatch_group group = get_mem_group (insn);
46642 if (group)
46643 return group;
46644
46645 if (is_branch (insn))
46646 return disp_branch;
46647
46648 if (is_cmp (insn))
46649 return disp_cmp;
46650
46651 if (has_immediate (insn))
46652 return disp_imm;
46653
46654 if (is_prefetch (insn))
46655 return disp_prefetch;
46656
46657 return disp_no_group;
46658 }
46659
46660 /* Count number of GROUP restricted instructions in a dispatch
46661 window WINDOW_LIST. */
46662
46663 static int
46664 count_num_restricted (rtx_insn *insn, dispatch_windows *window_list)
46665 {
46666 enum dispatch_group group = get_insn_group (insn);
46667 int imm_size;
46668 int num_imm_operand;
46669 int num_imm32_operand;
46670 int num_imm64_operand;
46671
46672 if (group == disp_no_group)
46673 return 0;
46674
46675 if (group == disp_imm)
46676 {
46677 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
46678 &num_imm64_operand);
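      /* The test below enforces the window's immediate bandwidth: at most
	 MAX_IMM immediate operands and MAX_IMM_SIZE bits in total, with a
	 64-bit immediate counted as two 32-bit slots (hence the * 2 terms),
	 plus the special case of a completely full immediate budget.  */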
46679 if (window_list->imm_size + imm_size > MAX_IMM_SIZE
46680 || num_imm_operand + window_list->num_imm > MAX_IMM
46681 || (num_imm32_operand > 0
46682 && (window_list->num_imm_32 + num_imm32_operand > MAX_IMM_32
46683 || window_list->num_imm_64 * 2 + num_imm32_operand > MAX_IMM_32))
46684 || (num_imm64_operand > 0
46685 && (window_list->num_imm_64 + num_imm64_operand > MAX_IMM_64
46686 || window_list->num_imm_32 + num_imm64_operand * 2 > MAX_IMM_32))
46687 || (window_list->imm_size + imm_size == MAX_IMM_SIZE
46688 && num_imm64_operand > 0
46689 && ((window_list->num_imm_64 > 0
46690 && window_list->num_insn >= 2)
46691 || window_list->num_insn >= 3)))
46692 return BIG;
46693
46694 return 1;
46695 }
46696
46697 if ((group == disp_load_store
46698 && (window_list->num_loads >= MAX_LOAD
46699 || window_list->num_stores >= MAX_STORE))
46700 || ((group == disp_load
46701 || group == disp_prefetch)
46702 && window_list->num_loads >= MAX_LOAD)
46703 || (group == disp_store
46704 && window_list->num_stores >= MAX_STORE))
46705 return BIG;
46706
46707 return 1;
46708 }
46709
46710 /* This function returns true if insn satisfies dispatch rules on the
46711 last window scheduled. */
46712
46713 static bool
46714 fits_dispatch_window (rtx_insn *insn)
46715 {
46716 dispatch_windows *window_list = dispatch_window_list;
46717 dispatch_windows *window_list_next = dispatch_window_list->next;
46718 unsigned int num_restrict;
46719 enum dispatch_group group = get_insn_group (insn);
46720 enum insn_path path = get_insn_path (insn);
46721 int sum;
46722
46723 /* Make disp_cmp and disp_jcc get scheduled at the latest. These
46724 instructions should be given the lowest priority in the
46725 scheduling process in the Haifa scheduler to make sure they will be
46726 scheduled in the same dispatch window as the reference to them. */
46727 if (group == disp_jcc || group == disp_cmp)
46728 return false;
46729
46730 /* Check nonrestricted. */
46731 if (group == disp_no_group || group == disp_branch)
46732 return true;
46733
46734 /* Get last dispatch window. */
46735 if (window_list_next)
46736 window_list = window_list_next;
46737
46738 if (window_list->window_num == 1)
46739 {
46740 sum = window_list->prev->window_size + window_list->window_size;
46741
46742 if (sum == 32
46743 || (min_insn_size (insn) + sum) >= 48)
46744 /* Window 1 is full. Go for next window. */
46745 return true;
46746 }
46747
46748 num_restrict = count_num_restricted (insn, window_list);
46749
46750 if (num_restrict > num_allowable_groups[group])
46751 return false;
46752
46753 /* See if it fits in the first window. */
46754 if (window_list->window_num == 0)
46755 {
46756 /* The first window should have only single and double path
46757 uops. */
46758 if (path == path_double
46759 && (window_list->num_uops + 2) > MAX_INSN)
46760 return false;
46761 else if (path != path_single)
46762 return false;
46763 }
46764 return true;
46765 }
46766
46767 /* Add an instruction INSN with NUM_UOPS micro-operations to the
46768 dispatch window WINDOW_LIST. */
46769
46770 static void
46771 add_insn_window (rtx_insn *insn, dispatch_windows *window_list, int num_uops)
46772 {
46773 int byte_len = min_insn_size (insn);
46774 int num_insn = window_list->num_insn;
46775 int imm_size;
46776 sched_insn_info *window = window_list->window;
46777 enum dispatch_group group = get_insn_group (insn);
46778 enum insn_path path = get_insn_path (insn);
46779 int num_imm_operand;
46780 int num_imm32_operand;
46781 int num_imm64_operand;
46782
46783 if (!window_list->violation && group != disp_cmp
46784 && !fits_dispatch_window (insn))
46785 window_list->violation = true;
46786
46787 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
46788 &num_imm64_operand);
46789
46790 /* Initialize window with new instruction. */
46791 window[num_insn].insn = insn;
46792 window[num_insn].byte_len = byte_len;
46793 window[num_insn].group = group;
46794 window[num_insn].path = path;
46795 window[num_insn].imm_bytes = imm_size;
46796
46797 window_list->window_size += byte_len;
46798 window_list->num_insn = num_insn + 1;
46799 window_list->num_uops = window_list->num_uops + num_uops;
46800 window_list->imm_size += imm_size;
46801 window_list->num_imm += num_imm_operand;
46802 window_list->num_imm_32 += num_imm32_operand;
46803 window_list->num_imm_64 += num_imm64_operand;
46804
46805 if (group == disp_store)
46806 window_list->num_stores += 1;
46807 else if (group == disp_load
46808 || group == disp_prefetch)
46809 window_list->num_loads += 1;
46810 else if (group == disp_load_store)
46811 {
46812 window_list->num_stores += 1;
46813 window_list->num_loads += 1;
46814 }
46815 }
46816
46817 /* Adds a scheduled instruction, INSN, to the current dispatch window.
46818 If the total bytes of instructions or the number of instructions in
46819 the window exceed the allowable limits, it allocates a new window. */
46820
46821 static void
46822 add_to_dispatch_window (rtx_insn *insn)
46823 {
46824 int byte_len;
46825 dispatch_windows *window_list;
46826 dispatch_windows *next_list;
46827 dispatch_windows *window0_list;
46828 enum insn_path path;
46829 enum dispatch_group insn_group;
46830 bool insn_fits;
46831 int num_insn;
46832 int num_uops;
46833 int window_num;
46834 int insn_num_uops;
46835 int sum;
46836
46837 if (INSN_CODE (insn) < 0)
46838 return;
46839
46840 byte_len = min_insn_size (insn);
46841 window_list = dispatch_window_list;
46842 next_list = window_list->next;
46843 path = get_insn_path (insn);
46844 insn_group = get_insn_group (insn);
46845
46846 /* Get the last dispatch window. */
46847 if (next_list)
46848 window_list = dispatch_window_list->next;
46849
46850 if (path == path_single)
46851 insn_num_uops = 1;
46852 else if (path == path_double)
46853 insn_num_uops = 2;
46854 else
46855 insn_num_uops = (int) path;
46856
46857 /* If the current window is full, get a new window.
46858 Window number zero is full if MAX_INSN uops are scheduled in it.
46859 Window number one is full if window zero's bytes plus window
46860 one's bytes equal 32, or if adding the bytes of the new instruction
46861 to the total makes it greater than 48, or if it already has MAX_INSN
46862 instructions in it. */
46863 num_insn = window_list->num_insn;
46864 num_uops = window_list->num_uops;
46865 window_num = window_list->window_num;
46866 insn_fits = fits_dispatch_window (insn);
46867
46868 if (num_insn >= MAX_INSN
46869 || num_uops + insn_num_uops > MAX_INSN
46870 || !(insn_fits))
46871 {
46872 window_num = ~window_num & 1;
46873 window_list = allocate_next_window (window_num);
46874 }
46875
46876 if (window_num == 0)
46877 {
46878 add_insn_window (insn, window_list, insn_num_uops);
46879 if (window_list->num_insn >= MAX_INSN
46880 && insn_group == disp_branch)
46881 {
46882 process_end_window ();
46883 return;
46884 }
46885 }
46886 else if (window_num == 1)
46887 {
46888 window0_list = window_list->prev;
46889 sum = window0_list->window_size + window_list->window_size;
46890 if (sum == 32
46891 || (byte_len + sum) >= 48)
46892 {
46893 process_end_window ();
46894 window_list = dispatch_window_list;
46895 }
46896
46897 add_insn_window (insn, window_list, insn_num_uops);
46898 }
46899 else
46900 gcc_unreachable ();
46901
46902 if (is_end_basic_block (insn_group))
46903 {
46904 /* End of basic block is reached; do end-of-basic-block processing. */
46905 process_end_window ();
46906 return;
46907 }
46908 }
46909
46910 /* Print the dispatch window, WINDOW_NUM, to FILE. */
46911
46912 DEBUG_FUNCTION static void
46913 debug_dispatch_window_file (FILE *file, int window_num)
46914 {
46915 dispatch_windows *list;
46916 int i;
46917
46918 if (window_num == 0)
46919 list = dispatch_window_list;
46920 else
46921 list = dispatch_window_list1;
46922
46923 fprintf (file, "Window #%d:\n", list->window_num);
46924 fprintf (file, " num_insn = %d, num_uops = %d, window_size = %d\n",
46925 list->num_insn, list->num_uops, list->window_size);
46926 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
46927 list->num_imm, list->num_imm_32, list->num_imm_64, list->imm_size);
46928
46929 fprintf (file, " num_loads = %d, num_stores = %d\n", list->num_loads,
46930 list->num_stores);
46931 fprintf (file, " insn info:\n");
46932
46933 for (i = 0; i < MAX_INSN; i++)
46934 {
46935 if (!list->window[i].insn)
46936 break;
46937 fprintf (file, " group[%d] = %s, insn[%d] = %p, path[%d] = %d byte_len[%d] = %d, imm_bytes[%d] = %d\n",
46938 i, group_name[list->window[i].group],
46939 i, (void *)list->window[i].insn,
46940 i, list->window[i].path,
46941 i, list->window[i].byte_len,
46942 i, list->window[i].imm_bytes);
46943 }
46944 }
46945
46946 /* Print to stdout a dispatch window. */
46947
46948 DEBUG_FUNCTION void
46949 debug_dispatch_window (int window_num)
46950 {
46951 debug_dispatch_window_file (stdout, window_num);
46952 }
46953
46954 /* Print INSN dispatch information to FILE. */
46955
46956 DEBUG_FUNCTION static void
46957 debug_insn_dispatch_info_file (FILE *file, rtx_insn *insn)
46958 {
46959 int byte_len;
46960 enum insn_path path;
46961 enum dispatch_group group;
46962 int imm_size;
46963 int num_imm_operand;
46964 int num_imm32_operand;
46965 int num_imm64_operand;
46966
46967 if (INSN_CODE (insn) < 0)
46968 return;
46969
46970 byte_len = min_insn_size (insn);
46971 path = get_insn_path (insn);
46972 group = get_insn_group (insn);
46973 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
46974 &num_imm64_operand);
46975
46976 fprintf (file, " insn info:\n");
46977 fprintf (file, " group = %s, path = %d, byte_len = %d\n",
46978 group_name[group], path, byte_len);
46979 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
46980 num_imm_operand, num_imm32_operand, num_imm64_operand, imm_size);
46981 }
46982
46983 /* Print to STDERR the status of the ready list with respect to
46984 dispatch windows. */
46985
46986 DEBUG_FUNCTION void
46987 debug_ready_dispatch (void)
46988 {
46989 int i;
46990 int no_ready = number_in_ready ();
46991
46992 fprintf (stdout, "Number of ready: %d\n", no_ready);
46993
46994 for (i = 0; i < no_ready; i++)
46995 debug_insn_dispatch_info_file (stdout, get_ready_element (i));
46996 }
46997
46998 /* This routine is the driver of the dispatch scheduler. */
46999
47000 static void
47001 do_dispatch (rtx_insn *insn, int mode)
47002 {
47003 if (mode == DISPATCH_INIT)
47004 init_dispatch_sched ();
47005 else if (mode == ADD_TO_DISPATCH_WINDOW)
47006 add_to_dispatch_window (insn);
47007 }
47008
47009 /* Return TRUE if Dispatch Scheduling is supported. */
47010
47011 static bool
47012 has_dispatch (rtx_insn *insn, int action)
47013 {
47014 if ((TARGET_BDVER1 || TARGET_BDVER2 || TARGET_BDVER3 || TARGET_BDVER4)
47015 && flag_dispatch_scheduler)
47016 switch (action)
47017 {
47018 default:
47019 return false;
47020
47021 case IS_DISPATCH_ON:
47022 return true;
47023 break;
47024
47025 case IS_CMP:
47026 return is_cmp (insn);
47027
47028 case DISPATCH_VIOLATION:
47029 return dispatch_violation ();
47030
47031 case FITS_DISPATCH_WINDOW:
47032 return fits_dispatch_window (insn);
47033 }
47034
47035 return false;
47036 }
47037
47038 /* Implementation of reassociation_width target hook used by
47039 reassoc phase to identify parallelism level in reassociated
47040 tree. The statement's tree_code is passed in OPC. The arguments'
47041 type is passed in MODE.
47042
47043 Currently parallel reassociation is enabled for Atom
47044 processors only and we set reassociation width to be 2
47045 because Atom may issue up to 2 instructions per cycle.
47046
47047 Return value should be fixed if parallel reassociation is
47048 enabled for other processors. */
47049
47050 static int
47051 ix86_reassociation_width (unsigned int, enum machine_mode mode)
47052 {
47053 int res = 1;
47054
47055 /* Vector part. */
47056 if (VECTOR_MODE_P (mode))
47057 {
47058 if (TARGET_VECTOR_PARALLEL_EXECUTION)
47059 return 2;
47060 else
47061 return 1;
47062 }
47063
47064 /* Scalar part. */
47065 if (INTEGRAL_MODE_P (mode) && TARGET_REASSOC_INT_TO_PARALLEL)
47066 res = 2;
47067 else if (FLOAT_MODE_P (mode) && TARGET_REASSOC_FP_TO_PARALLEL)
47068 res = 2;
47069
47070 return res;
47071 }
47072
47073 /* ??? No autovectorization into MMX or 3DNOW until we can reliably
47074 place emms and femms instructions. */
47075
47076 static enum machine_mode
47077 ix86_preferred_simd_mode (enum machine_mode mode)
47078 {
47079 if (!TARGET_SSE)
47080 return word_mode;
47081
47082 switch (mode)
47083 {
47084 case QImode:
47085 return TARGET_AVX512BW ? V64QImode :
47086 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V32QImode : V16QImode;
47087 case HImode:
47088 return TARGET_AVX512BW ? V32HImode :
47089 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V16HImode : V8HImode;
47090 case SImode:
47091 return TARGET_AVX512F ? V16SImode :
47092 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V8SImode : V4SImode;
47093 case DImode:
47094 return TARGET_AVX512F ? V8DImode :
47095 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V4DImode : V2DImode;
47096
47097 case SFmode:
47098 if (TARGET_AVX512F)
47099 return V16SFmode;
47100 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
47101 return V8SFmode;
47102 else
47103 return V4SFmode;
47104
47105 case DFmode:
47106 if (!TARGET_VECTORIZE_DOUBLE)
47107 return word_mode;
47108 else if (TARGET_AVX512F)
47109 return V8DFmode;
47110 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
47111 return V4DFmode;
47112 else if (TARGET_SSE2)
47113 return V2DFmode;
47114 /* FALLTHRU */
47115
47116 default:
47117 return word_mode;
47118 }
47119 }
47120
47121 /* If AVX is enabled then try vectorizing with both 256bit and 128bit
47122 vectors. If AVX512F is enabled then try vectorizing with 512bit,
47123 256bit and 128bit vectors. */
47124
47125 static unsigned int
47126 ix86_autovectorize_vector_sizes (void)
47127 {
47128 return TARGET_AVX512F ? 64 | 32 | 16 :
47129 (TARGET_AVX && !TARGET_PREFER_AVX128) ? 32 | 16 : 0;
47130 }
47131
47132 \f
47133
47134 /* Return the class of registers which could be used for a pseudo of MODE
47135 and of class RCLASS for spilling instead of memory. Return NO_REGS
47136 if it is not possible or not profitable. */
47137 static reg_class_t
47138 ix86_spill_class (reg_class_t rclass, enum machine_mode mode)
47139 {
47140 if (TARGET_SSE && TARGET_GENERAL_REGS_SSE_SPILL && ! TARGET_MMX
47141 && (mode == SImode || (TARGET_64BIT && mode == DImode))
47142 && rclass != NO_REGS && INTEGER_CLASS_P (rclass))
47143 return ALL_SSE_REGS;
47144 return NO_REGS;
47145 }
47146
47147 /* Implement targetm.vectorize.init_cost. */
47148
47149 static void *
47150 ix86_init_cost (struct loop *)
47151 {
47152 unsigned *cost = XNEWVEC (unsigned, 3);
47153 cost[vect_prologue] = cost[vect_body] = cost[vect_epilogue] = 0;
47154 return cost;
47155 }
47156
47157 /* Implement targetm.vectorize.add_stmt_cost. */
47158
47159 static unsigned
47160 ix86_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
47161 struct _stmt_vec_info *stmt_info, int misalign,
47162 enum vect_cost_model_location where)
47163 {
47164 unsigned *cost = (unsigned *) data;
47165 unsigned retval = 0;
47166
47167 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
47168 int stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);
47169
47170 /* Statements in an inner loop relative to the loop being
47171 vectorized are weighted more heavily. The value here is
47172 arbitrary and could potentially be improved with analysis. */
47173 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
47174 count *= 50; /* FIXME. */
47175
47176 retval = (unsigned) (count * stmt_cost);
47177
47178 /* We need to multiply all vector stmt costs by 1.7 (estimated cost)
47179 for Silvermont as it has an out-of-order integer pipeline and can execute
47180 2 scalar instructions per tick, but has an in-order SIMD pipeline. */
47181 if (TARGET_SILVERMONT || TARGET_INTEL)
47182 if (stmt_info && stmt_info->stmt)
47183 {
47184 tree lhs_op = gimple_get_lhs (stmt_info->stmt);
47185 if (lhs_op && TREE_CODE (TREE_TYPE (lhs_op)) == INTEGER_TYPE)
47186 retval = (retval * 17) / 10;
47187 }
47188
47189 cost[where] += retval;
47190
47191 return retval;
47192 }
47193
47194 /* Implement targetm.vectorize.finish_cost. */
47195
47196 static void
47197 ix86_finish_cost (void *data, unsigned *prologue_cost,
47198 unsigned *body_cost, unsigned *epilogue_cost)
47199 {
47200 unsigned *cost = (unsigned *) data;
47201 *prologue_cost = cost[vect_prologue];
47202 *body_cost = cost[vect_body];
47203 *epilogue_cost = cost[vect_epilogue];
47204 }
47205
47206 /* Implement targetm.vectorize.destroy_cost_data. */
47207
47208 static void
47209 ix86_destroy_cost_data (void *data)
47210 {
47211 free (data);
47212 }
47213
47214 /* Validate target specific memory model bits in VAL. */
47215
47216 static unsigned HOST_WIDE_INT
47217 ix86_memmodel_check (unsigned HOST_WIDE_INT val)
47218 {
47219 unsigned HOST_WIDE_INT model = val & MEMMODEL_MASK;
47220 bool strong;
47221
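/* Any bits outside IX86_HLE_ACQUIRE, IX86_HLE_RELEASE and MEMMODEL_MASK,
   or both HLE bits requested at once, make VAL invalid; such values fall
   back to MEMMODEL_SEQ_CST below.  */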
47222 if (val & ~(unsigned HOST_WIDE_INT)(IX86_HLE_ACQUIRE|IX86_HLE_RELEASE
47223 |MEMMODEL_MASK)
47224 || ((val & IX86_HLE_ACQUIRE) && (val & IX86_HLE_RELEASE)))
47225 {
47226 warning (OPT_Winvalid_memory_model,
47227 "Unknown architecture specific memory model");
47228 return MEMMODEL_SEQ_CST;
47229 }
47230 strong = (model == MEMMODEL_ACQ_REL || model == MEMMODEL_SEQ_CST);
47231 if (val & IX86_HLE_ACQUIRE && !(model == MEMMODEL_ACQUIRE || strong))
47232 {
47233 warning (OPT_Winvalid_memory_model,
47234 "HLE_ACQUIRE not used with ACQUIRE or stronger memory model");
47235 return MEMMODEL_SEQ_CST | IX86_HLE_ACQUIRE;
47236 }
47237 if (val & IX86_HLE_RELEASE && !(model == MEMMODEL_RELEASE || strong))
47238 {
47239 warning (OPT_Winvalid_memory_model,
47240 "HLE_RELEASE not used with RELEASE or stronger memory model");
47241 return MEMMODEL_SEQ_CST | IX86_HLE_RELEASE;
47242 }
47243 return val;
47244 }
47245
47246 /* Set CLONEI->vecsize_mangle, CLONEI->vecsize_int,
47247 CLONEI->vecsize_float and if CLONEI->simdlen is 0, also
47248 CLONEI->simdlen. Return 0 if SIMD clones shouldn't be emitted,
47249 or number of vecsize_mangle variants that should be emitted. */
47250
47251 static int
47252 ix86_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
47253 struct cgraph_simd_clone *clonei,
47254 tree base_type, int num)
47255 {
47256 int ret = 1;
47257
47258 if (clonei->simdlen
47259 && (clonei->simdlen < 2
47260 || clonei->simdlen > 16
47261 || (clonei->simdlen & (clonei->simdlen - 1)) != 0))
47262 {
47263 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
47264 "unsupported simdlen %d", clonei->simdlen);
47265 return 0;
47266 }
47267
47268 tree ret_type = TREE_TYPE (TREE_TYPE (node->decl));
47269 if (TREE_CODE (ret_type) != VOID_TYPE)
47270 switch (TYPE_MODE (ret_type))
47271 {
47272 case QImode:
47273 case HImode:
47274 case SImode:
47275 case DImode:
47276 case SFmode:
47277 case DFmode:
47278 /* case SCmode: */
47279 /* case DCmode: */
47280 break;
47281 default:
47282 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
47283 "unsupported return type %qT for simd\n", ret_type);
47284 return 0;
47285 }
47286
47287 tree t;
47288 int i;
47289
47290 for (t = DECL_ARGUMENTS (node->decl), i = 0; t; t = DECL_CHAIN (t), i++)
47291 /* FIXME: Shouldn't we allow such arguments if they are uniform? */
47292 switch (TYPE_MODE (TREE_TYPE (t)))
47293 {
47294 case QImode:
47295 case HImode:
47296 case SImode:
47297 case DImode:
47298 case SFmode:
47299 case DFmode:
47300 /* case SCmode: */
47301 /* case DCmode: */
47302 break;
47303 default:
47304 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
47305 "unsupported argument type %qT for simd\n", TREE_TYPE (t));
47306 return 0;
47307 }
47308
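/* The vecsize_mangle letter selects the ISA for the clone: 'b' is the SSE2
   variant, 'c' the AVX variant and 'd' the AVX2 variant (see
   ix86_simd_clone_adjust and ix86_simd_clone_usable below).  */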
47309 if (clonei->cilk_elemental)
47310 {
47311 /* Parse the processor clause here. If not present, default to 'b'. */
47312 clonei->vecsize_mangle = 'b';
47313 }
47314 else if (!TREE_PUBLIC (node->decl))
47315 {
47316 /* If the function isn't exported, we can pick just one ISA
47317 for the clones. */
47318 if (TARGET_AVX2)
47319 clonei->vecsize_mangle = 'd';
47320 else if (TARGET_AVX)
47321 clonei->vecsize_mangle = 'c';
47322 else
47323 clonei->vecsize_mangle = 'b';
47324 ret = 1;
47325 }
47326 else
47327 {
47328 clonei->vecsize_mangle = "bcd"[num];
47329 ret = 3;
47330 }
47331 switch (clonei->vecsize_mangle)
47332 {
47333 case 'b':
47334 clonei->vecsize_int = 128;
47335 clonei->vecsize_float = 128;
47336 break;
47337 case 'c':
47338 clonei->vecsize_int = 128;
47339 clonei->vecsize_float = 256;
47340 break;
47341 case 'd':
47342 clonei->vecsize_int = 256;
47343 clonei->vecsize_float = 256;
47344 break;
47345 }
47346 if (clonei->simdlen == 0)
47347 {
47348 if (SCALAR_INT_MODE_P (TYPE_MODE (base_type)))
47349 clonei->simdlen = clonei->vecsize_int;
47350 else
47351 clonei->simdlen = clonei->vecsize_float;
47352 clonei->simdlen /= GET_MODE_BITSIZE (TYPE_MODE (base_type));
47353 if (clonei->simdlen > 16)
47354 clonei->simdlen = 16;
47355 }
47356 return ret;
47357 }
47358
47359 /* Add target attribute to SIMD clone NODE if needed. */
47360
47361 static void
47362 ix86_simd_clone_adjust (struct cgraph_node *node)
47363 {
47364 const char *str = NULL;
47365 gcc_assert (node->decl == cfun->decl);
47366 switch (node->simdclone->vecsize_mangle)
47367 {
47368 case 'b':
47369 if (!TARGET_SSE2)
47370 str = "sse2";
47371 break;
47372 case 'c':
47373 if (!TARGET_AVX)
47374 str = "avx";
47375 break;
47376 case 'd':
47377 if (!TARGET_AVX2)
47378 str = "avx2";
47379 break;
47380 default:
47381 gcc_unreachable ();
47382 }
47383 if (str == NULL)
47384 return;
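/* Temporarily clear cfun while adding the target attribute, then force
   ix86_set_current_function to re-select NODE->decl so the new target
   options take effect for the clone's body.  */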
47385 push_cfun (NULL);
47386 tree args = build_tree_list (NULL_TREE, build_string (strlen (str), str));
47387 bool ok = ix86_valid_target_attribute_p (node->decl, NULL, args, 0);
47388 gcc_assert (ok);
47389 pop_cfun ();
47390 ix86_previous_fndecl = NULL_TREE;
47391 ix86_set_current_function (node->decl);
47392 }
47393
47394 /* If SIMD clone NODE can't be used in a vectorized loop
47395 in the current function, return -1, otherwise return the badness of using it
47396 (0 if it is most desirable from the vecsize_mangle point of view, 1
47397 slightly less desirable, etc.). */
47398
47399 static int
47400 ix86_simd_clone_usable (struct cgraph_node *node)
47401 {
47402 switch (node->simdclone->vecsize_mangle)
47403 {
47404 case 'b':
47405 if (!TARGET_SSE2)
47406 return -1;
47407 if (!TARGET_AVX)
47408 return 0;
47409 return TARGET_AVX2 ? 2 : 1;
47410 case 'c':
47411 if (!TARGET_AVX)
47412 return -1;
47413 return TARGET_AVX2 ? 1 : 0;
47414 break;
47415 case 'd':
47416 if (!TARGET_AVX2)
47417 return -1;
47418 return 0;
47419 default:
47420 gcc_unreachable ();
47421 }
47422 }
47423
47424 /* Count the memory references in *X and accumulate them in MEM_COUNT.
47425 The total determines the unrolling factor for the
47426 bdver3 and bdver4 architectures. */
47427
47428 static int
47429 ix86_loop_memcount (rtx *x, unsigned *mem_count)
47430 {
47431 if (*x != NULL_RTX && MEM_P (*x))
47432 {
47433 enum machine_mode mode;
47434 unsigned int n_words;
47435
47436 mode = GET_MODE (*x);
47437 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
47438
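/* A reference wider than four words (e.g. a 512-bit access) counts as two
   memory references for the unrolling heuristic.  */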
47439 if (n_words > 4)
47440 (*mem_count) += 2;
47441 else
47442 (*mem_count) += 1;
47443 }
47444 return 0;
47445 }
47446
47447 /* This function adjusts the unroll factor based on
47448 the hardware capabilities. For example, bdver3 has
47449 a loop buffer which makes unrolling of smaller
47450 loops less important. This function decides the
47451 unroll factor using the number of memory references
47452 (targeting roughly 32 per unrolled body) as a heuristic. */
47453
47454 static unsigned
47455 ix86_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
47456 {
47457 basic_block *bbs;
47458 rtx_insn *insn;
47459 unsigned i;
47460 unsigned mem_count = 0;
47461
47462 if (!TARGET_ADJUST_UNROLL)
47463 return nunroll;
47464
47465 /* Count the number of memory references within the loop body. */
47466 bbs = get_loop_body (loop);
47467 for (i = 0; i < loop->num_nodes; i++)
47468 {
47469 for (insn = BB_HEAD (bbs[i]); insn != BB_END (bbs[i]); insn = NEXT_INSN (insn))
47470 if (NONDEBUG_INSN_P (insn))
47471 for_each_rtx_in_insn (&insn, (rtx_function) ix86_loop_memcount,
47472 &mem_count);
47473 }
47474 free (bbs);
47475
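/* Unroll so that the unrolled body contains roughly 32 memory references;
   loops already above that threshold keep the generic unroll factor.  */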
47476 if (mem_count && mem_count <= 32)
47477 return 32 / mem_count;
47478
47479 return nunroll;
47480 }
47481
47482
47483 /* Implement TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P. */
47484
47485 static bool
47486 ix86_float_exceptions_rounding_supported_p (void)
47487 {
47488 /* For x87 floating point with standard excess precision handling,
47489 there is no adddf3 pattern (since x87 floating point only has
47490 XFmode operations) so the default hook implementation gets this
47491 wrong. */
47492 return TARGET_80387 || TARGET_SSE_MATH;
47493 }
47494
47495 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV. */
47496
47497 static void
47498 ix86_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
47499 {
47500 if (!TARGET_80387 && !TARGET_SSE_MATH)
47501 return;
47502 tree exceptions_var = create_tmp_var (integer_type_node, NULL);
47503 if (TARGET_80387)
47504 {
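/* fnstenv stores the 28-byte x87 environment (control word, status word,
   tag word, instruction and operand pointers); model it as an array of
   seven unsigned ints, hence the 0..6 index type.  */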
47505 tree fenv_index_type = build_index_type (size_int (6));
47506 tree fenv_type = build_array_type (unsigned_type_node, fenv_index_type);
47507 tree fenv_var = create_tmp_var (fenv_type, NULL);
47508 mark_addressable (fenv_var);
47509 tree fenv_ptr = build_pointer_type (fenv_type);
47510 tree fenv_addr = build1 (ADDR_EXPR, fenv_ptr, fenv_var);
47511 fenv_addr = fold_convert (ptr_type_node, fenv_addr);
47512 tree fnstenv = ix86_builtins[IX86_BUILTIN_FNSTENV];
47513 tree fldenv = ix86_builtins[IX86_BUILTIN_FLDENV];
47514 tree fnstsw = ix86_builtins[IX86_BUILTIN_FNSTSW];
47515 tree fnclex = ix86_builtins[IX86_BUILTIN_FNCLEX];
47516 tree hold_fnstenv = build_call_expr (fnstenv, 1, fenv_addr);
47517 tree hold_fnclex = build_call_expr (fnclex, 0);
47518 *hold = build2 (COMPOUND_EXPR, void_type_node, hold_fnstenv,
47519 hold_fnclex);
47520 *clear = build_call_expr (fnclex, 0);
47521 tree sw_var = create_tmp_var (short_unsigned_type_node, NULL);
47522 tree fnstsw_call = build_call_expr (fnstsw, 0);
47523 tree sw_mod = build2 (MODIFY_EXPR, short_unsigned_type_node,
47524 sw_var, fnstsw_call);
47525 tree exceptions_x87 = fold_convert (integer_type_node, sw_var);
47526 tree update_mod = build2 (MODIFY_EXPR, integer_type_node,
47527 exceptions_var, exceptions_x87);
47528 *update = build2 (COMPOUND_EXPR, integer_type_node,
47529 sw_mod, update_mod);
47530 tree update_fldenv = build_call_expr (fldenv, 1, fenv_addr);
47531 *update = build2 (COMPOUND_EXPR, void_type_node, *update, update_fldenv);
47532 }
47533 if (TARGET_SSE_MATH)
47534 {
47535 tree mxcsr_orig_var = create_tmp_var (unsigned_type_node, NULL);
47536 tree mxcsr_mod_var = create_tmp_var (unsigned_type_node, NULL);
47537 tree stmxcsr = ix86_builtins[IX86_BUILTIN_STMXCSR];
47538 tree ldmxcsr = ix86_builtins[IX86_BUILTIN_LDMXCSR];
47539 tree stmxcsr_hold_call = build_call_expr (stmxcsr, 0);
47540 tree hold_assign_orig = build2 (MODIFY_EXPR, unsigned_type_node,
47541 mxcsr_orig_var, stmxcsr_hold_call);
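/* For the hold sequence, mask all SSE exceptions (OR in 0x1f80, the six
   exception mask bits of MXCSR) and clear any pending exception flags
   (AND with 0xffffffc0, which zeroes status bits 0-5).  */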
47542 tree hold_mod_val = build2 (BIT_IOR_EXPR, unsigned_type_node,
47543 mxcsr_orig_var,
47544 build_int_cst (unsigned_type_node, 0x1f80));
47545 hold_mod_val = build2 (BIT_AND_EXPR, unsigned_type_node, hold_mod_val,
47546 build_int_cst (unsigned_type_node, 0xffffffc0));
47547 tree hold_assign_mod = build2 (MODIFY_EXPR, unsigned_type_node,
47548 mxcsr_mod_var, hold_mod_val);
47549 tree ldmxcsr_hold_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
47550 tree hold_all = build2 (COMPOUND_EXPR, unsigned_type_node,
47551 hold_assign_orig, hold_assign_mod);
47552 hold_all = build2 (COMPOUND_EXPR, void_type_node, hold_all,
47553 ldmxcsr_hold_call);
47554 if (*hold)
47555 *hold = build2 (COMPOUND_EXPR, void_type_node, *hold, hold_all);
47556 else
47557 *hold = hold_all;
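/* Reloading the masked MXCSR value clears any exception flags raised in
   the meantime while keeping all exceptions masked.  */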
47558 tree ldmxcsr_clear_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
47559 if (*clear)
47560 *clear = build2 (COMPOUND_EXPR, void_type_node, *clear,
47561 ldmxcsr_clear_call);
47562 else
47563 *clear = ldmxcsr_clear_call;
47564 tree stmxcsr_update_call = build_call_expr (stmxcsr, 0);
47565 tree exceptions_sse = fold_convert (integer_type_node,
47566 stmxcsr_update_call);
47567 if (*update)
47568 {
47569 tree exceptions_mod = build2 (BIT_IOR_EXPR, integer_type_node,
47570 exceptions_var, exceptions_sse);
47571 tree exceptions_assign = build2 (MODIFY_EXPR, integer_type_node,
47572 exceptions_var, exceptions_mod);
47573 *update = build2 (COMPOUND_EXPR, integer_type_node, *update,
47574 exceptions_assign);
47575 }
47576 else
47577 *update = build2 (MODIFY_EXPR, integer_type_node,
47578 exceptions_var, exceptions_sse);
47579 tree ldmxcsr_update_call = build_call_expr (ldmxcsr, 1, mxcsr_orig_var);
47580 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
47581 ldmxcsr_update_call);
47582 }
47583 tree atomic_feraiseexcept
47584 = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
47585 tree atomic_feraiseexcept_call = build_call_expr (atomic_feraiseexcept,
47586 1, exceptions_var);
47587 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
47588 atomic_feraiseexcept_call);
47589 }
47590
47591 /* Initialize the GCC target structure. */
47592 #undef TARGET_RETURN_IN_MEMORY
47593 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
47594
47595 #undef TARGET_LEGITIMIZE_ADDRESS
47596 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
47597
47598 #undef TARGET_ATTRIBUTE_TABLE
47599 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
47600 #undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
47601 #define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true
47602 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
47603 # undef TARGET_MERGE_DECL_ATTRIBUTES
47604 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
47605 #endif
47606
47607 #undef TARGET_COMP_TYPE_ATTRIBUTES
47608 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
47609
47610 #undef TARGET_INIT_BUILTINS
47611 #define TARGET_INIT_BUILTINS ix86_init_builtins
47612 #undef TARGET_BUILTIN_DECL
47613 #define TARGET_BUILTIN_DECL ix86_builtin_decl
47614 #undef TARGET_EXPAND_BUILTIN
47615 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
47616
47617 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
47618 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
47619 ix86_builtin_vectorized_function
47620
47621 #undef TARGET_VECTORIZE_BUILTIN_TM_LOAD
47622 #define TARGET_VECTORIZE_BUILTIN_TM_LOAD ix86_builtin_tm_load
47623
47624 #undef TARGET_VECTORIZE_BUILTIN_TM_STORE
47625 #define TARGET_VECTORIZE_BUILTIN_TM_STORE ix86_builtin_tm_store
47626
47627 #undef TARGET_VECTORIZE_BUILTIN_GATHER
47628 #define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather
47629
47630 #undef TARGET_BUILTIN_RECIPROCAL
47631 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
47632
47633 #undef TARGET_ASM_FUNCTION_EPILOGUE
47634 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
47635
47636 #undef TARGET_ENCODE_SECTION_INFO
47637 #ifndef SUBTARGET_ENCODE_SECTION_INFO
47638 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
47639 #else
47640 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
47641 #endif
47642
47643 #undef TARGET_ASM_OPEN_PAREN
47644 #define TARGET_ASM_OPEN_PAREN ""
47645 #undef TARGET_ASM_CLOSE_PAREN
47646 #define TARGET_ASM_CLOSE_PAREN ""
47647
47648 #undef TARGET_ASM_BYTE_OP
47649 #define TARGET_ASM_BYTE_OP ASM_BYTE
47650
47651 #undef TARGET_ASM_ALIGNED_HI_OP
47652 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
47653 #undef TARGET_ASM_ALIGNED_SI_OP
47654 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
47655 #ifdef ASM_QUAD
47656 #undef TARGET_ASM_ALIGNED_DI_OP
47657 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
47658 #endif
47659
47660 #undef TARGET_PROFILE_BEFORE_PROLOGUE
47661 #define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue
47662
47663 #undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
47664 #define TARGET_MANGLE_DECL_ASSEMBLER_NAME ix86_mangle_decl_assembler_name
47665
47666 #undef TARGET_ASM_UNALIGNED_HI_OP
47667 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
47668 #undef TARGET_ASM_UNALIGNED_SI_OP
47669 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
47670 #undef TARGET_ASM_UNALIGNED_DI_OP
47671 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
47672
47673 #undef TARGET_PRINT_OPERAND
47674 #define TARGET_PRINT_OPERAND ix86_print_operand
47675 #undef TARGET_PRINT_OPERAND_ADDRESS
47676 #define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
47677 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
47678 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
47679 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
47680 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
47681
47682 #undef TARGET_SCHED_INIT_GLOBAL
47683 #define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
47684 #undef TARGET_SCHED_ADJUST_COST
47685 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
47686 #undef TARGET_SCHED_ISSUE_RATE
47687 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
47688 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
47689 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
47690 ia32_multipass_dfa_lookahead
47691 #undef TARGET_SCHED_MACRO_FUSION_P
47692 #define TARGET_SCHED_MACRO_FUSION_P ix86_macro_fusion_p
47693 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
47694 #define TARGET_SCHED_MACRO_FUSION_PAIR_P ix86_macro_fusion_pair_p
47695
47696 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
47697 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
47698
47699 #undef TARGET_MEMMODEL_CHECK
47700 #define TARGET_MEMMODEL_CHECK ix86_memmodel_check
47701
47702 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
47703 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV ix86_atomic_assign_expand_fenv
47704
47705 #ifdef HAVE_AS_TLS
47706 #undef TARGET_HAVE_TLS
47707 #define TARGET_HAVE_TLS true
47708 #endif
47709 #undef TARGET_CANNOT_FORCE_CONST_MEM
47710 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
47711 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
47712 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
47713
47714 #undef TARGET_DELEGITIMIZE_ADDRESS
47715 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
47716
47717 #undef TARGET_MS_BITFIELD_LAYOUT_P
47718 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
47719
47720 #if TARGET_MACHO
47721 #undef TARGET_BINDS_LOCAL_P
47722 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
47723 #endif
47724 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
47725 #undef TARGET_BINDS_LOCAL_P
47726 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
47727 #endif
47728
47729 #undef TARGET_ASM_OUTPUT_MI_THUNK
47730 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
47731 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
47732 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
47733
47734 #undef TARGET_ASM_FILE_START
47735 #define TARGET_ASM_FILE_START x86_file_start
47736
47737 #undef TARGET_OPTION_OVERRIDE
47738 #define TARGET_OPTION_OVERRIDE ix86_option_override
47739
47740 #undef TARGET_REGISTER_MOVE_COST
47741 #define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
47742 #undef TARGET_MEMORY_MOVE_COST
47743 #define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
47744 #undef TARGET_RTX_COSTS
47745 #define TARGET_RTX_COSTS ix86_rtx_costs
47746 #undef TARGET_ADDRESS_COST
47747 #define TARGET_ADDRESS_COST ix86_address_cost
47748
47749 #undef TARGET_FIXED_CONDITION_CODE_REGS
47750 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
47751 #undef TARGET_CC_MODES_COMPATIBLE
47752 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
47753
47754 #undef TARGET_MACHINE_DEPENDENT_REORG
47755 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
47756
47757 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
47758 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value
47759
47760 #undef TARGET_BUILD_BUILTIN_VA_LIST
47761 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
47762
47763 #undef TARGET_FOLD_BUILTIN
47764 #define TARGET_FOLD_BUILTIN ix86_fold_builtin
47765
47766 #undef TARGET_COMPARE_VERSION_PRIORITY
47767 #define TARGET_COMPARE_VERSION_PRIORITY ix86_compare_version_priority
47768
47769 #undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
47770 #define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
47771 ix86_generate_version_dispatcher_body
47772
47773 #undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
47774 #define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
47775 ix86_get_function_versions_dispatcher
47776
47777 #undef TARGET_ENUM_VA_LIST_P
47778 #define TARGET_ENUM_VA_LIST_P ix86_enum_va_list
47779
47780 #undef TARGET_FN_ABI_VA_LIST
47781 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
47782
47783 #undef TARGET_CANONICAL_VA_LIST_TYPE
47784 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
47785
47786 #undef TARGET_EXPAND_BUILTIN_VA_START
47787 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
47788
47789 #undef TARGET_MD_ASM_CLOBBERS
47790 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
47791
47792 #undef TARGET_PROMOTE_PROTOTYPES
47793 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
47794 #undef TARGET_SETUP_INCOMING_VARARGS
47795 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
47796 #undef TARGET_MUST_PASS_IN_STACK
47797 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
47798 #undef TARGET_FUNCTION_ARG_ADVANCE
47799 #define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
47800 #undef TARGET_FUNCTION_ARG
47801 #define TARGET_FUNCTION_ARG ix86_function_arg
47802 #undef TARGET_INIT_PIC_REG
47803 #define TARGET_INIT_PIC_REG ix86_init_pic_reg
47804 #undef TARGET_USE_PSEUDO_PIC_REG
47805 #define TARGET_USE_PSEUDO_PIC_REG ix86_use_pseudo_pic_reg
47806 #undef TARGET_FUNCTION_ARG_BOUNDARY
47807 #define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
47808 #undef TARGET_PASS_BY_REFERENCE
47809 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
47810 #undef TARGET_INTERNAL_ARG_POINTER
47811 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
47812 #undef TARGET_UPDATE_STACK_BOUNDARY
47813 #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
47814 #undef TARGET_GET_DRAP_RTX
47815 #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
47816 #undef TARGET_STRICT_ARGUMENT_NAMING
47817 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
47818 #undef TARGET_STATIC_CHAIN
47819 #define TARGET_STATIC_CHAIN ix86_static_chain
47820 #undef TARGET_TRAMPOLINE_INIT
47821 #define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
47822 #undef TARGET_RETURN_POPS_ARGS
47823 #define TARGET_RETURN_POPS_ARGS ix86_return_pops_args
47824
47825 #undef TARGET_LEGITIMATE_COMBINED_INSN
47826 #define TARGET_LEGITIMATE_COMBINED_INSN ix86_legitimate_combined_insn
47827
47828 #undef TARGET_ASAN_SHADOW_OFFSET
47829 #define TARGET_ASAN_SHADOW_OFFSET ix86_asan_shadow_offset
47830
47831 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
47832 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
47833
47834 #undef TARGET_SCALAR_MODE_SUPPORTED_P
47835 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
47836
47837 #undef TARGET_VECTOR_MODE_SUPPORTED_P
47838 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
47839
47840 #undef TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P
47841 #define TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P \
47842 ix86_libgcc_floating_mode_supported_p
47843
47844 #undef TARGET_C_MODE_FOR_SUFFIX
47845 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
47846
47847 #ifdef HAVE_AS_TLS
47848 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
47849 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
47850 #endif
47851
47852 #ifdef SUBTARGET_INSERT_ATTRIBUTES
47853 #undef TARGET_INSERT_ATTRIBUTES
47854 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
47855 #endif
47856
47857 #undef TARGET_MANGLE_TYPE
47858 #define TARGET_MANGLE_TYPE ix86_mangle_type
47859
47860 #if !TARGET_MACHO
47861 #undef TARGET_STACK_PROTECT_FAIL
47862 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
47863 #endif
47864
47865 #undef TARGET_FUNCTION_VALUE
47866 #define TARGET_FUNCTION_VALUE ix86_function_value
47867
47868 #undef TARGET_FUNCTION_VALUE_REGNO_P
47869 #define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p
47870
47871 #undef TARGET_PROMOTE_FUNCTION_MODE
47872 #define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode
47873
47874 #undef TARGET_MEMBER_TYPE_FORCES_BLK
47875 #define TARGET_MEMBER_TYPE_FORCES_BLK ix86_member_type_forces_blk
47876
47877 #undef TARGET_INSTANTIATE_DECLS
47878 #define TARGET_INSTANTIATE_DECLS ix86_instantiate_decls
47879
47880 #undef TARGET_SECONDARY_RELOAD
47881 #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
47882
47883 #undef TARGET_CLASS_MAX_NREGS
47884 #define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs
47885
47886 #undef TARGET_PREFERRED_RELOAD_CLASS
47887 #define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
47888 #undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
47889 #define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
47890 #undef TARGET_CLASS_LIKELY_SPILLED_P
47891 #define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p
47892
47893 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
47894 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
47895 ix86_builtin_vectorization_cost
47896 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
47897 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
47898 ix86_vectorize_vec_perm_const_ok
47899 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
47900 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
47901 ix86_preferred_simd_mode
47902 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
47903 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
47904 ix86_autovectorize_vector_sizes
47905 #undef TARGET_VECTORIZE_INIT_COST
47906 #define TARGET_VECTORIZE_INIT_COST ix86_init_cost
47907 #undef TARGET_VECTORIZE_ADD_STMT_COST
47908 #define TARGET_VECTORIZE_ADD_STMT_COST ix86_add_stmt_cost
47909 #undef TARGET_VECTORIZE_FINISH_COST
47910 #define TARGET_VECTORIZE_FINISH_COST ix86_finish_cost
47911 #undef TARGET_VECTORIZE_DESTROY_COST_DATA
47912 #define TARGET_VECTORIZE_DESTROY_COST_DATA ix86_destroy_cost_data
47913
47914 #undef TARGET_SET_CURRENT_FUNCTION
47915 #define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
47916
47917 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
47918 #define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p
47919
47920 #undef TARGET_OPTION_SAVE
47921 #define TARGET_OPTION_SAVE ix86_function_specific_save
47922
47923 #undef TARGET_OPTION_RESTORE
47924 #define TARGET_OPTION_RESTORE ix86_function_specific_restore
47925
47926 #undef TARGET_OPTION_PRINT
47927 #define TARGET_OPTION_PRINT ix86_function_specific_print
47928
47929 #undef TARGET_OPTION_FUNCTION_VERSIONS
47930 #define TARGET_OPTION_FUNCTION_VERSIONS ix86_function_versions
47931
47932 #undef TARGET_CAN_INLINE_P
47933 #define TARGET_CAN_INLINE_P ix86_can_inline_p
47934
47935 #undef TARGET_EXPAND_TO_RTL_HOOK
47936 #define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi
47937
47938 #undef TARGET_LEGITIMATE_ADDRESS_P
47939 #define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p
47940
47941 #undef TARGET_LRA_P
47942 #define TARGET_LRA_P hook_bool_void_true
47943
47944 #undef TARGET_REGISTER_PRIORITY
47945 #define TARGET_REGISTER_PRIORITY ix86_register_priority
47946
47947 #undef TARGET_REGISTER_USAGE_LEVELING_P
47948 #define TARGET_REGISTER_USAGE_LEVELING_P hook_bool_void_true
47949
47950 #undef TARGET_LEGITIMATE_CONSTANT_P
47951 #define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p
47952
47953 #undef TARGET_FRAME_POINTER_REQUIRED
47954 #define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required
47955
47956 #undef TARGET_CAN_ELIMINATE
47957 #define TARGET_CAN_ELIMINATE ix86_can_eliminate
47958
47959 #undef TARGET_EXTRA_LIVE_ON_ENTRY
47960 #define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry
47961
47962 #undef TARGET_ASM_CODE_END
47963 #define TARGET_ASM_CODE_END ix86_code_end
47964
47965 #undef TARGET_CONDITIONAL_REGISTER_USAGE
47966 #define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage
47967
47968 #if TARGET_MACHO
47969 #undef TARGET_INIT_LIBFUNCS
47970 #define TARGET_INIT_LIBFUNCS darwin_rename_builtins
47971 #endif
47972
47973 #undef TARGET_LOOP_UNROLL_ADJUST
47974 #define TARGET_LOOP_UNROLL_ADJUST ix86_loop_unroll_adjust
47975
47976 #undef TARGET_SPILL_CLASS
47977 #define TARGET_SPILL_CLASS ix86_spill_class
47978
47979 #undef TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN
47980 #define TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN \
47981 ix86_simd_clone_compute_vecsize_and_simdlen
47982
47983 #undef TARGET_SIMD_CLONE_ADJUST
47984 #define TARGET_SIMD_CLONE_ADJUST \
47985 ix86_simd_clone_adjust
47986
47987 #undef TARGET_SIMD_CLONE_USABLE
47988 #define TARGET_SIMD_CLONE_USABLE \
47989 ix86_simd_clone_usable
47990
47991 #undef TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P
47992 #define TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P \
47993 ix86_float_exceptions_rounding_supported_p
47994
47995 #undef TARGET_MODE_EMIT
47996 #define TARGET_MODE_EMIT ix86_emit_mode_set
47997
47998 #undef TARGET_MODE_NEEDED
47999 #define TARGET_MODE_NEEDED ix86_mode_needed
48000
48001 #undef TARGET_MODE_AFTER
48002 #define TARGET_MODE_AFTER ix86_mode_after
48003
48004 #undef TARGET_MODE_ENTRY
48005 #define TARGET_MODE_ENTRY ix86_mode_entry
48006
48007 #undef TARGET_MODE_EXIT
48008 #define TARGET_MODE_EXIT ix86_mode_exit
48009
48010 #undef TARGET_MODE_PRIORITY
48011 #define TARGET_MODE_PRIORITY ix86_mode_priority
48012
48013 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
48014 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
48015
48016 struct gcc_target targetm = TARGET_INITIALIZER;
48017 \f
48018 #include "gt-i386.h"