1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988-2014 Free Software Foundation, Inc.
3
4 This file is part of GCC.
5
6 GCC is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
9 any later version.
10
11 GCC is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
19
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "tm.h"
24 #include "rtl.h"
25 #include "tree.h"
26 #include "stringpool.h"
27 #include "attribs.h"
28 #include "calls.h"
29 #include "stor-layout.h"
30 #include "varasm.h"
31 #include "tm_p.h"
32 #include "regs.h"
33 #include "hard-reg-set.h"
34 #include "insn-config.h"
35 #include "conditions.h"
36 #include "output.h"
37 #include "insn-codes.h"
38 #include "insn-attr.h"
39 #include "flags.h"
40 #include "except.h"
41 #include "function.h"
42 #include "recog.h"
43 #include "expr.h"
44 #include "optabs.h"
45 #include "diagnostic-core.h"
46 #include "toplev.h"
47 #include "basic-block.h"
48 #include "ggc.h"
49 #include "target.h"
50 #include "target-def.h"
51 #include "common/common-target.h"
52 #include "langhooks.h"
53 #include "reload.h"
54 #include "cgraph.h"
55 #include "hash-table.h"
56 #include "vec.h"
57 #include "basic-block.h"
58 #include "tree-ssa-alias.h"
59 #include "internal-fn.h"
60 #include "gimple-fold.h"
61 #include "tree-eh.h"
62 #include "gimple-expr.h"
63 #include "is-a.h"
64 #include "gimple.h"
65 #include "gimplify.h"
66 #include "cfgloop.h"
67 #include "dwarf2.h"
68 #include "df.h"
69 #include "tm-constrs.h"
70 #include "params.h"
71 #include "cselib.h"
72 #include "debug.h"
73 #include "sched-int.h"
74 #include "sbitmap.h"
75 #include "fibheap.h"
76 #include "opts.h"
77 #include "diagnostic.h"
78 #include "dumpfile.h"
79 #include "tree-pass.h"
80 #include "wide-int.h"
81 #include "context.h"
82 #include "pass_manager.h"
83 #include "target-globals.h"
84 #include "tree-vectorizer.h"
85 #include "shrink-wrap.h"
86 #include "builtins.h"
87
88 static rtx legitimize_dllimport_symbol (rtx, bool);
89 static rtx legitimize_pe_coff_extern_decl (rtx, bool);
90 static rtx legitimize_pe_coff_symbol (rtx, bool);
91
92 #ifndef CHECK_STACK_LIMIT
93 #define CHECK_STACK_LIMIT (-1)
94 #endif
95
96 /* Return index of given mode in mult and division cost tables. */
97 #define MODE_INDEX(mode) \
98 ((mode) == QImode ? 0 \
99 : (mode) == HImode ? 1 \
100 : (mode) == SImode ? 2 \
101 : (mode) == DImode ? 3 \
102 : 4)
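/* Editorial note (illustrative, not part of the original source): MODE_INDEX
   maps QImode->0, HImode->1, SImode->2, DImode->3 and everything else->4,
   i.e. it selects the row of the five-entry multiply and divide cost arrays
   in the processor_costs tables below.  For example, MODE_INDEX (SImode)
   evaluates to 2, so SImode multiply/divide costs sit in the third slot of
   those arrays.  */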
103
104 /* Processor costs (relative to an add) */
105 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
106 #define COSTS_N_BYTES(N) ((N) * 2)
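/* Editorial arithmetic check (relies only on the assumption stated above):
   with COSTS_N_INSNS (N) == (N) * 4, a speed-tuned add costs
   COSTS_N_INSNS (1) == 4, and since an add is 2 bytes,
   COSTS_N_BYTES (2) == 2 * 2 == 4 keeps the size-tuned table below on the
   same scale.  */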
107
108 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall, false}}}
109
110 static stringop_algs ix86_size_memcpy[2] = {
111 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
112 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}};
113 static stringop_algs ix86_size_memset[2] = {
114 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
115 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}};
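/* Editorial sketch of how these stringop tables are read (field meanings
   assumed from the stringop_algs declaration in i386.h): each *_memcpy[2] /
   *_memset[2] pair holds one strategy table for 32-bit code and one for
   64-bit code.  Within a table, the first member is the algorithm used when
   the block size is unknown, followed by {max_size, algorithm, noalign}
   entries tried in order, with max_size == -1 meaning "any larger size".
   So ix86_size_memcpy above simply says: when optimizing for size, always
   use rep movsb, whatever the length.  */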
116
117 const
118 struct processor_costs ix86_size_cost = {/* costs for tuning for size */
119 COSTS_N_BYTES (2), /* cost of an add instruction */
120 COSTS_N_BYTES (3), /* cost of a lea instruction */
121 COSTS_N_BYTES (2), /* variable shift costs */
122 COSTS_N_BYTES (3), /* constant shift costs */
123 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
124 COSTS_N_BYTES (3), /* HI */
125 COSTS_N_BYTES (3), /* SI */
126 COSTS_N_BYTES (3), /* DI */
127 COSTS_N_BYTES (5)}, /* other */
128 0, /* cost of multiply per each bit set */
129 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
130 COSTS_N_BYTES (3), /* HI */
131 COSTS_N_BYTES (3), /* SI */
132 COSTS_N_BYTES (3), /* DI */
133 COSTS_N_BYTES (5)}, /* other */
134 COSTS_N_BYTES (3), /* cost of movsx */
135 COSTS_N_BYTES (3), /* cost of movzx */
136 0, /* "large" insn */
137 2, /* MOVE_RATIO */
138 2, /* cost for loading QImode using movzbl */
139 {2, 2, 2}, /* cost of loading integer registers
140 in QImode, HImode and SImode.
141 Relative to reg-reg move (2). */
142 {2, 2, 2}, /* cost of storing integer registers */
143 2, /* cost of reg,reg fld/fst */
144 {2, 2, 2}, /* cost of loading fp registers
145 in SFmode, DFmode and XFmode */
146 {2, 2, 2}, /* cost of storing fp registers
147 in SFmode, DFmode and XFmode */
148 3, /* cost of moving MMX register */
149 {3, 3}, /* cost of loading MMX registers
150 in SImode and DImode */
151 {3, 3}, /* cost of storing MMX registers
152 in SImode and DImode */
153 3, /* cost of moving SSE register */
154 {3, 3, 3}, /* cost of loading SSE registers
155 in SImode, DImode and TImode */
156 {3, 3, 3}, /* cost of storing SSE registers
157 in SImode, DImode and TImode */
158 3, /* MMX or SSE register to integer */
159 0, /* size of l1 cache */
160 0, /* size of l2 cache */
161 0, /* size of prefetch block */
162 0, /* number of parallel prefetches */
163 2, /* Branch cost */
164 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
165 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
166 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
167 COSTS_N_BYTES (2), /* cost of FABS instruction. */
168 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
169 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
170 ix86_size_memcpy,
171 ix86_size_memset,
172 1, /* scalar_stmt_cost. */
173 1, /* scalar load_cost. */
174 1, /* scalar_store_cost. */
175 1, /* vec_stmt_cost. */
176 1, /* vec_to_scalar_cost. */
177 1, /* scalar_to_vec_cost. */
178 1, /* vec_align_load_cost. */
179 1, /* vec_unalign_load_cost. */
180 1, /* vec_store_cost. */
181 1, /* cond_taken_branch_cost. */
182 1, /* cond_not_taken_branch_cost. */
183 };
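/* Editorial sketch (a use-site approximation; the exact expression lives in
   ix86_rtx_costs and the field names are assumed from the processor_costs
   declaration in i386.h): the active table is reached through a pointer such
   as ix86_cost, or ix86_size_cost when optimizing for size, and queried
   roughly like

     total = cost->add;                                    /+ plain add +/
     total = cost->mult_init[MODE_INDEX (mode)]
             + nbits * cost->mult_bit;                     /+ multiply +/

   Only the relative magnitudes matter, which is why the -Os table above can
   measure everything in bytes.  */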
184
185 /* Processor costs (relative to an add) */
186 static stringop_algs i386_memcpy[2] = {
187 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
188 DUMMY_STRINGOP_ALGS};
189 static stringop_algs i386_memset[2] = {
190 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
191 DUMMY_STRINGOP_ALGS};
192
193 static const
194 struct processor_costs i386_cost = { /* 386 specific costs */
195 COSTS_N_INSNS (1), /* cost of an add instruction */
196 COSTS_N_INSNS (1), /* cost of a lea instruction */
197 COSTS_N_INSNS (3), /* variable shift costs */
198 COSTS_N_INSNS (2), /* constant shift costs */
199 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
200 COSTS_N_INSNS (6), /* HI */
201 COSTS_N_INSNS (6), /* SI */
202 COSTS_N_INSNS (6), /* DI */
203 COSTS_N_INSNS (6)}, /* other */
204 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
205 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
206 COSTS_N_INSNS (23), /* HI */
207 COSTS_N_INSNS (23), /* SI */
208 COSTS_N_INSNS (23), /* DI */
209 COSTS_N_INSNS (23)}, /* other */
210 COSTS_N_INSNS (3), /* cost of movsx */
211 COSTS_N_INSNS (2), /* cost of movzx */
212 15, /* "large" insn */
213 3, /* MOVE_RATIO */
214 4, /* cost for loading QImode using movzbl */
215 {2, 4, 2}, /* cost of loading integer registers
216 in QImode, HImode and SImode.
217 Relative to reg-reg move (2). */
218 {2, 4, 2}, /* cost of storing integer registers */
219 2, /* cost of reg,reg fld/fst */
220 {8, 8, 8}, /* cost of loading fp registers
221 in SFmode, DFmode and XFmode */
222 {8, 8, 8}, /* cost of storing fp registers
223 in SFmode, DFmode and XFmode */
224 2, /* cost of moving MMX register */
225 {4, 8}, /* cost of loading MMX registers
226 in SImode and DImode */
227 {4, 8}, /* cost of storing MMX registers
228 in SImode and DImode */
229 2, /* cost of moving SSE register */
230 {4, 8, 16}, /* cost of loading SSE registers
231 in SImode, DImode and TImode */
232 {4, 8, 16}, /* cost of storing SSE registers
233 in SImode, DImode and TImode */
234 3, /* MMX or SSE register to integer */
235 0, /* size of l1 cache */
236 0, /* size of l2 cache */
237 0, /* size of prefetch block */
238 0, /* number of parallel prefetches */
239 1, /* Branch cost */
240 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
241 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
242 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
243 COSTS_N_INSNS (22), /* cost of FABS instruction. */
244 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
245 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
246 i386_memcpy,
247 i386_memset,
248 1, /* scalar_stmt_cost. */
249 1, /* scalar load_cost. */
250 1, /* scalar_store_cost. */
251 1, /* vec_stmt_cost. */
252 1, /* vec_to_scalar_cost. */
253 1, /* scalar_to_vec_cost. */
254 1, /* vec_align_load_cost. */
255 2, /* vec_unalign_load_cost. */
256 1, /* vec_store_cost. */
257 3, /* cond_taken_branch_cost. */
258 1, /* cond_not_taken_branch_cost. */
259 };
260
261 static stringop_algs i486_memcpy[2] = {
262 {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}},
263 DUMMY_STRINGOP_ALGS};
264 static stringop_algs i486_memset[2] = {
265 {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}},
266 DUMMY_STRINGOP_ALGS};
267
268 static const
269 struct processor_costs i486_cost = { /* 486 specific costs */
270 COSTS_N_INSNS (1), /* cost of an add instruction */
271 COSTS_N_INSNS (1), /* cost of a lea instruction */
272 COSTS_N_INSNS (3), /* variable shift costs */
273 COSTS_N_INSNS (2), /* constant shift costs */
274 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
275 COSTS_N_INSNS (12), /* HI */
276 COSTS_N_INSNS (12), /* SI */
277 COSTS_N_INSNS (12), /* DI */
278 COSTS_N_INSNS (12)}, /* other */
279 1, /* cost of multiply per each bit set */
280 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
281 COSTS_N_INSNS (40), /* HI */
282 COSTS_N_INSNS (40), /* SI */
283 COSTS_N_INSNS (40), /* DI */
284 COSTS_N_INSNS (40)}, /* other */
285 COSTS_N_INSNS (3), /* cost of movsx */
286 COSTS_N_INSNS (2), /* cost of movzx */
287 15, /* "large" insn */
288 3, /* MOVE_RATIO */
289 4, /* cost for loading QImode using movzbl */
290 {2, 4, 2}, /* cost of loading integer registers
291 in QImode, HImode and SImode.
292 Relative to reg-reg move (2). */
293 {2, 4, 2}, /* cost of storing integer registers */
294 2, /* cost of reg,reg fld/fst */
295 {8, 8, 8}, /* cost of loading fp registers
296 in SFmode, DFmode and XFmode */
297 {8, 8, 8}, /* cost of storing fp registers
298 in SFmode, DFmode and XFmode */
299 2, /* cost of moving MMX register */
300 {4, 8}, /* cost of loading MMX registers
301 in SImode and DImode */
302 {4, 8}, /* cost of storing MMX registers
303 in SImode and DImode */
304 2, /* cost of moving SSE register */
305 {4, 8, 16}, /* cost of loading SSE registers
306 in SImode, DImode and TImode */
307 {4, 8, 16}, /* cost of storing SSE registers
308 in SImode, DImode and TImode */
309 3, /* MMX or SSE register to integer */
310 4, /* size of l1 cache. 486 has 8kB cache
311 shared for code and data, so 4kB is
312 not really precise. */
313 4, /* size of l2 cache */
314 0, /* size of prefetch block */
315 0, /* number of parallel prefetches */
316 1, /* Branch cost */
317 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
318 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
319 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
320 COSTS_N_INSNS (3), /* cost of FABS instruction. */
321 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
322 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
323 i486_memcpy,
324 i486_memset,
325 1, /* scalar_stmt_cost. */
326 1, /* scalar load_cost. */
327 1, /* scalar_store_cost. */
328 1, /* vec_stmt_cost. */
329 1, /* vec_to_scalar_cost. */
330 1, /* scalar_to_vec_cost. */
331 1, /* vec_align_load_cost. */
332 2, /* vec_unalign_load_cost. */
333 1, /* vec_store_cost. */
334 3, /* cond_taken_branch_cost. */
335 1, /* cond_not_taken_branch_cost. */
336 };
337
338 static stringop_algs pentium_memcpy[2] = {
339 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
340 DUMMY_STRINGOP_ALGS};
341 static stringop_algs pentium_memset[2] = {
342 {libcall, {{-1, rep_prefix_4_byte, false}}},
343 DUMMY_STRINGOP_ALGS};
344
345 static const
346 struct processor_costs pentium_cost = {
347 COSTS_N_INSNS (1), /* cost of an add instruction */
348 COSTS_N_INSNS (1), /* cost of a lea instruction */
349 COSTS_N_INSNS (4), /* variable shift costs */
350 COSTS_N_INSNS (1), /* constant shift costs */
351 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
352 COSTS_N_INSNS (11), /* HI */
353 COSTS_N_INSNS (11), /* SI */
354 COSTS_N_INSNS (11), /* DI */
355 COSTS_N_INSNS (11)}, /* other */
356 0, /* cost of multiply per each bit set */
357 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
358 COSTS_N_INSNS (25), /* HI */
359 COSTS_N_INSNS (25), /* SI */
360 COSTS_N_INSNS (25), /* DI */
361 COSTS_N_INSNS (25)}, /* other */
362 COSTS_N_INSNS (3), /* cost of movsx */
363 COSTS_N_INSNS (2), /* cost of movzx */
364 8, /* "large" insn */
365 6, /* MOVE_RATIO */
366 6, /* cost for loading QImode using movzbl */
367 {2, 4, 2}, /* cost of loading integer registers
368 in QImode, HImode and SImode.
369 Relative to reg-reg move (2). */
370 {2, 4, 2}, /* cost of storing integer registers */
371 2, /* cost of reg,reg fld/fst */
372 {2, 2, 6}, /* cost of loading fp registers
373 in SFmode, DFmode and XFmode */
374 {4, 4, 6}, /* cost of storing fp registers
375 in SFmode, DFmode and XFmode */
376 8, /* cost of moving MMX register */
377 {8, 8}, /* cost of loading MMX registers
378 in SImode and DImode */
379 {8, 8}, /* cost of storing MMX registers
380 in SImode and DImode */
381 2, /* cost of moving SSE register */
382 {4, 8, 16}, /* cost of loading SSE registers
383 in SImode, DImode and TImode */
384 {4, 8, 16}, /* cost of storing SSE registers
385 in SImode, DImode and TImode */
386 3, /* MMX or SSE register to integer */
387 8, /* size of l1 cache. */
388 8, /* size of l2 cache */
389 0, /* size of prefetch block */
390 0, /* number of parallel prefetches */
391 2, /* Branch cost */
392 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
393 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
394 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
395 COSTS_N_INSNS (1), /* cost of FABS instruction. */
396 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
397 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
398 pentium_memcpy,
399 pentium_memset,
400 1, /* scalar_stmt_cost. */
401 1, /* scalar load_cost. */
402 1, /* scalar_store_cost. */
403 1, /* vec_stmt_cost. */
404 1, /* vec_to_scalar_cost. */
405 1, /* scalar_to_vec_cost. */
406 1, /* vec_align_load_cost. */
407 2, /* vec_unalign_load_cost. */
408 1, /* vec_store_cost. */
409 3, /* cond_taken_branch_cost. */
410 1, /* cond_not_taken_branch_cost. */
411 };
412
413 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
414    (we ensure the alignment).  For small blocks an inline loop is still a
415    noticeable win; for bigger blocks either rep movsl or rep movsb is the
416    way to go.  Rep movsb apparently has a more expensive startup time in the
417    CPU, but after 4K the difference is down in the noise. */
418 static stringop_algs pentiumpro_memcpy[2] = {
419 {rep_prefix_4_byte, {{128, loop, false}, {1024, unrolled_loop, false},
420 {8192, rep_prefix_4_byte, false},
421 {-1, rep_prefix_1_byte, false}}},
422 DUMMY_STRINGOP_ALGS};
423 static stringop_algs pentiumpro_memset[2] = {
424 {rep_prefix_4_byte, {{1024, unrolled_loop, false},
425 {8192, rep_prefix_4_byte, false},
426 {-1, libcall, false}}},
427 DUMMY_STRINGOP_ALGS};
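/* Editorial example (follows from pentiumpro_memcpy above, read as sketched
   earlier): a 32-bit PentiumPro memcpy of known size N would pick

     N <= 128    -> inline loop
     N <= 1024   -> unrolled loop
     N <= 8192   -> rep movsl (4-byte rep prefix)
     otherwise   -> rep movsb (1-byte rep prefix)

   while a copy of unknown length defaults to rep movsl.  */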
428 static const
429 struct processor_costs pentiumpro_cost = {
430 COSTS_N_INSNS (1), /* cost of an add instruction */
431 COSTS_N_INSNS (1), /* cost of a lea instruction */
432 COSTS_N_INSNS (1), /* variable shift costs */
433 COSTS_N_INSNS (1), /* constant shift costs */
434 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
435 COSTS_N_INSNS (4), /* HI */
436 COSTS_N_INSNS (4), /* SI */
437 COSTS_N_INSNS (4), /* DI */
438 COSTS_N_INSNS (4)}, /* other */
439 0, /* cost of multiply per each bit set */
440 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
441 COSTS_N_INSNS (17), /* HI */
442 COSTS_N_INSNS (17), /* SI */
443 COSTS_N_INSNS (17), /* DI */
444 COSTS_N_INSNS (17)}, /* other */
445 COSTS_N_INSNS (1), /* cost of movsx */
446 COSTS_N_INSNS (1), /* cost of movzx */
447 8, /* "large" insn */
448 6, /* MOVE_RATIO */
449 2, /* cost for loading QImode using movzbl */
450 {4, 4, 4}, /* cost of loading integer registers
451 in QImode, HImode and SImode.
452 Relative to reg-reg move (2). */
453 {2, 2, 2}, /* cost of storing integer registers */
454 2, /* cost of reg,reg fld/fst */
455 {2, 2, 6}, /* cost of loading fp registers
456 in SFmode, DFmode and XFmode */
457 {4, 4, 6}, /* cost of storing fp registers
458 in SFmode, DFmode and XFmode */
459 2, /* cost of moving MMX register */
460 {2, 2}, /* cost of loading MMX registers
461 in SImode and DImode */
462 {2, 2}, /* cost of storing MMX registers
463 in SImode and DImode */
464 2, /* cost of moving SSE register */
465 {2, 2, 8}, /* cost of loading SSE registers
466 in SImode, DImode and TImode */
467 {2, 2, 8}, /* cost of storing SSE registers
468 in SImode, DImode and TImode */
469 3, /* MMX or SSE register to integer */
470 8, /* size of l1 cache. */
471 256, /* size of l2 cache */
472 32, /* size of prefetch block */
473 6, /* number of parallel prefetches */
474 2, /* Branch cost */
475 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
476 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
477 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
478 COSTS_N_INSNS (2), /* cost of FABS instruction. */
479 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
480 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
481 pentiumpro_memcpy,
482 pentiumpro_memset,
483 1, /* scalar_stmt_cost. */
484 1, /* scalar load_cost. */
485 1, /* scalar_store_cost. */
486 1, /* vec_stmt_cost. */
487 1, /* vec_to_scalar_cost. */
488 1, /* scalar_to_vec_cost. */
489 1, /* vec_align_load_cost. */
490 2, /* vec_unalign_load_cost. */
491 1, /* vec_store_cost. */
492 3, /* cond_taken_branch_cost. */
493 1, /* cond_not_taken_branch_cost. */
494 };
495
496 static stringop_algs geode_memcpy[2] = {
497 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
498 DUMMY_STRINGOP_ALGS};
499 static stringop_algs geode_memset[2] = {
500 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
501 DUMMY_STRINGOP_ALGS};
502 static const
503 struct processor_costs geode_cost = {
504 COSTS_N_INSNS (1), /* cost of an add instruction */
505 COSTS_N_INSNS (1), /* cost of a lea instruction */
506 COSTS_N_INSNS (2), /* variable shift costs */
507 COSTS_N_INSNS (1), /* constant shift costs */
508 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
509 COSTS_N_INSNS (4), /* HI */
510 COSTS_N_INSNS (7), /* SI */
511 COSTS_N_INSNS (7), /* DI */
512 COSTS_N_INSNS (7)}, /* other */
513 0, /* cost of multiply per each bit set */
514 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
515 COSTS_N_INSNS (23), /* HI */
516 COSTS_N_INSNS (39), /* SI */
517 COSTS_N_INSNS (39), /* DI */
518 COSTS_N_INSNS (39)}, /* other */
519 COSTS_N_INSNS (1), /* cost of movsx */
520 COSTS_N_INSNS (1), /* cost of movzx */
521 8, /* "large" insn */
522 4, /* MOVE_RATIO */
523 1, /* cost for loading QImode using movzbl */
524 {1, 1, 1}, /* cost of loading integer registers
525 in QImode, HImode and SImode.
526 Relative to reg-reg move (2). */
527 {1, 1, 1}, /* cost of storing integer registers */
528 1, /* cost of reg,reg fld/fst */
529 {1, 1, 1}, /* cost of loading fp registers
530 in SFmode, DFmode and XFmode */
531 {4, 6, 6}, /* cost of storing fp registers
532 in SFmode, DFmode and XFmode */
533
534 1, /* cost of moving MMX register */
535 {1, 1}, /* cost of loading MMX registers
536 in SImode and DImode */
537 {1, 1}, /* cost of storing MMX registers
538 in SImode and DImode */
539 1, /* cost of moving SSE register */
540 {1, 1, 1}, /* cost of loading SSE registers
541 in SImode, DImode and TImode */
542 {1, 1, 1}, /* cost of storing SSE registers
543 in SImode, DImode and TImode */
544 1, /* MMX or SSE register to integer */
545 64, /* size of l1 cache. */
546 128, /* size of l2 cache. */
547 32, /* size of prefetch block */
548 1, /* number of parallel prefetches */
549 1, /* Branch cost */
550 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
551 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
552 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
553 COSTS_N_INSNS (1), /* cost of FABS instruction. */
554 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
555 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
556 geode_memcpy,
557 geode_memset,
558 1, /* scalar_stmt_cost. */
559 1, /* scalar load_cost. */
560 1, /* scalar_store_cost. */
561 1, /* vec_stmt_cost. */
562 1, /* vec_to_scalar_cost. */
563 1, /* scalar_to_vec_cost. */
564 1, /* vec_align_load_cost. */
565 2, /* vec_unalign_load_cost. */
566 1, /* vec_store_cost. */
567 3, /* cond_taken_branch_cost. */
568 1, /* cond_not_taken_branch_cost. */
569 };
570
571 static stringop_algs k6_memcpy[2] = {
572 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
573 DUMMY_STRINGOP_ALGS};
574 static stringop_algs k6_memset[2] = {
575 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
576 DUMMY_STRINGOP_ALGS};
577 static const
578 struct processor_costs k6_cost = {
579 COSTS_N_INSNS (1), /* cost of an add instruction */
580 COSTS_N_INSNS (2), /* cost of a lea instruction */
581 COSTS_N_INSNS (1), /* variable shift costs */
582 COSTS_N_INSNS (1), /* constant shift costs */
583 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
584 COSTS_N_INSNS (3), /* HI */
585 COSTS_N_INSNS (3), /* SI */
586 COSTS_N_INSNS (3), /* DI */
587 COSTS_N_INSNS (3)}, /* other */
588 0, /* cost of multiply per each bit set */
589 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
590 COSTS_N_INSNS (18), /* HI */
591 COSTS_N_INSNS (18), /* SI */
592 COSTS_N_INSNS (18), /* DI */
593 COSTS_N_INSNS (18)}, /* other */
594 COSTS_N_INSNS (2), /* cost of movsx */
595 COSTS_N_INSNS (2), /* cost of movzx */
596 8, /* "large" insn */
597 4, /* MOVE_RATIO */
598 3, /* cost for loading QImode using movzbl */
599 {4, 5, 4}, /* cost of loading integer registers
600 in QImode, HImode and SImode.
601 Relative to reg-reg move (2). */
602 {2, 3, 2}, /* cost of storing integer registers */
603 4, /* cost of reg,reg fld/fst */
604 {6, 6, 6}, /* cost of loading fp registers
605 in SFmode, DFmode and XFmode */
606 {4, 4, 4}, /* cost of storing fp registers
607 in SFmode, DFmode and XFmode */
608 2, /* cost of moving MMX register */
609 {2, 2}, /* cost of loading MMX registers
610 in SImode and DImode */
611 {2, 2}, /* cost of storing MMX registers
612 in SImode and DImode */
613 2, /* cost of moving SSE register */
614 {2, 2, 8}, /* cost of loading SSE registers
615 in SImode, DImode and TImode */
616 {2, 2, 8}, /* cost of storing SSE registers
617 in SImode, DImode and TImode */
618 6, /* MMX or SSE register to integer */
619 32, /* size of l1 cache. */
620 32, /* size of l2 cache. Some models
621 have integrated l2 cache, but
622 optimizing for k6 is not important
623 enough to worry about that. */
624 32, /* size of prefetch block */
625 1, /* number of parallel prefetches */
626 1, /* Branch cost */
627 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
628 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
629 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
630 COSTS_N_INSNS (2), /* cost of FABS instruction. */
631 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
632 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
633 k6_memcpy,
634 k6_memset,
635 1, /* scalar_stmt_cost. */
636 1, /* scalar load_cost. */
637 1, /* scalar_store_cost. */
638 1, /* vec_stmt_cost. */
639 1, /* vec_to_scalar_cost. */
640 1, /* scalar_to_vec_cost. */
641 1, /* vec_align_load_cost. */
642 2, /* vec_unalign_load_cost. */
643 1, /* vec_store_cost. */
644 3, /* cond_taken_branch_cost. */
645 1, /* cond_not_taken_branch_cost. */
646 };
647
648 /* For some reason, Athlon deals better with the REP prefix (relative to
649    loops) than K8 does.  Alignment becomes important after 8 bytes for
650    memcpy and 128 bytes for memset. */
651 static stringop_algs athlon_memcpy[2] = {
652 {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
653 DUMMY_STRINGOP_ALGS};
654 static stringop_algs athlon_memset[2] = {
655 {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
656 DUMMY_STRINGOP_ALGS};
657 static const
658 struct processor_costs athlon_cost = {
659 COSTS_N_INSNS (1), /* cost of an add instruction */
660 COSTS_N_INSNS (2), /* cost of a lea instruction */
661 COSTS_N_INSNS (1), /* variable shift costs */
662 COSTS_N_INSNS (1), /* constant shift costs */
663 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
664 COSTS_N_INSNS (5), /* HI */
665 COSTS_N_INSNS (5), /* SI */
666 COSTS_N_INSNS (5), /* DI */
667 COSTS_N_INSNS (5)}, /* other */
668 0, /* cost of multiply per each bit set */
669 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
670 COSTS_N_INSNS (26), /* HI */
671 COSTS_N_INSNS (42), /* SI */
672 COSTS_N_INSNS (74), /* DI */
673 COSTS_N_INSNS (74)}, /* other */
674 COSTS_N_INSNS (1), /* cost of movsx */
675 COSTS_N_INSNS (1), /* cost of movzx */
676 8, /* "large" insn */
677 9, /* MOVE_RATIO */
678 4, /* cost for loading QImode using movzbl */
679 {3, 4, 3}, /* cost of loading integer registers
680 in QImode, HImode and SImode.
681 Relative to reg-reg move (2). */
682 {3, 4, 3}, /* cost of storing integer registers */
683 4, /* cost of reg,reg fld/fst */
684 {4, 4, 12}, /* cost of loading fp registers
685 in SFmode, DFmode and XFmode */
686 {6, 6, 8}, /* cost of storing fp registers
687 in SFmode, DFmode and XFmode */
688 2, /* cost of moving MMX register */
689 {4, 4}, /* cost of loading MMX registers
690 in SImode and DImode */
691 {4, 4}, /* cost of storing MMX registers
692 in SImode and DImode */
693 2, /* cost of moving SSE register */
694 {4, 4, 6}, /* cost of loading SSE registers
695 in SImode, DImode and TImode */
696 {4, 4, 5}, /* cost of storing SSE registers
697 in SImode, DImode and TImode */
698 5, /* MMX or SSE register to integer */
699 64, /* size of l1 cache. */
700 256, /* size of l2 cache. */
701 64, /* size of prefetch block */
702 6, /* number of parallel prefetches */
703 5, /* Branch cost */
704 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
705 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
706 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
707 COSTS_N_INSNS (2), /* cost of FABS instruction. */
708 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
709 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
710 athlon_memcpy,
711 athlon_memset,
712 1, /* scalar_stmt_cost. */
713 1, /* scalar load_cost. */
714 1, /* scalar_store_cost. */
715 1, /* vec_stmt_cost. */
716 1, /* vec_to_scalar_cost. */
717 1, /* scalar_to_vec_cost. */
718 1, /* vec_align_load_cost. */
719 2, /* vec_unalign_load_cost. */
720 1, /* vec_store_cost. */
721 3, /* cond_taken_branch_cost. */
722 1, /* cond_not_taken_branch_cost. */
723 };
724
725 /* K8 has an optimized REP instruction for medium-sized blocks, but for very
726    small blocks it is better to use a loop.  For large blocks, a libcall can
727    do nontemporal accesses and beat inline code considerably. */
728 static stringop_algs k8_memcpy[2] = {
729 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
730 {-1, rep_prefix_4_byte, false}}},
731 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
732 {-1, libcall, false}}}};
733 static stringop_algs k8_memset[2] = {
734 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
735 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
736 {libcall, {{48, unrolled_loop, false},
737 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
738 static const
739 struct processor_costs k8_cost = {
740 COSTS_N_INSNS (1), /* cost of an add instruction */
741 COSTS_N_INSNS (2), /* cost of a lea instruction */
742 COSTS_N_INSNS (1), /* variable shift costs */
743 COSTS_N_INSNS (1), /* constant shift costs */
744 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
745 COSTS_N_INSNS (4), /* HI */
746 COSTS_N_INSNS (3), /* SI */
747 COSTS_N_INSNS (4), /* DI */
748 COSTS_N_INSNS (5)}, /* other */
749 0, /* cost of multiply per each bit set */
750 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
751 COSTS_N_INSNS (26), /* HI */
752 COSTS_N_INSNS (42), /* SI */
753 COSTS_N_INSNS (74), /* DI */
754 COSTS_N_INSNS (74)}, /* other */
755 COSTS_N_INSNS (1), /* cost of movsx */
756 COSTS_N_INSNS (1), /* cost of movzx */
757 8, /* "large" insn */
758 9, /* MOVE_RATIO */
759 4, /* cost for loading QImode using movzbl */
760 {3, 4, 3}, /* cost of loading integer registers
761 in QImode, HImode and SImode.
762 Relative to reg-reg move (2). */
763 {3, 4, 3}, /* cost of storing integer registers */
764 4, /* cost of reg,reg fld/fst */
765 {4, 4, 12}, /* cost of loading fp registers
766 in SFmode, DFmode and XFmode */
767 {6, 6, 8}, /* cost of storing fp registers
768 in SFmode, DFmode and XFmode */
769 2, /* cost of moving MMX register */
770 {3, 3}, /* cost of loading MMX registers
771 in SImode and DImode */
772 {4, 4}, /* cost of storing MMX registers
773 in SImode and DImode */
774 2, /* cost of moving SSE register */
775 {4, 3, 6}, /* cost of loading SSE registers
776 in SImode, DImode and TImode */
777 {4, 4, 5}, /* cost of storing SSE registers
778 in SImode, DImode and TImode */
779 5, /* MMX or SSE register to integer */
780 64, /* size of l1 cache. */
781 512, /* size of l2 cache. */
782 64, /* size of prefetch block */
783   /* New AMD processors never drop prefetches; if they cannot be performed
784      immediately, they are queued.  We set the number of simultaneous
785      prefetches to a large constant to reflect this (it is probably not a
786      good idea to leave the number of prefetches completely unlimited, as
787      their execution also takes some time). */
788 100, /* number of parallel prefetches */
789 3, /* Branch cost */
790 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
791 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
792 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
793 COSTS_N_INSNS (2), /* cost of FABS instruction. */
794 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
795 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
796
797 k8_memcpy,
798 k8_memset,
799 4, /* scalar_stmt_cost. */
800 2, /* scalar load_cost. */
801 2, /* scalar_store_cost. */
802 5, /* vec_stmt_cost. */
803 0, /* vec_to_scalar_cost. */
804 2, /* scalar_to_vec_cost. */
805 2, /* vec_align_load_cost. */
806 3, /* vec_unalign_load_cost. */
807 3, /* vec_store_cost. */
808 3, /* cond_taken_branch_cost. */
809 2, /* cond_not_taken_branch_cost. */
810 };
811
812 /* AMDFAM10 has an optimized REP instruction for medium-sized blocks, but for
813    very small blocks it is better to use a loop.  For large blocks, a libcall
814    can do nontemporal accesses and beat inline code considerably. */
815 static stringop_algs amdfam10_memcpy[2] = {
816 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
817 {-1, rep_prefix_4_byte, false}}},
818 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
819 {-1, libcall, false}}}};
820 static stringop_algs amdfam10_memset[2] = {
821 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
822 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
823 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
824 {-1, libcall, false}}}};
825 struct processor_costs amdfam10_cost = {
826 COSTS_N_INSNS (1), /* cost of an add instruction */
827 COSTS_N_INSNS (2), /* cost of a lea instruction */
828 COSTS_N_INSNS (1), /* variable shift costs */
829 COSTS_N_INSNS (1), /* constant shift costs */
830 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
831 COSTS_N_INSNS (4), /* HI */
832 COSTS_N_INSNS (3), /* SI */
833 COSTS_N_INSNS (4), /* DI */
834 COSTS_N_INSNS (5)}, /* other */
835 0, /* cost of multiply per each bit set */
836 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
837 COSTS_N_INSNS (35), /* HI */
838 COSTS_N_INSNS (51), /* SI */
839 COSTS_N_INSNS (83), /* DI */
840 COSTS_N_INSNS (83)}, /* other */
841 COSTS_N_INSNS (1), /* cost of movsx */
842 COSTS_N_INSNS (1), /* cost of movzx */
843 8, /* "large" insn */
844 9, /* MOVE_RATIO */
845 4, /* cost for loading QImode using movzbl */
846 {3, 4, 3}, /* cost of loading integer registers
847 in QImode, HImode and SImode.
848 Relative to reg-reg move (2). */
849 {3, 4, 3}, /* cost of storing integer registers */
850 4, /* cost of reg,reg fld/fst */
851 {4, 4, 12}, /* cost of loading fp registers
852 in SFmode, DFmode and XFmode */
853 {6, 6, 8}, /* cost of storing fp registers
854 in SFmode, DFmode and XFmode */
855 2, /* cost of moving MMX register */
856 {3, 3}, /* cost of loading MMX registers
857 in SImode and DImode */
858 {4, 4}, /* cost of storing MMX registers
859 in SImode and DImode */
860 2, /* cost of moving SSE register */
861 {4, 4, 3}, /* cost of loading SSE registers
862 in SImode, DImode and TImode */
863 {4, 4, 5}, /* cost of storing SSE registers
864 in SImode, DImode and TImode */
865 3, /* MMX or SSE register to integer */
866 /* On K8:
867 MOVD reg64, xmmreg Double FSTORE 4
868 MOVD reg32, xmmreg Double FSTORE 4
869 On AMDFAM10:
870 MOVD reg64, xmmreg Double FADD 3
871 1/1 1/1
872 MOVD reg32, xmmreg Double FADD 3
873 1/1 1/1 */
874 64, /* size of l1 cache. */
875 512, /* size of l2 cache. */
876 64, /* size of prefetch block */
877   /* New AMD processors never drop prefetches; if they cannot be performed
878      immediately, they are queued.  We set the number of simultaneous
879      prefetches to a large constant to reflect this (it is probably not a
880      good idea to leave the number of prefetches completely unlimited, as
881      their execution also takes some time). */
882 100, /* number of parallel prefetches */
883 2, /* Branch cost */
884 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
885 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
886 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
887 COSTS_N_INSNS (2), /* cost of FABS instruction. */
888 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
889 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
890
891 amdfam10_memcpy,
892 amdfam10_memset,
893 4, /* scalar_stmt_cost. */
894 2, /* scalar load_cost. */
895 2, /* scalar_store_cost. */
896 6, /* vec_stmt_cost. */
897 0, /* vec_to_scalar_cost. */
898 2, /* scalar_to_vec_cost. */
899 2, /* vec_align_load_cost. */
900 2, /* vec_unalign_load_cost. */
901 2, /* vec_store_cost. */
902 2, /* cond_taken_branch_cost. */
903 1, /* cond_not_taken_branch_cost. */
904 };
905
906 /* BDVER1 has an optimized REP instruction for medium-sized blocks, but for
907    very small blocks it is better to use a loop.  For large blocks, a libcall
908    can do nontemporal accesses and beat inline code considerably. */
909 static stringop_algs bdver1_memcpy[2] = {
910 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
911 {-1, rep_prefix_4_byte, false}}},
912 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
913 {-1, libcall, false}}}};
914 static stringop_algs bdver1_memset[2] = {
915 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
916 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
917 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
918 {-1, libcall, false}}}};
919
920 const struct processor_costs bdver1_cost = {
921 COSTS_N_INSNS (1), /* cost of an add instruction */
922 COSTS_N_INSNS (1), /* cost of a lea instruction */
923 COSTS_N_INSNS (1), /* variable shift costs */
924 COSTS_N_INSNS (1), /* constant shift costs */
925 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
926 COSTS_N_INSNS (4), /* HI */
927 COSTS_N_INSNS (4), /* SI */
928 COSTS_N_INSNS (6), /* DI */
929 COSTS_N_INSNS (6)}, /* other */
930 0, /* cost of multiply per each bit set */
931 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
932 COSTS_N_INSNS (35), /* HI */
933 COSTS_N_INSNS (51), /* SI */
934 COSTS_N_INSNS (83), /* DI */
935 COSTS_N_INSNS (83)}, /* other */
936 COSTS_N_INSNS (1), /* cost of movsx */
937 COSTS_N_INSNS (1), /* cost of movzx */
938 8, /* "large" insn */
939 9, /* MOVE_RATIO */
940 4, /* cost for loading QImode using movzbl */
941 {5, 5, 4}, /* cost of loading integer registers
942 in QImode, HImode and SImode.
943 Relative to reg-reg move (2). */
944 {4, 4, 4}, /* cost of storing integer registers */
945 2, /* cost of reg,reg fld/fst */
946 {5, 5, 12}, /* cost of loading fp registers
947 in SFmode, DFmode and XFmode */
948 {4, 4, 8}, /* cost of storing fp registers
949 in SFmode, DFmode and XFmode */
950 2, /* cost of moving MMX register */
951 {4, 4}, /* cost of loading MMX registers
952 in SImode and DImode */
953 {4, 4}, /* cost of storing MMX registers
954 in SImode and DImode */
955 2, /* cost of moving SSE register */
956 {4, 4, 4}, /* cost of loading SSE registers
957 in SImode, DImode and TImode */
958 {4, 4, 4}, /* cost of storing SSE registers
959 in SImode, DImode and TImode */
960 2, /* MMX or SSE register to integer */
961 /* On K8:
962 MOVD reg64, xmmreg Double FSTORE 4
963 MOVD reg32, xmmreg Double FSTORE 4
964 On AMDFAM10:
965 MOVD reg64, xmmreg Double FADD 3
966 1/1 1/1
967 MOVD reg32, xmmreg Double FADD 3
968 1/1 1/1 */
969 16, /* size of l1 cache. */
970 2048, /* size of l2 cache. */
971 64, /* size of prefetch block */
972   /* New AMD processors never drop prefetches; if they cannot be performed
973      immediately, they are queued.  We set the number of simultaneous
974      prefetches to a large constant to reflect this (it is probably not a
975      good idea to leave the number of prefetches completely unlimited, as
976      their execution also takes some time). */
977 100, /* number of parallel prefetches */
978 2, /* Branch cost */
979 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
980 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
981 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
982 COSTS_N_INSNS (2), /* cost of FABS instruction. */
983 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
984 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
985
986 bdver1_memcpy,
987 bdver1_memset,
988 6, /* scalar_stmt_cost. */
989 4, /* scalar load_cost. */
990 4, /* scalar_store_cost. */
991 6, /* vec_stmt_cost. */
992 0, /* vec_to_scalar_cost. */
993 2, /* scalar_to_vec_cost. */
994 4, /* vec_align_load_cost. */
995 4, /* vec_unalign_load_cost. */
996 4, /* vec_store_cost. */
997 2, /* cond_taken_branch_cost. */
998 1, /* cond_not_taken_branch_cost. */
999 };
1000
1001 /* BDVER2 has an optimized REP instruction for medium-sized blocks, but for
1002    very small blocks it is better to use a loop.  For large blocks, a libcall
1003    can do nontemporal accesses and beat inline code considerably. */
1004
1005 static stringop_algs bdver2_memcpy[2] = {
1006 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1007 {-1, rep_prefix_4_byte, false}}},
1008 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1009 {-1, libcall, false}}}};
1010 static stringop_algs bdver2_memset[2] = {
1011 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1012 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1013 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1014 {-1, libcall, false}}}};
1015
1016 const struct processor_costs bdver2_cost = {
1017 COSTS_N_INSNS (1), /* cost of an add instruction */
1018 COSTS_N_INSNS (1), /* cost of a lea instruction */
1019 COSTS_N_INSNS (1), /* variable shift costs */
1020 COSTS_N_INSNS (1), /* constant shift costs */
1021 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1022 COSTS_N_INSNS (4), /* HI */
1023 COSTS_N_INSNS (4), /* SI */
1024 COSTS_N_INSNS (6), /* DI */
1025 COSTS_N_INSNS (6)}, /* other */
1026 0, /* cost of multiply per each bit set */
1027 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1028 COSTS_N_INSNS (35), /* HI */
1029 COSTS_N_INSNS (51), /* SI */
1030 COSTS_N_INSNS (83), /* DI */
1031 COSTS_N_INSNS (83)}, /* other */
1032 COSTS_N_INSNS (1), /* cost of movsx */
1033 COSTS_N_INSNS (1), /* cost of movzx */
1034 8, /* "large" insn */
1035 9, /* MOVE_RATIO */
1036 4, /* cost for loading QImode using movzbl */
1037 {5, 5, 4}, /* cost of loading integer registers
1038 in QImode, HImode and SImode.
1039 Relative to reg-reg move (2). */
1040 {4, 4, 4}, /* cost of storing integer registers */
1041 2, /* cost of reg,reg fld/fst */
1042 {5, 5, 12}, /* cost of loading fp registers
1043 in SFmode, DFmode and XFmode */
1044 {4, 4, 8}, /* cost of storing fp registers
1045 in SFmode, DFmode and XFmode */
1046 2, /* cost of moving MMX register */
1047 {4, 4}, /* cost of loading MMX registers
1048 in SImode and DImode */
1049 {4, 4}, /* cost of storing MMX registers
1050 in SImode and DImode */
1051 2, /* cost of moving SSE register */
1052 {4, 4, 4}, /* cost of loading SSE registers
1053 in SImode, DImode and TImode */
1054 {4, 4, 4}, /* cost of storing SSE registers
1055 in SImode, DImode and TImode */
1056 2, /* MMX or SSE register to integer */
1057 /* On K8:
1058 MOVD reg64, xmmreg Double FSTORE 4
1059 MOVD reg32, xmmreg Double FSTORE 4
1060 On AMDFAM10:
1061 MOVD reg64, xmmreg Double FADD 3
1062 1/1 1/1
1063 MOVD reg32, xmmreg Double FADD 3
1064 1/1 1/1 */
1065 16, /* size of l1 cache. */
1066 2048, /* size of l2 cache. */
1067 64, /* size of prefetch block */
1068   /* New AMD processors never drop prefetches; if they cannot be performed
1069      immediately, they are queued.  We set the number of simultaneous
1070      prefetches to a large constant to reflect this (it is probably not a
1071      good idea to leave the number of prefetches completely unlimited, as
1072      their execution also takes some time). */
1073 100, /* number of parallel prefetches */
1074 2, /* Branch cost */
1075 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1076 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1077 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1078 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1079 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1080 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1081
1082 bdver2_memcpy,
1083 bdver2_memset,
1084 6, /* scalar_stmt_cost. */
1085 4, /* scalar load_cost. */
1086 4, /* scalar_store_cost. */
1087 6, /* vec_stmt_cost. */
1088 0, /* vec_to_scalar_cost. */
1089 2, /* scalar_to_vec_cost. */
1090 4, /* vec_align_load_cost. */
1091 4, /* vec_unalign_load_cost. */
1092 4, /* vec_store_cost. */
1093 2, /* cond_taken_branch_cost. */
1094 1, /* cond_not_taken_branch_cost. */
1095 };
1096
1097
1098 /* BDVER3 has an optimized REP instruction for medium-sized blocks, but for
1099    very small blocks it is better to use a loop.  For large blocks, a libcall
1100    can do nontemporal accesses and beat inline code considerably. */
1101 static stringop_algs bdver3_memcpy[2] = {
1102 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1103 {-1, rep_prefix_4_byte, false}}},
1104 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1105 {-1, libcall, false}}}};
1106 static stringop_algs bdver3_memset[2] = {
1107 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1108 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1109 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1110 {-1, libcall, false}}}};
1111 struct processor_costs bdver3_cost = {
1112 COSTS_N_INSNS (1), /* cost of an add instruction */
1113 COSTS_N_INSNS (1), /* cost of a lea instruction */
1114 COSTS_N_INSNS (1), /* variable shift costs */
1115 COSTS_N_INSNS (1), /* constant shift costs */
1116 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1117 COSTS_N_INSNS (4), /* HI */
1118 COSTS_N_INSNS (4), /* SI */
1119 COSTS_N_INSNS (6), /* DI */
1120 COSTS_N_INSNS (6)}, /* other */
1121 0, /* cost of multiply per each bit set */
1122 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1123 COSTS_N_INSNS (35), /* HI */
1124 COSTS_N_INSNS (51), /* SI */
1125 COSTS_N_INSNS (83), /* DI */
1126 COSTS_N_INSNS (83)}, /* other */
1127 COSTS_N_INSNS (1), /* cost of movsx */
1128 COSTS_N_INSNS (1), /* cost of movzx */
1129 8, /* "large" insn */
1130 9, /* MOVE_RATIO */
1131 4, /* cost for loading QImode using movzbl */
1132 {5, 5, 4}, /* cost of loading integer registers
1133 in QImode, HImode and SImode.
1134 Relative to reg-reg move (2). */
1135 {4, 4, 4}, /* cost of storing integer registers */
1136 2, /* cost of reg,reg fld/fst */
1137 {5, 5, 12}, /* cost of loading fp registers
1138 in SFmode, DFmode and XFmode */
1139 {4, 4, 8}, /* cost of storing fp registers
1140 in SFmode, DFmode and XFmode */
1141 2, /* cost of moving MMX register */
1142 {4, 4}, /* cost of loading MMX registers
1143 in SImode and DImode */
1144 {4, 4}, /* cost of storing MMX registers
1145 in SImode and DImode */
1146 2, /* cost of moving SSE register */
1147 {4, 4, 4}, /* cost of loading SSE registers
1148 in SImode, DImode and TImode */
1149 {4, 4, 4}, /* cost of storing SSE registers
1150 in SImode, DImode and TImode */
1151 2, /* MMX or SSE register to integer */
1152 16, /* size of l1 cache. */
1153 2048, /* size of l2 cache. */
1154 64, /* size of prefetch block */
1155   /* New AMD processors never drop prefetches; if they cannot be performed
1156      immediately, they are queued.  We set the number of simultaneous
1157      prefetches to a large constant to reflect this (it is probably not a
1158      good idea to leave the number of prefetches completely unlimited, as
1159      their execution also takes some time). */
1160 100, /* number of parallel prefetches */
1161 2, /* Branch cost */
1162 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1163 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1164 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1165 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1166 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1167 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1168
1169 bdver3_memcpy,
1170 bdver3_memset,
1171 6, /* scalar_stmt_cost. */
1172 4, /* scalar load_cost. */
1173 4, /* scalar_store_cost. */
1174 6, /* vec_stmt_cost. */
1175 0, /* vec_to_scalar_cost. */
1176 2, /* scalar_to_vec_cost. */
1177 4, /* vec_align_load_cost. */
1178 4, /* vec_unalign_load_cost. */
1179 4, /* vec_store_cost. */
1180 2, /* cond_taken_branch_cost. */
1181 1, /* cond_not_taken_branch_cost. */
1182 };
1183
1184 /* BDVER4 has an optimized REP instruction for medium-sized blocks, but for
1185    very small blocks it is better to use a loop.  For large blocks, a libcall
1186    can do nontemporal accesses and beat inline code considerably. */
1187 static stringop_algs bdver4_memcpy[2] = {
1188 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1189 {-1, rep_prefix_4_byte, false}}},
1190 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1191 {-1, libcall, false}}}};
1192 static stringop_algs bdver4_memset[2] = {
1193 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1194 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1195 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1196 {-1, libcall, false}}}};
1197 struct processor_costs bdver4_cost = {
1198 COSTS_N_INSNS (1), /* cost of an add instruction */
1199 COSTS_N_INSNS (1), /* cost of a lea instruction */
1200 COSTS_N_INSNS (1), /* variable shift costs */
1201 COSTS_N_INSNS (1), /* constant shift costs */
1202 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1203 COSTS_N_INSNS (4), /* HI */
1204 COSTS_N_INSNS (4), /* SI */
1205 COSTS_N_INSNS (6), /* DI */
1206 COSTS_N_INSNS (6)}, /* other */
1207 0, /* cost of multiply per each bit set */
1208 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1209 COSTS_N_INSNS (35), /* HI */
1210 COSTS_N_INSNS (51), /* SI */
1211 COSTS_N_INSNS (83), /* DI */
1212 COSTS_N_INSNS (83)}, /* other */
1213 COSTS_N_INSNS (1), /* cost of movsx */
1214 COSTS_N_INSNS (1), /* cost of movzx */
1215 8, /* "large" insn */
1216 9, /* MOVE_RATIO */
1217 4, /* cost for loading QImode using movzbl */
1218 {5, 5, 4}, /* cost of loading integer registers
1219 in QImode, HImode and SImode.
1220 Relative to reg-reg move (2). */
1221 {4, 4, 4}, /* cost of storing integer registers */
1222 2, /* cost of reg,reg fld/fst */
1223 {5, 5, 12}, /* cost of loading fp registers
1224 in SFmode, DFmode and XFmode */
1225 {4, 4, 8}, /* cost of storing fp registers
1226 in SFmode, DFmode and XFmode */
1227 2, /* cost of moving MMX register */
1228 {4, 4}, /* cost of loading MMX registers
1229 in SImode and DImode */
1230 {4, 4}, /* cost of storing MMX registers
1231 in SImode and DImode */
1232 2, /* cost of moving SSE register */
1233 {4, 4, 4}, /* cost of loading SSE registers
1234 in SImode, DImode and TImode */
1235 {4, 4, 4}, /* cost of storing SSE registers
1236 in SImode, DImode and TImode */
1237 2, /* MMX or SSE register to integer */
1238 16, /* size of l1 cache. */
1239 2048, /* size of l2 cache. */
1240 64, /* size of prefetch block */
1241   /* New AMD processors never drop prefetches; if they cannot be performed
1242      immediately, they are queued.  We set the number of simultaneous
1243      prefetches to a large constant to reflect this (it is probably not a
1244      good idea to leave the number of prefetches completely unlimited, as
1245      their execution also takes some time). */
1246 100, /* number of parallel prefetches */
1247 2, /* Branch cost */
1248 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1249 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1250 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1251 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1252 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1253 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1254
1255 bdver4_memcpy,
1256 bdver4_memset,
1257 6, /* scalar_stmt_cost. */
1258 4, /* scalar load_cost. */
1259 4, /* scalar_store_cost. */
1260 6, /* vec_stmt_cost. */
1261 0, /* vec_to_scalar_cost. */
1262 2, /* scalar_to_vec_cost. */
1263 4, /* vec_align_load_cost. */
1264 4, /* vec_unalign_load_cost. */
1265 4, /* vec_store_cost. */
1266 2, /* cond_taken_branch_cost. */
1267 1, /* cond_not_taken_branch_cost. */
1268 };
1269
1270 /* BTVER1 has an optimized REP instruction for medium-sized blocks, but for
1271    very small blocks it is better to use a loop.  For large blocks, a libcall
1272    can do nontemporal accesses and beat inline code considerably. */
1273 static stringop_algs btver1_memcpy[2] = {
1274 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1275 {-1, rep_prefix_4_byte, false}}},
1276 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1277 {-1, libcall, false}}}};
1278 static stringop_algs btver1_memset[2] = {
1279 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1280 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1281 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1282 {-1, libcall, false}}}};
1283 const struct processor_costs btver1_cost = {
1284 COSTS_N_INSNS (1), /* cost of an add instruction */
1285 COSTS_N_INSNS (2), /* cost of a lea instruction */
1286 COSTS_N_INSNS (1), /* variable shift costs */
1287 COSTS_N_INSNS (1), /* constant shift costs */
1288 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1289 COSTS_N_INSNS (4), /* HI */
1290 COSTS_N_INSNS (3), /* SI */
1291 COSTS_N_INSNS (4), /* DI */
1292 COSTS_N_INSNS (5)}, /* other */
1293 0, /* cost of multiply per each bit set */
1294 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1295 COSTS_N_INSNS (35), /* HI */
1296 COSTS_N_INSNS (51), /* SI */
1297 COSTS_N_INSNS (83), /* DI */
1298 COSTS_N_INSNS (83)}, /* other */
1299 COSTS_N_INSNS (1), /* cost of movsx */
1300 COSTS_N_INSNS (1), /* cost of movzx */
1301 8, /* "large" insn */
1302 9, /* MOVE_RATIO */
1303 4, /* cost for loading QImode using movzbl */
1304 {3, 4, 3}, /* cost of loading integer registers
1305 in QImode, HImode and SImode.
1306 Relative to reg-reg move (2). */
1307 {3, 4, 3}, /* cost of storing integer registers */
1308 4, /* cost of reg,reg fld/fst */
1309 {4, 4, 12}, /* cost of loading fp registers
1310 in SFmode, DFmode and XFmode */
1311 {6, 6, 8}, /* cost of storing fp registers
1312 in SFmode, DFmode and XFmode */
1313 2, /* cost of moving MMX register */
1314 {3, 3}, /* cost of loading MMX registers
1315 in SImode and DImode */
1316 {4, 4}, /* cost of storing MMX registers
1317 in SImode and DImode */
1318 2, /* cost of moving SSE register */
1319 {4, 4, 3}, /* cost of loading SSE registers
1320 in SImode, DImode and TImode */
1321 {4, 4, 5}, /* cost of storing SSE registers
1322 in SImode, DImode and TImode */
1323 3, /* MMX or SSE register to integer */
1324 /* On K8:
1325 MOVD reg64, xmmreg Double FSTORE 4
1326 MOVD reg32, xmmreg Double FSTORE 4
1327 On AMDFAM10:
1328 MOVD reg64, xmmreg Double FADD 3
1329 1/1 1/1
1330 MOVD reg32, xmmreg Double FADD 3
1331 1/1 1/1 */
1332 32, /* size of l1 cache. */
1333 512, /* size of l2 cache. */
1334 64, /* size of prefetch block */
1335 100, /* number of parallel prefetches */
1336 2, /* Branch cost */
1337 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1338 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1339 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1340 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1341 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1342 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1343
1344 btver1_memcpy,
1345 btver1_memset,
1346 4, /* scalar_stmt_cost. */
1347 2, /* scalar load_cost. */
1348 2, /* scalar_store_cost. */
1349 6, /* vec_stmt_cost. */
1350 0, /* vec_to_scalar_cost. */
1351 2, /* scalar_to_vec_cost. */
1352 2, /* vec_align_load_cost. */
1353 2, /* vec_unalign_load_cost. */
1354 2, /* vec_store_cost. */
1355 2, /* cond_taken_branch_cost. */
1356 1, /* cond_not_taken_branch_cost. */
1357 };
1358
1359 static stringop_algs btver2_memcpy[2] = {
1360 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1361 {-1, rep_prefix_4_byte, false}}},
1362 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1363 {-1, libcall, false}}}};
1364 static stringop_algs btver2_memset[2] = {
1365 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1366 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1367 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1368 {-1, libcall, false}}}};
1369 const struct processor_costs btver2_cost = {
1370 COSTS_N_INSNS (1), /* cost of an add instruction */
1371 COSTS_N_INSNS (2), /* cost of a lea instruction */
1372 COSTS_N_INSNS (1), /* variable shift costs */
1373 COSTS_N_INSNS (1), /* constant shift costs */
1374 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1375 COSTS_N_INSNS (4), /* HI */
1376 COSTS_N_INSNS (3), /* SI */
1377 COSTS_N_INSNS (4), /* DI */
1378 COSTS_N_INSNS (5)}, /* other */
1379 0, /* cost of multiply per each bit set */
1380 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1381 COSTS_N_INSNS (35), /* HI */
1382 COSTS_N_INSNS (51), /* SI */
1383 COSTS_N_INSNS (83), /* DI */
1384 COSTS_N_INSNS (83)}, /* other */
1385 COSTS_N_INSNS (1), /* cost of movsx */
1386 COSTS_N_INSNS (1), /* cost of movzx */
1387 8, /* "large" insn */
1388 9, /* MOVE_RATIO */
1389 4, /* cost for loading QImode using movzbl */
1390 {3, 4, 3}, /* cost of loading integer registers
1391 in QImode, HImode and SImode.
1392 Relative to reg-reg move (2). */
1393 {3, 4, 3}, /* cost of storing integer registers */
1394 4, /* cost of reg,reg fld/fst */
1395 {4, 4, 12}, /* cost of loading fp registers
1396 in SFmode, DFmode and XFmode */
1397 {6, 6, 8}, /* cost of storing fp registers
1398 in SFmode, DFmode and XFmode */
1399 2, /* cost of moving MMX register */
1400 {3, 3}, /* cost of loading MMX registers
1401 in SImode and DImode */
1402 {4, 4}, /* cost of storing MMX registers
1403 in SImode and DImode */
1404 2, /* cost of moving SSE register */
1405 {4, 4, 3}, /* cost of loading SSE registers
1406 in SImode, DImode and TImode */
1407 {4, 4, 5}, /* cost of storing SSE registers
1408 in SImode, DImode and TImode */
1409 3, /* MMX or SSE register to integer */
1410 /* On K8:
1411 MOVD reg64, xmmreg Double FSTORE 4
1412 MOVD reg32, xmmreg Double FSTORE 4
1413 On AMDFAM10:
1414 MOVD reg64, xmmreg Double FADD 3
1415 1/1 1/1
1416 MOVD reg32, xmmreg Double FADD 3
1417 1/1 1/1 */
1418 32, /* size of l1 cache. */
1419 2048, /* size of l2 cache. */
1420 64, /* size of prefetch block */
1421 100, /* number of parallel prefetches */
1422 2, /* Branch cost */
1423 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1424 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1425 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1426 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1427 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1428 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1429 btver2_memcpy,
1430 btver2_memset,
1431 4, /* scalar_stmt_cost. */
1432 2, /* scalar load_cost. */
1433 2, /* scalar_store_cost. */
1434 6, /* vec_stmt_cost. */
1435 0, /* vec_to_scalar_cost. */
1436 2, /* scalar_to_vec_cost. */
1437 2, /* vec_align_load_cost. */
1438 2, /* vec_unalign_load_cost. */
1439 2, /* vec_store_cost. */
1440 2, /* cond_taken_branch_cost. */
1441 1, /* cond_not_taken_branch_cost. */
1442 };
1443
1444 static stringop_algs pentium4_memcpy[2] = {
1445 {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}},
1446 DUMMY_STRINGOP_ALGS};
1447 static stringop_algs pentium4_memset[2] = {
1448 {libcall, {{6, loop_1_byte, false}, {48, loop, false},
1449 {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1450 DUMMY_STRINGOP_ALGS};
1451
1452 static const
1453 struct processor_costs pentium4_cost = {
1454 COSTS_N_INSNS (1), /* cost of an add instruction */
1455 COSTS_N_INSNS (3), /* cost of a lea instruction */
1456 COSTS_N_INSNS (4), /* variable shift costs */
1457 COSTS_N_INSNS (4), /* constant shift costs */
1458 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
1459 COSTS_N_INSNS (15), /* HI */
1460 COSTS_N_INSNS (15), /* SI */
1461 COSTS_N_INSNS (15), /* DI */
1462 COSTS_N_INSNS (15)}, /* other */
1463 0, /* cost of multiply per each bit set */
1464 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
1465 COSTS_N_INSNS (56), /* HI */
1466 COSTS_N_INSNS (56), /* SI */
1467 COSTS_N_INSNS (56), /* DI */
1468 COSTS_N_INSNS (56)}, /* other */
1469 COSTS_N_INSNS (1), /* cost of movsx */
1470 COSTS_N_INSNS (1), /* cost of movzx */
1471 16, /* "large" insn */
1472 6, /* MOVE_RATIO */
1473 2, /* cost for loading QImode using movzbl */
1474 {4, 5, 4}, /* cost of loading integer registers
1475 in QImode, HImode and SImode.
1476 Relative to reg-reg move (2). */
1477 {2, 3, 2}, /* cost of storing integer registers */
1478 2, /* cost of reg,reg fld/fst */
1479 {2, 2, 6}, /* cost of loading fp registers
1480 in SFmode, DFmode and XFmode */
1481 {4, 4, 6}, /* cost of storing fp registers
1482 in SFmode, DFmode and XFmode */
1483 2, /* cost of moving MMX register */
1484 {2, 2}, /* cost of loading MMX registers
1485 in SImode and DImode */
1486 {2, 2}, /* cost of storing MMX registers
1487 in SImode and DImode */
1488 12, /* cost of moving SSE register */
1489 {12, 12, 12}, /* cost of loading SSE registers
1490 in SImode, DImode and TImode */
1491 {2, 2, 8}, /* cost of storing SSE registers
1492 in SImode, DImode and TImode */
1493 10, /* MMX or SSE register to integer */
1494 8, /* size of l1 cache. */
1495 256, /* size of l2 cache. */
1496 64, /* size of prefetch block */
1497 6, /* number of parallel prefetches */
1498 2, /* Branch cost */
1499 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
1500 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
1501 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
1502 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1503 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1504 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
1505 pentium4_memcpy,
1506 pentium4_memset,
1507 1, /* scalar_stmt_cost. */
1508 1, /* scalar load_cost. */
1509 1, /* scalar_store_cost. */
1510 1, /* vec_stmt_cost. */
1511 1, /* vec_to_scalar_cost. */
1512 1, /* scalar_to_vec_cost. */
1513 1, /* vec_align_load_cost. */
1514 2, /* vec_unalign_load_cost. */
1515 1, /* vec_store_cost. */
1516 3, /* cond_taken_branch_cost. */
1517 1, /* cond_not_taken_branch_cost. */
1518 };
1519
1520 static stringop_algs nocona_memcpy[2] = {
1521 {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}},
1522 {libcall, {{32, loop, false}, {20000, rep_prefix_8_byte, false},
1523 {100000, unrolled_loop, false}, {-1, libcall, false}}}};
1524
1525 static stringop_algs nocona_memset[2] = {
1526 {libcall, {{6, loop_1_byte, false}, {48, loop, false},
1527 {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1528 {libcall, {{24, loop, false}, {64, unrolled_loop, false},
1529 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1530
1531 static const
1532 struct processor_costs nocona_cost = {
1533 COSTS_N_INSNS (1), /* cost of an add instruction */
1534 COSTS_N_INSNS (1), /* cost of a lea instruction */
1535 COSTS_N_INSNS (1), /* variable shift costs */
1536 COSTS_N_INSNS (1), /* constant shift costs */
1537 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
1538 COSTS_N_INSNS (10), /* HI */
1539 COSTS_N_INSNS (10), /* SI */
1540 COSTS_N_INSNS (10), /* DI */
1541 COSTS_N_INSNS (10)}, /* other */
1542 0, /* cost of multiply per each bit set */
1543 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
1544 COSTS_N_INSNS (66), /* HI */
1545 COSTS_N_INSNS (66), /* SI */
1546 COSTS_N_INSNS (66), /* DI */
1547 COSTS_N_INSNS (66)}, /* other */
1548 COSTS_N_INSNS (1), /* cost of movsx */
1549 COSTS_N_INSNS (1), /* cost of movzx */
1550 16, /* "large" insn */
1551 17, /* MOVE_RATIO */
1552 4, /* cost for loading QImode using movzbl */
1553 {4, 4, 4}, /* cost of loading integer registers
1554 in QImode, HImode and SImode.
1555 Relative to reg-reg move (2). */
1556 {4, 4, 4}, /* cost of storing integer registers */
1557 3, /* cost of reg,reg fld/fst */
1558 {12, 12, 12}, /* cost of loading fp registers
1559 in SFmode, DFmode and XFmode */
1560 {4, 4, 4}, /* cost of storing fp registers
1561 in SFmode, DFmode and XFmode */
1562 6, /* cost of moving MMX register */
1563 {12, 12}, /* cost of loading MMX registers
1564 in SImode and DImode */
1565 {12, 12}, /* cost of storing MMX registers
1566 in SImode and DImode */
1567 6, /* cost of moving SSE register */
1568 {12, 12, 12}, /* cost of loading SSE registers
1569 in SImode, DImode and TImode */
1570 {12, 12, 12}, /* cost of storing SSE registers
1571 in SImode, DImode and TImode */
1572 8, /* MMX or SSE register to integer */
1573 8, /* size of l1 cache. */
1574 1024, /* size of l2 cache. */
1575 64, /* size of prefetch block */
1576 8, /* number of parallel prefetches */
1577 1, /* Branch cost */
1578 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1579 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1580 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
1581 COSTS_N_INSNS (3), /* cost of FABS instruction. */
1582 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
1583 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
1584 nocona_memcpy,
1585 nocona_memset,
1586 1, /* scalar_stmt_cost. */
1587 1, /* scalar load_cost. */
1588 1, /* scalar_store_cost. */
1589 1, /* vec_stmt_cost. */
1590 1, /* vec_to_scalar_cost. */
1591 1, /* scalar_to_vec_cost. */
1592 1, /* vec_align_load_cost. */
1593 2, /* vec_unalign_load_cost. */
1594 1, /* vec_store_cost. */
1595 3, /* cond_taken_branch_cost. */
1596 1, /* cond_not_taken_branch_cost. */
1597 };
1598
1599 static stringop_algs atom_memcpy[2] = {
1600 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1601 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1602 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1603 static stringop_algs atom_memset[2] = {
1604 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1605 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1606 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1607 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1608 static const
1609 struct processor_costs atom_cost = {
1610 COSTS_N_INSNS (1), /* cost of an add instruction */
1611 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1612 COSTS_N_INSNS (1), /* variable shift costs */
1613 COSTS_N_INSNS (1), /* constant shift costs */
1614 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1615 COSTS_N_INSNS (4), /* HI */
1616 COSTS_N_INSNS (3), /* SI */
1617 COSTS_N_INSNS (4), /* DI */
1618 COSTS_N_INSNS (2)}, /* other */
1619 0, /* cost of multiply per each bit set */
1620 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1621 COSTS_N_INSNS (26), /* HI */
1622 COSTS_N_INSNS (42), /* SI */
1623 COSTS_N_INSNS (74), /* DI */
1624 COSTS_N_INSNS (74)}, /* other */
1625 COSTS_N_INSNS (1), /* cost of movsx */
1626 COSTS_N_INSNS (1), /* cost of movzx */
1627 8, /* "large" insn */
1628 17, /* MOVE_RATIO */
1629 4, /* cost for loading QImode using movzbl */
1630 {4, 4, 4}, /* cost of loading integer registers
1631 in QImode, HImode and SImode.
1632 Relative to reg-reg move (2). */
1633 {4, 4, 4}, /* cost of storing integer registers */
1634 4, /* cost of reg,reg fld/fst */
1635 {12, 12, 12}, /* cost of loading fp registers
1636 in SFmode, DFmode and XFmode */
1637 {6, 6, 8}, /* cost of storing fp registers
1638 in SFmode, DFmode and XFmode */
1639 2, /* cost of moving MMX register */
1640 {8, 8}, /* cost of loading MMX registers
1641 in SImode and DImode */
1642 {8, 8}, /* cost of storing MMX registers
1643 in SImode and DImode */
1644 2, /* cost of moving SSE register */
1645 {8, 8, 8}, /* cost of loading SSE registers
1646 in SImode, DImode and TImode */
1647 {8, 8, 8}, /* cost of storing SSE registers
1648 in SImode, DImode and TImode */
1649 5, /* MMX or SSE register to integer */
1650 32, /* size of l1 cache. */
1651 256, /* size of l2 cache. */
1652 64, /* size of prefetch block */
1653 6, /* number of parallel prefetches */
1654 3, /* Branch cost */
1655 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1656 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1657 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1658 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1659 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1660 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1661 atom_memcpy,
1662 atom_memset,
1663 1, /* scalar_stmt_cost. */
1664 1, /* scalar load_cost. */
1665 1, /* scalar_store_cost. */
1666 1, /* vec_stmt_cost. */
1667 1, /* vec_to_scalar_cost. */
1668 1, /* scalar_to_vec_cost. */
1669 1, /* vec_align_load_cost. */
1670 2, /* vec_unalign_load_cost. */
1671 1, /* vec_store_cost. */
1672 3, /* cond_taken_branch_cost. */
1673 1, /* cond_not_taken_branch_cost. */
1674 };
1675
1676 static stringop_algs slm_memcpy[2] = {
1677 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1678 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1679 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1680 static stringop_algs slm_memset[2] = {
1681 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1682 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1683 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1684 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1685 static const
1686 struct processor_costs slm_cost = {
1687 COSTS_N_INSNS (1), /* cost of an add instruction */
1688 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1689 COSTS_N_INSNS (1), /* variable shift costs */
1690 COSTS_N_INSNS (1), /* constant shift costs */
1691 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1692 COSTS_N_INSNS (3), /* HI */
1693 COSTS_N_INSNS (3), /* SI */
1694 COSTS_N_INSNS (4), /* DI */
1695 COSTS_N_INSNS (2)}, /* other */
1696 0, /* cost of multiply per each bit set */
1697 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1698 COSTS_N_INSNS (26), /* HI */
1699 COSTS_N_INSNS (42), /* SI */
1700 COSTS_N_INSNS (74), /* DI */
1701 COSTS_N_INSNS (74)}, /* other */
1702 COSTS_N_INSNS (1), /* cost of movsx */
1703 COSTS_N_INSNS (1), /* cost of movzx */
1704 8, /* "large" insn */
1705 17, /* MOVE_RATIO */
1706 4, /* cost for loading QImode using movzbl */
1707 {4, 4, 4}, /* cost of loading integer registers
1708 in QImode, HImode and SImode.
1709 Relative to reg-reg move (2). */
1710 {4, 4, 4}, /* cost of storing integer registers */
1711 4, /* cost of reg,reg fld/fst */
1712 {12, 12, 12}, /* cost of loading fp registers
1713 in SFmode, DFmode and XFmode */
1714 {6, 6, 8}, /* cost of storing fp registers
1715 in SFmode, DFmode and XFmode */
1716 2, /* cost of moving MMX register */
1717 {8, 8}, /* cost of loading MMX registers
1718 in SImode and DImode */
1719 {8, 8}, /* cost of storing MMX registers
1720 in SImode and DImode */
1721 2, /* cost of moving SSE register */
1722 {8, 8, 8}, /* cost of loading SSE registers
1723 in SImode, DImode and TImode */
1724 {8, 8, 8}, /* cost of storing SSE registers
1725 in SImode, DImode and TImode */
1726 5, /* MMX or SSE register to integer */
1727 32, /* size of l1 cache. */
1728 256, /* size of l2 cache. */
1729 64, /* size of prefetch block */
1730 6, /* number of parallel prefetches */
1731 3, /* Branch cost */
1732 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1733 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1734 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1735 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1736 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1737 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1738 slm_memcpy,
1739 slm_memset,
1740 1, /* scalar_stmt_cost. */
1741 1, /* scalar load_cost. */
1742 1, /* scalar_store_cost. */
1743 1, /* vec_stmt_cost. */
1744 4, /* vec_to_scalar_cost. */
1745 1, /* scalar_to_vec_cost. */
1746 1, /* vec_align_load_cost. */
1747 2, /* vec_unalign_load_cost. */
1748 1, /* vec_store_cost. */
1749 3, /* cond_taken_branch_cost. */
1750 1, /* cond_not_taken_branch_cost. */
1751 };
1752
1753 static stringop_algs intel_memcpy[2] = {
1754 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1755 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1756 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1757 static stringop_algs intel_memset[2] = {
1758 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1759 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1760 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1761 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1762 static const
1763 struct processor_costs intel_cost = {
1764 COSTS_N_INSNS (1), /* cost of an add instruction */
1765 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1766 COSTS_N_INSNS (1), /* variable shift costs */
1767 COSTS_N_INSNS (1), /* constant shift costs */
1768 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1769 COSTS_N_INSNS (3), /* HI */
1770 COSTS_N_INSNS (3), /* SI */
1771 COSTS_N_INSNS (4), /* DI */
1772 COSTS_N_INSNS (2)}, /* other */
1773 0, /* cost of multiply per each bit set */
1774 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1775 COSTS_N_INSNS (26), /* HI */
1776 COSTS_N_INSNS (42), /* SI */
1777 COSTS_N_INSNS (74), /* DI */
1778 COSTS_N_INSNS (74)}, /* other */
1779 COSTS_N_INSNS (1), /* cost of movsx */
1780 COSTS_N_INSNS (1), /* cost of movzx */
1781 8, /* "large" insn */
1782 17, /* MOVE_RATIO */
1783 4, /* cost for loading QImode using movzbl */
1784 {4, 4, 4}, /* cost of loading integer registers
1785 in QImode, HImode and SImode.
1786 Relative to reg-reg move (2). */
1787 {4, 4, 4}, /* cost of storing integer registers */
1788 4, /* cost of reg,reg fld/fst */
1789 {12, 12, 12}, /* cost of loading fp registers
1790 in SFmode, DFmode and XFmode */
1791 {6, 6, 8}, /* cost of storing fp registers
1792 in SFmode, DFmode and XFmode */
1793 2, /* cost of moving MMX register */
1794 {8, 8}, /* cost of loading MMX registers
1795 in SImode and DImode */
1796 {8, 8}, /* cost of storing MMX registers
1797 in SImode and DImode */
1798 2, /* cost of moving SSE register */
1799 {8, 8, 8}, /* cost of loading SSE registers
1800 in SImode, DImode and TImode */
1801 {8, 8, 8}, /* cost of storing SSE registers
1802 in SImode, DImode and TImode */
1803 5, /* MMX or SSE register to integer */
1804 32, /* size of l1 cache. */
1805 256, /* size of l2 cache. */
1806 64, /* size of prefetch block */
1807 6, /* number of parallel prefetches */
1808 3, /* Branch cost */
1809 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1810 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1811 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1812 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1813 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1814 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1815 intel_memcpy,
1816 intel_memset,
1817 1, /* scalar_stmt_cost. */
1818 1, /* scalar load_cost. */
1819 1, /* scalar_store_cost. */
1820 1, /* vec_stmt_cost. */
1821 4, /* vec_to_scalar_cost. */
1822 1, /* scalar_to_vec_cost. */
1823 1, /* vec_align_load_cost. */
1824 2, /* vec_unalign_load_cost. */
1825 1, /* vec_store_cost. */
1826 3, /* cond_taken_branch_cost. */
1827 1, /* cond_not_taken_branch_cost. */
1828 };
1829
1830 /* Generic should produce code tuned for Core-i7 (and newer chips)
1831 and btver1 (and newer chips). */
1832
1833 static stringop_algs generic_memcpy[2] = {
1834 {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
1835 {-1, libcall, false}}},
1836 {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
1837 {-1, libcall, false}}}};
1838 static stringop_algs generic_memset[2] = {
1839 {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
1840 {-1, libcall, false}}},
1841 {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
1842 {-1, libcall, false}}}};
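/* Illustrative reading of the stringop_algs tables above (a sketch, not
   used by the compiler): each {max, alg, noalign} entry means "for block
   sizes up to MAX bytes, expand the operation with ALG", and a max of -1
   covers everything larger.  So for 32-bit generic tuning a known-size
   memcpy of at most 32 bytes is expanded as an inline loop, sizes up to
   8192 bytes use rep_prefix_4_byte, and larger blocks call the library
   routine.  */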
1843 static const
1844 struct processor_costs generic_cost = {
1845 COSTS_N_INSNS (1), /* cost of an add instruction */
1846 /* On all chips taken into consideration lea takes 2 cycles or more. With
1847 this cost, however, our current implementation of synth_mult results in
1848 the use of unnecessary temporary registers, causing regressions on several
1849 SPECfp benchmarks. */
1850 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1851 COSTS_N_INSNS (1), /* variable shift costs */
1852 COSTS_N_INSNS (1), /* constant shift costs */
1853 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1854 COSTS_N_INSNS (4), /* HI */
1855 COSTS_N_INSNS (3), /* SI */
1856 COSTS_N_INSNS (4), /* DI */
1857 COSTS_N_INSNS (2)}, /* other */
1858 0, /* cost of multiply per each bit set */
1859 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1860 COSTS_N_INSNS (26), /* HI */
1861 COSTS_N_INSNS (42), /* SI */
1862 COSTS_N_INSNS (74), /* DI */
1863 COSTS_N_INSNS (74)}, /* other */
1864 COSTS_N_INSNS (1), /* cost of movsx */
1865 COSTS_N_INSNS (1), /* cost of movzx */
1866 8, /* "large" insn */
1867 17, /* MOVE_RATIO */
1868 4, /* cost for loading QImode using movzbl */
1869 {4, 4, 4}, /* cost of loading integer registers
1870 in QImode, HImode and SImode.
1871 Relative to reg-reg move (2). */
1872 {4, 4, 4}, /* cost of storing integer registers */
1873 4, /* cost of reg,reg fld/fst */
1874 {12, 12, 12}, /* cost of loading fp registers
1875 in SFmode, DFmode and XFmode */
1876 {6, 6, 8}, /* cost of storing fp registers
1877 in SFmode, DFmode and XFmode */
1878 2, /* cost of moving MMX register */
1879 {8, 8}, /* cost of loading MMX registers
1880 in SImode and DImode */
1881 {8, 8}, /* cost of storing MMX registers
1882 in SImode and DImode */
1883 2, /* cost of moving SSE register */
1884 {8, 8, 8}, /* cost of loading SSE registers
1885 in SImode, DImode and TImode */
1886 {8, 8, 8}, /* cost of storing SSE registers
1887 in SImode, DImode and TImode */
1888 5, /* MMX or SSE register to integer */
1889 32, /* size of l1 cache. */
1890 512, /* size of l2 cache. */
1891 64, /* size of prefetch block */
1892 6, /* number of parallel prefetches */
1893 /* Benchmarks show large regressions on the K8 sixtrack benchmark when this
1894 value is increased to the perhaps more appropriate value of 5. */
1895 3, /* Branch cost */
1896 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1897 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1898 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1899 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1900 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1901 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1902 generic_memcpy,
1903 generic_memset,
1904 1, /* scalar_stmt_cost. */
1905 1, /* scalar load_cost. */
1906 1, /* scalar_store_cost. */
1907 1, /* vec_stmt_cost. */
1908 1, /* vec_to_scalar_cost. */
1909 1, /* scalar_to_vec_cost. */
1910 1, /* vec_align_load_cost. */
1911 2, /* vec_unalign_load_cost. */
1912 1, /* vec_store_cost. */
1913 3, /* cond_taken_branch_cost. */
1914 1, /* cond_not_taken_branch_cost. */
1915 };
1916
1917 /* core_cost should produce code tuned for the Core family of CPUs. */
1918 static stringop_algs core_memcpy[2] = {
1919 {libcall, {{1024, rep_prefix_4_byte, true}, {-1, libcall, false}}},
1920 {libcall, {{24, loop, true}, {128, rep_prefix_8_byte, true},
1921 {-1, libcall, false}}}};
1922 static stringop_algs core_memset[2] = {
1923 {libcall, {{6, loop_1_byte, true},
1924 {24, loop, true},
1925 {8192, rep_prefix_4_byte, true},
1926 {-1, libcall, false}}},
1927 {libcall, {{24, loop, true}, {512, rep_prefix_8_byte, true},
1928 {-1, libcall, false}}}};
1929
1930 static const
1931 struct processor_costs core_cost = {
1932 COSTS_N_INSNS (1), /* cost of an add instruction */
1933 /* On all chips taken into consideration lea takes 2 cycles or more. With
1934 this cost, however, our current implementation of synth_mult results in
1935 the use of unnecessary temporary registers, causing regressions on several
1936 SPECfp benchmarks. */
1937 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1938 COSTS_N_INSNS (1), /* variable shift costs */
1939 COSTS_N_INSNS (1), /* constant shift costs */
1940 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1941 COSTS_N_INSNS (4), /* HI */
1942 COSTS_N_INSNS (3), /* SI */
1943 COSTS_N_INSNS (4), /* DI */
1944 COSTS_N_INSNS (2)}, /* other */
1945 0, /* cost of multiply per each bit set */
1946 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1947 COSTS_N_INSNS (26), /* HI */
1948 COSTS_N_INSNS (42), /* SI */
1949 COSTS_N_INSNS (74), /* DI */
1950 COSTS_N_INSNS (74)}, /* other */
1951 COSTS_N_INSNS (1), /* cost of movsx */
1952 COSTS_N_INSNS (1), /* cost of movzx */
1953 8, /* "large" insn */
1954 17, /* MOVE_RATIO */
1955 4, /* cost for loading QImode using movzbl */
1956 {4, 4, 4}, /* cost of loading integer registers
1957 in QImode, HImode and SImode.
1958 Relative to reg-reg move (2). */
1959 {4, 4, 4}, /* cost of storing integer registers */
1960 4, /* cost of reg,reg fld/fst */
1961 {12, 12, 12}, /* cost of loading fp registers
1962 in SFmode, DFmode and XFmode */
1963 {6, 6, 8}, /* cost of storing fp registers
1964 in SFmode, DFmode and XFmode */
1965 2, /* cost of moving MMX register */
1966 {8, 8}, /* cost of loading MMX registers
1967 in SImode and DImode */
1968 {8, 8}, /* cost of storing MMX registers
1969 in SImode and DImode */
1970 2, /* cost of moving SSE register */
1971 {8, 8, 8}, /* cost of loading SSE registers
1972 in SImode, DImode and TImode */
1973 {8, 8, 8}, /* cost of storing SSE registers
1974 in SImode, DImode and TImode */
1975 5, /* MMX or SSE register to integer */
1976 64, /* size of l1 cache. */
1977 512, /* size of l2 cache. */
1978 64, /* size of prefetch block */
1979 6, /* number of parallel prefetches */
1980 /* FIXME: perhaps a more appropriate value is 5. */
1981 3, /* Branch cost */
1982 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1983 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1984 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1985 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1986 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1987 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1988 core_memcpy,
1989 core_memset,
1990 1, /* scalar_stmt_cost. */
1991 1, /* scalar load_cost. */
1992 1, /* scalar_store_cost. */
1993 1, /* vec_stmt_cost. */
1994 1, /* vec_to_scalar_cost. */
1995 1, /* scalar_to_vec_cost. */
1996 1, /* vec_align_load_cost. */
1997 2, /* vec_unalign_load_cost. */
1998 1, /* vec_store_cost. */
1999 3, /* cond_taken_branch_cost. */
2000 1, /* cond_not_taken_branch_cost. */
2001 };
2002
2003
2004 /* Set by -mtune. */
2005 const struct processor_costs *ix86_tune_cost = &pentium_cost;
2006
2007 /* Set by -mtune or -Os. */
2008 const struct processor_costs *ix86_cost = &pentium_cost;
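/* Worked example (illustrative only): with -mtune=generic, ix86_tune_cost
   points at generic_cost above, whose FDIV entry is COSTS_N_INSNS (20)
   versus COSTS_N_INSNS (1) for an add, i.e. the cost model treats one FP
   divide as roughly twenty additions.  */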
2009
2010 /* Processor feature/optimization bitmasks. */
2011 #define m_386 (1<<PROCESSOR_I386)
2012 #define m_486 (1<<PROCESSOR_I486)
2013 #define m_PENT (1<<PROCESSOR_PENTIUM)
2014 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
2015 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
2016 #define m_NOCONA (1<<PROCESSOR_NOCONA)
2017 #define m_P4_NOCONA (m_PENT4 | m_NOCONA)
2018 #define m_CORE2 (1<<PROCESSOR_CORE2)
2019 #define m_NEHALEM (1<<PROCESSOR_NEHALEM)
2020 #define m_SANDYBRIDGE (1<<PROCESSOR_SANDYBRIDGE)
2021 #define m_HASWELL (1<<PROCESSOR_HASWELL)
2022 #define m_CORE_ALL (m_CORE2 | m_NEHALEM | m_SANDYBRIDGE | m_HASWELL)
2023 #define m_BONNELL (1<<PROCESSOR_BONNELL)
2024 #define m_SILVERMONT (1<<PROCESSOR_SILVERMONT)
2025 #define m_INTEL (1<<PROCESSOR_INTEL)
2026
2027 #define m_GEODE (1<<PROCESSOR_GEODE)
2028 #define m_K6 (1<<PROCESSOR_K6)
2029 #define m_K6_GEODE (m_K6 | m_GEODE)
2030 #define m_K8 (1<<PROCESSOR_K8)
2031 #define m_ATHLON (1<<PROCESSOR_ATHLON)
2032 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
2033 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
2034 #define m_BDVER1 (1<<PROCESSOR_BDVER1)
2035 #define m_BDVER2 (1<<PROCESSOR_BDVER2)
2036 #define m_BDVER3 (1<<PROCESSOR_BDVER3)
2037 #define m_BDVER4 (1<<PROCESSOR_BDVER4)
2038 #define m_BTVER1 (1<<PROCESSOR_BTVER1)
2039 #define m_BTVER2 (1<<PROCESSOR_BTVER2)
2040 #define m_BDVER (m_BDVER1 | m_BDVER2 | m_BDVER3 | m_BDVER4)
2041 #define m_BTVER (m_BTVER1 | m_BTVER2)
2042 #define m_AMD_MULTIPLE (m_ATHLON_K8 | m_AMDFAM10 | m_BDVER | m_BTVER)
2043
2044 #define m_GENERIC (1<<PROCESSOR_GENERIC)
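/* These masks are OR'd together to build the per-feature selectors in
   x86-tune.def; for example a selector of (m_CORE_ALL | m_GENERIC) marks
   a tuning flag as enabled for all Core processors and for generic
   tuning (illustrative composition; see x86-tune.def for the real
   selectors).  */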
2045
2046 const char* ix86_tune_feature_names[X86_TUNE_LAST] = {
2047 #undef DEF_TUNE
2048 #define DEF_TUNE(tune, name, selector) name,
2049 #include "x86-tune.def"
2050 #undef DEF_TUNE
2051 };
2052
2053 /* Feature tests against the various tunings. */
2054 unsigned char ix86_tune_features[X86_TUNE_LAST];
2055
2056 /* Feature tests against the various tunings used to create ix86_tune_features
2057 based on the processor mask. */
2058 static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
2059 #undef DEF_TUNE
2060 #define DEF_TUNE(tune, name, selector) selector,
2061 #include "x86-tune.def"
2062 #undef DEF_TUNE
2063 };
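/* To illustrate the DEF_TUNE expansion above with a hypothetical entry:
   a line in x86-tune.def of the form

     DEF_TUNE (X86_TUNE_FOO, "foo", m_CORE_ALL | m_GENERIC)

   contributes "foo" to ix86_tune_feature_names and the selector mask
   (m_CORE_ALL | m_GENERIC) to initial_ix86_tune_features, both indexed
   by X86_TUNE_FOO.  */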
2064
2065 /* Feature tests against the various architecture variations. */
2066 unsigned char ix86_arch_features[X86_ARCH_LAST];
2067
2068 /* Feature tests against the various architecture variations, used to create
2069 ix86_arch_features based on the processor mask. */
2070 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
2071 /* X86_ARCH_CMOV: Conditional move was added for pentiumpro. */
2072 ~(m_386 | m_486 | m_PENT | m_K6),
2073
2074 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
2075 ~m_386,
2076
2077 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
2078 ~(m_386 | m_486),
2079
2080 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
2081 ~m_386,
2082
2083 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
2084 ~m_386,
2085 };
2086
2087 /* In case the average insn count for a single function invocation is
2088 lower than this constant, emit fast (but longer) prologue and
2089 epilogue code. */
2090 #define FAST_PROLOGUE_INSN_COUNT 20
2091
2092 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
2093 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
2094 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
2095 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
2096
2097 /* Array of the smallest class containing reg number REGNO, indexed by
2098 REGNO. Used by REGNO_REG_CLASS in i386.h. */
2099
2100 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
2101 {
2102 /* ax, dx, cx, bx */
2103 AREG, DREG, CREG, BREG,
2104 /* si, di, bp, sp */
2105 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
2106 /* FP registers */
2107 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
2108 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
2109 /* arg pointer */
2110 NON_Q_REGS,
2111 /* flags, fpsr, fpcr, frame */
2112 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
2113 /* SSE registers */
2114 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2115 SSE_REGS, SSE_REGS,
2116 /* MMX registers */
2117 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
2118 MMX_REGS, MMX_REGS,
2119 /* REX registers */
2120 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2121 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2122 /* SSE REX registers */
2123 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2124 SSE_REGS, SSE_REGS,
2125 /* AVX-512 SSE registers */
2126 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2127 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2128 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2129 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2130 /* Mask registers. */
2131 MASK_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
2132 MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
2133 };
2134
2135 /* The "default" register map used in 32bit mode. */
2136
2137 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
2138 {
2139 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
2140 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
2141 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2142 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
2143 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
2144 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2145 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2146 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23*/
2147 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31*/
2148 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
2149 };
2150
2151 /* The "default" register map used in 64bit mode. */
2152
2153 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
2154 {
2155 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
2156 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
2157 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2158 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
2159 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
2160 8,9,10,11,12,13,14,15, /* extended integer registers */
2161 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
2162 67, 68, 69, 70, 71, 72, 73, 74, /* AVX-512 registers 16-23 */
2163 75, 76, 77, 78, 79, 80, 81, 82, /* AVX-512 registers 24-31 */
2164 118, 119, 120, 121, 122, 123, 124, 125, /* Mask registers */
2165 };
2166
2167 /* Define the register numbers to be used in Dwarf debugging information.
2168 The SVR4 reference port C compiler uses the following register numbers
2169 in its Dwarf output code:
2170 0 for %eax (gcc regno = 0)
2171 1 for %ecx (gcc regno = 2)
2172 2 for %edx (gcc regno = 1)
2173 3 for %ebx (gcc regno = 3)
2174 4 for %esp (gcc regno = 7)
2175 5 for %ebp (gcc regno = 6)
2176 6 for %esi (gcc regno = 4)
2177 7 for %edi (gcc regno = 5)
2178 The following three DWARF register numbers are never generated by
2179 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
2180 believes these numbers have these meanings.
2181 8 for %eip (no gcc equivalent)
2182 9 for %eflags (gcc regno = 17)
2183 10 for %trapno (no gcc equivalent)
2184 It is not at all clear how we should number the FP stack registers
2185 for the x86 architecture. If the version of SDB on x86/svr4 were
2186 a bit less brain dead with respect to floating-point then we would
2187 have a precedent to follow with respect to DWARF register numbers
2188 for x86 FP registers, but the SDB on x86/svr4 is so completely
2189 broken with respect to FP registers that it is hardly worth thinking
2190 of it as something to strive for compatibility with.
2191 The version of x86/svr4 SDB I have at the moment does (partially)
2192 seem to believe that DWARF register number 11 is associated with
2193 the x86 register %st(0), but that's about all. Higher DWARF
2194 register numbers don't seem to be associated with anything in
2195 particular, and even for DWARF regno 11, SDB only seems to under-
2196 stand that it should say that a variable lives in %st(0) (when
2197 asked via an `=' command) if we said it was in DWARF regno 11,
2198 but SDB still prints garbage when asked for the value of the
2199 variable in question (via a `/' command).
2200 (Also note that the labels SDB prints for various FP stack regs
2201 when doing an `x' command are all wrong.)
2202 Note that these problems generally don't affect the native SVR4
2203 C compiler because it doesn't allow the use of -O with -g and
2204 because when it is *not* optimizing, it allocates a memory
2205 location for each floating-point variable, and the memory
2206 location is what gets described in the DWARF AT_location
2207 attribute for the variable in question.
2208 Regardless of the severe mental illness of the x86/svr4 SDB, we
2209 do something sensible here and we use the following DWARF
2210 register numbers. Note that these are all stack-top-relative
2211 numbers.
2212 11 for %st(0) (gcc regno = 8)
2213 12 for %st(1) (gcc regno = 9)
2214 13 for %st(2) (gcc regno = 10)
2215 14 for %st(3) (gcc regno = 11)
2216 15 for %st(4) (gcc regno = 12)
2217 16 for %st(5) (gcc regno = 13)
2218 17 for %st(6) (gcc regno = 14)
2219 18 for %st(7) (gcc regno = 15)
2220 */
2221 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
2222 {
2223 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
2224 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
2225 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2226 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
2227 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
2228 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2229 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2230 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23*/
2231 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31*/
2232 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
2233 };
2234
2235 /* Define parameter passing and return registers. */
2236
2237 static int const x86_64_int_parameter_registers[6] =
2238 {
2239 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
2240 };
2241
2242 static int const x86_64_ms_abi_int_parameter_registers[4] =
2243 {
2244 CX_REG, DX_REG, R8_REG, R9_REG
2245 };
2246
2247 static int const x86_64_int_return_registers[4] =
2248 {
2249 AX_REG, DX_REG, DI_REG, SI_REG
2250 };
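/* Illustration (not used by the compiler): under the SysV ABI a call
   f (a, b, c) therefore passes its first three integer arguments in
   %rdi, %rsi and %rdx, whereas under the MS ABI they go in %rcx, %rdx
   and %r8; a 128-bit integer return value comes back in %rax (low half)
   and %rdx (high half).  */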
2251
2252 /* Additional registers that are clobbered by SYSV calls. */
2253
2254 int const x86_64_ms_sysv_extra_clobbered_registers[12] =
2255 {
2256 SI_REG, DI_REG,
2257 XMM6_REG, XMM7_REG,
2258 XMM8_REG, XMM9_REG, XMM10_REG, XMM11_REG,
2259 XMM12_REG, XMM13_REG, XMM14_REG, XMM15_REG
2260 };
2261
2262 /* Define the structure for the machine field in struct function. */
2263
2264 struct GTY(()) stack_local_entry {
2265 unsigned short mode;
2266 unsigned short n;
2267 rtx rtl;
2268 struct stack_local_entry *next;
2269 };
2270
2271 /* Structure describing stack frame layout.
2272 Stack grows downward:
2273
2274 [arguments]
2275 <- ARG_POINTER
2276 saved pc
2277
2278 saved static chain if ix86_static_chain_on_stack
2279
2280 saved frame pointer if frame_pointer_needed
2281 <- HARD_FRAME_POINTER
2282 [saved regs]
2283 <- regs_save_offset
2284 [padding0]
2285
2286 [saved SSE regs]
2287 <- sse_regs_save_offset
2288 [padding1] |
2289 | <- FRAME_POINTER
2290 [va_arg registers] |
2291 |
2292 [frame] |
2293 |
2294 [padding2] | = to_allocate
2295 <- STACK_POINTER
2296 */
2297 struct ix86_frame
2298 {
2299 int nsseregs;
2300 int nregs;
2301 int va_arg_size;
2302 int red_zone_size;
2303 int outgoing_arguments_size;
2304
2305 /* The offsets relative to ARG_POINTER. */
2306 HOST_WIDE_INT frame_pointer_offset;
2307 HOST_WIDE_INT hard_frame_pointer_offset;
2308 HOST_WIDE_INT stack_pointer_offset;
2309 HOST_WIDE_INT hfp_save_offset;
2310 HOST_WIDE_INT reg_save_offset;
2311 HOST_WIDE_INT sse_reg_save_offset;
2312
2313 /* When save_regs_using_mov is set, emit prologue using
2314 move instead of push instructions. */
2315 bool save_regs_using_mov;
2316 };
2317
2318 /* Which cpu are we scheduling for. */
2319 enum attr_cpu ix86_schedule;
2320
2321 /* Which cpu are we optimizing for. */
2322 enum processor_type ix86_tune;
2323
2324 /* Which instruction set architecture to use. */
2325 enum processor_type ix86_arch;
2326
2327 /* True if processor has SSE prefetch instruction. */
2328 unsigned char x86_prefetch_sse;
2329
2330 /* -mstackrealign option */
2331 static const char ix86_force_align_arg_pointer_string[]
2332 = "force_align_arg_pointer";
2333
2334 static rtx (*ix86_gen_leave) (void);
2335 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
2336 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
2337 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
2338 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
2339 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
2340 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
2341 static rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx);
2342 static rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx);
2343 static rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx);
2344 static rtx (*ix86_gen_tls_global_dynamic_64) (rtx, rtx, rtx);
2345 static rtx (*ix86_gen_tls_local_dynamic_base_64) (rtx, rtx);
2346
2347 /* Preferred alignment for stack boundary in bits. */
2348 unsigned int ix86_preferred_stack_boundary;
2349
2350 /* Alignment for incoming stack boundary in bits specified at
2351 command line. */
2352 static unsigned int ix86_user_incoming_stack_boundary;
2353
2354 /* Default alignment for incoming stack boundary in bits. */
2355 static unsigned int ix86_default_incoming_stack_boundary;
2356
2357 /* Alignment for incoming stack boundary in bits. */
2358 unsigned int ix86_incoming_stack_boundary;
2359
2360 /* Calling abi specific va_list type nodes. */
2361 static GTY(()) tree sysv_va_list_type_node;
2362 static GTY(()) tree ms_va_list_type_node;
2363
2364 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
2365 char internal_label_prefix[16];
2366 int internal_label_prefix_len;
2367
2368 /* Fence to use after loop using movnt. */
2369 tree x86_mfence;
2370
2371 /* Register class used for passing a given 64bit part of the argument.
2372 These represent classes as documented by the psABI, with the exception
2373 of the SSESF and SSEDF classes, which are basically the SSE class; GCC
2374 just uses SFmode or DFmode moves instead of DImode to avoid reformatting penalties.
2375
2376 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
2377 whenever possible (the upper half is then just padding). */
2378 enum x86_64_reg_class
2379 {
2380 X86_64_NO_CLASS,
2381 X86_64_INTEGER_CLASS,
2382 X86_64_INTEGERSI_CLASS,
2383 X86_64_SSE_CLASS,
2384 X86_64_SSESF_CLASS,
2385 X86_64_SSEDF_CLASS,
2386 X86_64_SSEUP_CLASS,
2387 X86_64_X87_CLASS,
2388 X86_64_X87UP_CLASS,
2389 X86_64_COMPLEX_X87_CLASS,
2390 X86_64_MEMORY_CLASS
2391 };
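/* For intuition (assumed typical classifications, not authoritative):
   a double argument classifies as X86_64_SSEDF_CLASS, a float as
   X86_64_SSESF_CLASS, a 64-bit integer as X86_64_INTEGER_CLASS and a
   long double as X86_64_X87_CLASS followed by X86_64_X87UP_CLASS, while
   anything the classifier cannot place in registers ends up as
   X86_64_MEMORY_CLASS.  */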
2392
2393 #define MAX_CLASSES 8
2394
2395 /* Table of constants used by fldpi, fldln2, etc.... */
2396 static REAL_VALUE_TYPE ext_80387_constants_table [5];
2397 static bool ext_80387_constants_init = 0;
2398
2399 \f
2400 static struct machine_function * ix86_init_machine_status (void);
2401 static rtx ix86_function_value (const_tree, const_tree, bool);
2402 static bool ix86_function_value_regno_p (const unsigned int);
2403 static unsigned int ix86_function_arg_boundary (enum machine_mode,
2404 const_tree);
2405 static rtx ix86_static_chain (const_tree, bool);
2406 static int ix86_function_regparm (const_tree, const_tree);
2407 static void ix86_compute_frame_layout (struct ix86_frame *);
2408 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
2409 rtx, rtx, int);
2410 static void ix86_add_new_builtins (HOST_WIDE_INT);
2411 static tree ix86_canonical_va_list_type (tree);
2412 static void predict_jump (int);
2413 static unsigned int split_stack_prologue_scratch_regno (void);
2414 static bool i386_asm_output_addr_const_extra (FILE *, rtx);
2415
2416 enum ix86_function_specific_strings
2417 {
2418 IX86_FUNCTION_SPECIFIC_ARCH,
2419 IX86_FUNCTION_SPECIFIC_TUNE,
2420 IX86_FUNCTION_SPECIFIC_MAX
2421 };
2422
2423 static char *ix86_target_string (HOST_WIDE_INT, int, const char *,
2424 const char *, enum fpmath_unit, bool);
2425 static void ix86_function_specific_save (struct cl_target_option *,
2426 struct gcc_options *opts);
2427 static void ix86_function_specific_restore (struct gcc_options *opts,
2428 struct cl_target_option *);
2429 static void ix86_function_specific_print (FILE *, int,
2430 struct cl_target_option *);
2431 static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
2432 static bool ix86_valid_target_attribute_inner_p (tree, char *[],
2433 struct gcc_options *,
2434 struct gcc_options *,
2435 struct gcc_options *);
2436 static bool ix86_can_inline_p (tree, tree);
2437 static void ix86_set_current_function (tree);
2438 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
2439
2440 static enum calling_abi ix86_function_abi (const_tree);
2441
2442 \f
2443 #ifndef SUBTARGET32_DEFAULT_CPU
2444 #define SUBTARGET32_DEFAULT_CPU "i386"
2445 #endif
2446
2447 /* Whether -mtune= or -march= were specified */
2448 static int ix86_tune_defaulted;
2449 static int ix86_arch_specified;
2450
2451 /* Vectorization library interface and handlers. */
2452 static tree (*ix86_veclib_handler) (enum built_in_function, tree, tree);
2453
2454 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
2455 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
2456
2457 /* Processor target table, indexed by processor number */
2458 struct ptt
2459 {
2460 const char *const name; /* processor name */
2461 const struct processor_costs *cost; /* Processor costs */
2462 const int align_loop; /* Default alignments. */
2463 const int align_loop_max_skip;
2464 const int align_jump;
2465 const int align_jump_max_skip;
2466 const int align_func;
2467 };
2468
2469 /* This table must be in sync with enum processor_type in i386.h. */
2470 static const struct ptt processor_target_table[PROCESSOR_max] =
2471 {
2472 {"generic", &generic_cost, 16, 10, 16, 10, 16},
2473 {"i386", &i386_cost, 4, 3, 4, 3, 4},
2474 {"i486", &i486_cost, 16, 15, 16, 15, 16},
2475 {"pentium", &pentium_cost, 16, 7, 16, 7, 16},
2476 {"pentiumpro", &pentiumpro_cost, 16, 15, 16, 10, 16},
2477 {"pentium4", &pentium4_cost, 0, 0, 0, 0, 0},
2478 {"nocona", &nocona_cost, 0, 0, 0, 0, 0},
2479 {"core2", &core_cost, 16, 10, 16, 10, 16},
2480 {"nehalem", &core_cost, 16, 10, 16, 10, 16},
2481 {"sandybridge", &core_cost, 16, 10, 16, 10, 16},
2482 {"haswell", &core_cost, 16, 10, 16, 10, 16},
2483 {"bonnell", &atom_cost, 16, 15, 16, 7, 16},
2484 {"silvermont", &slm_cost, 16, 15, 16, 7, 16},
2485 {"intel", &intel_cost, 16, 15, 16, 7, 16},
2486 {"geode", &geode_cost, 0, 0, 0, 0, 0},
2487 {"k6", &k6_cost, 32, 7, 32, 7, 32},
2488 {"athlon", &athlon_cost, 16, 7, 16, 7, 16},
2489 {"k8", &k8_cost, 16, 7, 16, 7, 16},
2490 {"amdfam10", &amdfam10_cost, 32, 24, 32, 7, 32},
2491 {"bdver1", &bdver1_cost, 16, 10, 16, 7, 11},
2492 {"bdver2", &bdver2_cost, 16, 10, 16, 7, 11},
2493 {"bdver3", &bdver3_cost, 16, 10, 16, 7, 11},
2494 {"bdver4", &bdver4_cost, 16, 10, 16, 7, 11},
2495 {"btver1", &btver1_cost, 16, 10, 16, 7, 11},
2496 {"btver2", &btver2_cost, 16, 10, 16, 7, 11}
2497 };
2498 \f
2499 static unsigned int
2500 rest_of_handle_insert_vzeroupper (void)
2501 {
2502 int i;
2503
2504 /* vzeroupper instructions are inserted immediately after reload to
2505 account for possible spills from 256bit registers. The pass
2506 reuses the mode switching infrastructure by re-running the mode
2507 insertion pass, so disable entities that have already been processed. */
2508 for (i = 0; i < MAX_386_ENTITIES; i++)
2509 ix86_optimize_mode_switching[i] = 0;
2510
2511 ix86_optimize_mode_switching[AVX_U128] = 1;
2512
2513 /* Call optimize_mode_switching. */
2514 g->get_passes ()->execute_pass_mode_switching ();
2515 return 0;
2516 }
2517
2518 namespace {
2519
2520 const pass_data pass_data_insert_vzeroupper =
2521 {
2522 RTL_PASS, /* type */
2523 "vzeroupper", /* name */
2524 OPTGROUP_NONE, /* optinfo_flags */
2525 TV_NONE, /* tv_id */
2526 0, /* properties_required */
2527 0, /* properties_provided */
2528 0, /* properties_destroyed */
2529 0, /* todo_flags_start */
2530 TODO_df_finish, /* todo_flags_finish */
2531 };
2532
2533 class pass_insert_vzeroupper : public rtl_opt_pass
2534 {
2535 public:
2536 pass_insert_vzeroupper(gcc::context *ctxt)
2537 : rtl_opt_pass(pass_data_insert_vzeroupper, ctxt)
2538 {}
2539
2540 /* opt_pass methods: */
2541 virtual bool gate (function *)
2542 {
2543 return TARGET_AVX && !TARGET_AVX512F && TARGET_VZEROUPPER;
2544 }
2545
2546 virtual unsigned int execute (function *)
2547 {
2548 return rest_of_handle_insert_vzeroupper ();
2549 }
2550
2551 }; // class pass_insert_vzeroupper
2552
2553 } // anon namespace
2554
2555 rtl_opt_pass *
2556 make_pass_insert_vzeroupper (gcc::context *ctxt)
2557 {
2558 return new pass_insert_vzeroupper (ctxt);
2559 }
2560
2561 /* Return true if a red-zone is in use. */
2562
2563 static inline bool
2564 ix86_using_red_zone (void)
2565 {
2566 return TARGET_RED_ZONE && !TARGET_64BIT_MS_ABI;
2567 }
2568 \f
2569 /* Return a string that documents the current -m options. The caller is
2570 responsible for freeing the string. */
2571
2572 static char *
2573 ix86_target_string (HOST_WIDE_INT isa, int flags, const char *arch,
2574 const char *tune, enum fpmath_unit fpmath,
2575 bool add_nl_p)
2576 {
2577 struct ix86_target_opts
2578 {
2579 const char *option; /* option string */
2580 HOST_WIDE_INT mask; /* isa mask options */
2581 };
2582
2583 /* This table is ordered so that options like -msse4.2 that imply
2584 preceding options are matched first. */
2585 static struct ix86_target_opts isa_opts[] =
2586 {
2587 { "-mfma4", OPTION_MASK_ISA_FMA4 },
2588 { "-mfma", OPTION_MASK_ISA_FMA },
2589 { "-mxop", OPTION_MASK_ISA_XOP },
2590 { "-mlwp", OPTION_MASK_ISA_LWP },
2591 { "-mavx512f", OPTION_MASK_ISA_AVX512F },
2592 { "-mavx512er", OPTION_MASK_ISA_AVX512ER },
2593 { "-mavx512cd", OPTION_MASK_ISA_AVX512CD },
2594 { "-mavx512pf", OPTION_MASK_ISA_AVX512PF },
2595 { "-mavx512dq", OPTION_MASK_ISA_AVX512DQ },
2596 { "-msse4a", OPTION_MASK_ISA_SSE4A },
2597 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
2598 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
2599 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
2600 { "-msse3", OPTION_MASK_ISA_SSE3 },
2601 { "-msse2", OPTION_MASK_ISA_SSE2 },
2602 { "-msse", OPTION_MASK_ISA_SSE },
2603 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
2604 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
2605 { "-mmmx", OPTION_MASK_ISA_MMX },
2606 { "-mabm", OPTION_MASK_ISA_ABM },
2607 { "-mbmi", OPTION_MASK_ISA_BMI },
2608 { "-mbmi2", OPTION_MASK_ISA_BMI2 },
2609 { "-mlzcnt", OPTION_MASK_ISA_LZCNT },
2610 { "-mhle", OPTION_MASK_ISA_HLE },
2611 { "-mfxsr", OPTION_MASK_ISA_FXSR },
2612 { "-mrdseed", OPTION_MASK_ISA_RDSEED },
2613 { "-mprfchw", OPTION_MASK_ISA_PRFCHW },
2614 { "-madx", OPTION_MASK_ISA_ADX },
2615 { "-mtbm", OPTION_MASK_ISA_TBM },
2616 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
2617 { "-mmovbe", OPTION_MASK_ISA_MOVBE },
2618 { "-mcrc32", OPTION_MASK_ISA_CRC32 },
2619 { "-maes", OPTION_MASK_ISA_AES },
2620 { "-msha", OPTION_MASK_ISA_SHA },
2621 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
2622 { "-mfsgsbase", OPTION_MASK_ISA_FSGSBASE },
2623 { "-mrdrnd", OPTION_MASK_ISA_RDRND },
2624 { "-mf16c", OPTION_MASK_ISA_F16C },
2625 { "-mrtm", OPTION_MASK_ISA_RTM },
2626 { "-mxsave", OPTION_MASK_ISA_XSAVE },
2627 { "-mxsaveopt", OPTION_MASK_ISA_XSAVEOPT },
2628 { "-mprefetchwt1", OPTION_MASK_ISA_PREFETCHWT1 },
2629 { "-mclflushopt", OPTION_MASK_ISA_CLFLUSHOPT },
2630 { "-mxsavec", OPTION_MASK_ISA_XSAVEC },
2631 { "-mxsaves", OPTION_MASK_ISA_XSAVES },
2632 };
2633
2634 /* Flag options. */
2635 static struct ix86_target_opts flag_opts[] =
2636 {
2637 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
2638 { "-mlong-double-128", MASK_LONG_DOUBLE_128 },
2639 { "-mlong-double-64", MASK_LONG_DOUBLE_64 },
2640 { "-m80387", MASK_80387 },
2641 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
2642 { "-malign-double", MASK_ALIGN_DOUBLE },
2643 { "-mcld", MASK_CLD },
2644 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
2645 { "-mieee-fp", MASK_IEEE_FP },
2646 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
2647 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
2648 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
2649 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
2650 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
2651 { "-mno-push-args", MASK_NO_PUSH_ARGS },
2652 { "-mno-red-zone", MASK_NO_RED_ZONE },
2653 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
2654 { "-mrecip", MASK_RECIP },
2655 { "-mrtd", MASK_RTD },
2656 { "-msseregparm", MASK_SSEREGPARM },
2657 { "-mstack-arg-probe", MASK_STACK_PROBE },
2658 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
2659 { "-mvect8-ret-in-mem", MASK_VECT8_RETURNS },
2660 { "-m8bit-idiv", MASK_USE_8BIT_IDIV },
2661 { "-mvzeroupper", MASK_VZEROUPPER },
2662 { "-mavx256-split-unaligned-load", MASK_AVX256_SPLIT_UNALIGNED_LOAD},
2663 { "-mavx256-split-unaligned-store", MASK_AVX256_SPLIT_UNALIGNED_STORE},
2664 { "-mprefer-avx128", MASK_PREFER_AVX128},
2665 };
2666
2667 const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
2668
2669 char isa_other[40];
2670 char target_other[40];
2671 unsigned num = 0;
2672 unsigned i, j;
2673 char *ret;
2674 char *ptr;
2675 size_t len;
2676 size_t line_len;
2677 size_t sep_len;
2678 const char *abi;
2679
2680 memset (opts, '\0', sizeof (opts));
2681
2682 /* Add -march= option. */
2683 if (arch)
2684 {
2685 opts[num][0] = "-march=";
2686 opts[num++][1] = arch;
2687 }
2688
2689 /* Add -mtune= option. */
2690 if (tune)
2691 {
2692 opts[num][0] = "-mtune=";
2693 opts[num++][1] = tune;
2694 }
2695
2696 /* Add -m32/-m64/-mx32. */
2697 if ((isa & OPTION_MASK_ISA_64BIT) != 0)
2698 {
2699 if ((isa & OPTION_MASK_ABI_64) != 0)
2700 abi = "-m64";
2701 else
2702 abi = "-mx32";
2703 isa &= ~ (OPTION_MASK_ISA_64BIT
2704 | OPTION_MASK_ABI_64
2705 | OPTION_MASK_ABI_X32);
2706 }
2707 else
2708 abi = "-m32";
2709 opts[num++][0] = abi;
2710
2711 /* Pick out the options in isa options. */
2712 for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
2713 {
2714 if ((isa & isa_opts[i].mask) != 0)
2715 {
2716 opts[num++][0] = isa_opts[i].option;
2717 isa &= ~ isa_opts[i].mask;
2718 }
2719 }
2720
2721 if (isa && add_nl_p)
2722 {
2723 opts[num++][0] = isa_other;
2724 sprintf (isa_other, "(other isa: %#" HOST_WIDE_INT_PRINT "x)",
2725 isa);
2726 }
2727
2728 /* Add flag options. */
2729 for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
2730 {
2731 if ((flags & flag_opts[i].mask) != 0)
2732 {
2733 opts[num++][0] = flag_opts[i].option;
2734 flags &= ~ flag_opts[i].mask;
2735 }
2736 }
2737
2738 if (flags && add_nl_p)
2739 {
2740 opts[num++][0] = target_other;
2741 sprintf (target_other, "(other flags: %#x)", flags);
2742 }
2743
2744 /* Add -fpmath= option. */
2745 if (fpmath)
2746 {
2747 opts[num][0] = "-mfpmath=";
2748 switch ((int) fpmath)
2749 {
2750 case FPMATH_387:
2751 opts[num++][1] = "387";
2752 break;
2753
2754 case FPMATH_SSE:
2755 opts[num++][1] = "sse";
2756 break;
2757
2758 case FPMATH_387 | FPMATH_SSE:
2759 opts[num++][1] = "sse+387";
2760 break;
2761
2762 default:
2763 gcc_unreachable ();
2764 }
2765 }
2766
2767 /* Any options? */
2768 if (num == 0)
2769 return NULL;
2770
2771 gcc_assert (num < ARRAY_SIZE (opts));
2772
2773 /* Size the string. */
2774 len = 0;
2775 sep_len = (add_nl_p) ? 3 : 1;
2776 for (i = 0; i < num; i++)
2777 {
2778 len += sep_len;
2779 for (j = 0; j < 2; j++)
2780 if (opts[i][j])
2781 len += strlen (opts[i][j]);
2782 }
2783
2784 /* Build the string. */
2785 ret = ptr = (char *) xmalloc (len);
2786 line_len = 0;
2787
2788 for (i = 0; i < num; i++)
2789 {
2790 size_t len2[2];
2791
2792 for (j = 0; j < 2; j++)
2793 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
2794
2795 if (i != 0)
2796 {
2797 *ptr++ = ' ';
2798 line_len++;
2799
2800 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
2801 {
2802 *ptr++ = '\\';
2803 *ptr++ = '\n';
2804 line_len = 0;
2805 }
2806 }
2807
2808 for (j = 0; j < 2; j++)
2809 if (opts[i][j])
2810 {
2811 memcpy (ptr, opts[i][j], len2[j]);
2812 ptr += len2[j];
2813 line_len += len2[j];
2814 }
2815 }
2816
2817 *ptr = '\0';
2818 gcc_assert (ret + len >= ptr);
2819
2820 return ret;
2821 }
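/* Hypothetical usage sketch: a call such as

     char *s = ix86_target_string (ix86_isa_flags, target_flags,
                                   "x86-64", "generic", FPMATH_SSE, false);

   could return something along the lines of
   "-march=x86-64 -mtune=generic -m64 -msse2 -msse -mmmx -mfpmath=sse"
   (the exact ISA options depend on the bits set in the masks), and the
   caller must free the returned string.  */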
2822
2823 /* Return true if profiling code should be emitted before the
2824 prologue, and false otherwise.
2825 Note: for x86 with "hotfix" prologues this is diagnosed as unsupported ("sorried"). */
2826 static bool
2827 ix86_profile_before_prologue (void)
2828 {
2829 return flag_fentry != 0;
2830 }
2831
2832 /* Function that is callable from the debugger to print the current
2833 options. */
2834 void ATTRIBUTE_UNUSED
2835 ix86_debug_options (void)
2836 {
2837 char *opts = ix86_target_string (ix86_isa_flags, target_flags,
2838 ix86_arch_string, ix86_tune_string,
2839 ix86_fpmath, true);
2840
2841 if (opts)
2842 {
2843 fprintf (stderr, "%s\n\n", opts);
2844 free (opts);
2845 }
2846 else
2847 fputs ("<no options>\n\n", stderr);
2848
2849 return;
2850 }
2851
2852 static const char *stringop_alg_names[] = {
2853 #define DEF_ENUM
2854 #define DEF_ALG(alg, name) #name,
2855 #include "stringop.def"
2856 #undef DEF_ENUM
2857 #undef DEF_ALG
2858 };
2859
2860 /* Parse the parameter string passed to -mmemcpy-strategy= or -mmemset-strategy=.
2861 The string is of the following form (or a comma-separated list of such entries):
2862
2863 strategy_alg:max_size:[align|noalign]
2864
2865 where the full size range for the strategy is either [0, max_size] or
2866 [min_size, max_size], in which min_size is the max_size + 1 of the
2867 preceding range. The last size range must have max_size == -1.
2868
2869 Examples:
2870
2871 1.
2872 -mmemcpy-strategy=libcall:-1:noalign
2873
2874 this is equivalent to (for known size memcpy) -mstringop-strategy=libcall
2875
2876
2877 2.
2878 -mmemset-strategy=rep_8byte:16:noalign,vector_loop:2048:align,libcall:-1:noalign
2879
2880 This is to tell the compiler to use the following strategy for memset
2881 1) when the expected size is between [1, 16], use rep_8byte strategy;
2882 2) when the size is between [17, 2048], use vector_loop;
2883 3) when the size is > 2048, use libcall. */
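/* Working through example 2 above (illustrative only): after parsing,
   the three ranges become {16, rep_8byte, noalign}, {2048, vector_loop,
   align} and {-1, libcall, noalign}, and they overwrite the first three
   entries of the default memset size table for the current target.  */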
2884
2885 struct stringop_size_range
2886 {
2887 int max;
2888 stringop_alg alg;
2889 bool noalign;
2890 };
2891
2892 static void
2893 ix86_parse_stringop_strategy_string (char *strategy_str, bool is_memset)
2894 {
2895 const struct stringop_algs *default_algs;
2896 stringop_size_range input_ranges[MAX_STRINGOP_ALGS];
2897 char *curr_range_str, *next_range_str;
2898 int i = 0, n = 0;
2899
2900 if (is_memset)
2901 default_algs = &ix86_cost->memset[TARGET_64BIT != 0];
2902 else
2903 default_algs = &ix86_cost->memcpy[TARGET_64BIT != 0];
2904
2905 curr_range_str = strategy_str;
2906
2907 do
2908 {
2909 int maxs;
2910 char alg_name[128];
2911 char align[16];
2912 next_range_str = strchr (curr_range_str, ',');
2913 if (next_range_str)
2914 *next_range_str++ = '\0';
2915
2916 if (3 != sscanf (curr_range_str, "%20[^:]:%d:%10s",
2917 alg_name, &maxs, align))
2918 {
2919 error ("wrong arg %s to option %s", curr_range_str,
2920 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2921 return;
2922 }
2923
2924 if (n > 0 && (maxs < (input_ranges[n - 1].max + 1) && maxs != -1))
2925 {
2926 error ("size ranges of option %s should be increasing",
2927 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2928 return;
2929 }
2930
2931 for (i = 0; i < last_alg; i++)
2932 if (!strcmp (alg_name, stringop_alg_names[i]))
2933 break;
2934
2935 if (i == last_alg)
2936 {
2937 error ("wrong stringop strategy name %s specified for option %s",
2938 alg_name,
2939 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2940 return;
2941 }
2942
2943 input_ranges[n].max = maxs;
2944 input_ranges[n].alg = (stringop_alg) i;
2945 if (!strcmp (align, "align"))
2946 input_ranges[n].noalign = false;
2947 else if (!strcmp (align, "noalign"))
2948 input_ranges[n].noalign = true;
2949 else
2950 {
2951 error ("unknown alignment %s specified for option %s",
2952 align, is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2953 return;
2954 }
2955 n++;
2956 curr_range_str = next_range_str;
2957 }
2958 while (curr_range_str);
2959
2960 if (input_ranges[n - 1].max != -1)
2961 {
2962 error ("the max value for the last size range should be -1"
2963 " for option %s",
2964 is_memset ? "-mmemset-strategy=" : "-mmemcpy-strategy=");
2965 return;
2966 }
2967
2968 if (n > MAX_STRINGOP_ALGS)
2969 {
2970 error ("too many size ranges specified in option %s",
2971 is_memset ? "-mmemset-strategy=" : "-mmemcpy-strategy=");
2972 return;
2973 }
2974
2975 /* Now override the default algs array. */
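/* The size entries live in the const processor cost tables, hence the
   const_casts below to overwrite them in place with the user-specified
   strategy.  */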
2976 for (i = 0; i < n; i++)
2977 {
2978 *const_cast<int *>(&default_algs->size[i].max) = input_ranges[i].max;
2979 *const_cast<stringop_alg *>(&default_algs->size[i].alg)
2980 = input_ranges[i].alg;
2981 *const_cast<int *>(&default_algs->size[i].noalign)
2982 = input_ranges[i].noalign;
2983 }
2984 }
2985
2986 \f
2987 /* Parse the -mtune-ctrl= option. When DUMP is true,
2988 print the features that are explicitly set. */
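/* The accepted syntax is a comma-separated list of tuning feature names,
   each optionally prefixed with '^' to clear the feature instead of
   setting it; e.g. (with hypothetical feature names)
   -mtune-ctrl=foo,^bar sets "foo" and clears "bar".  */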
2989
2990 static void
2991 parse_mtune_ctrl_str (bool dump)
2992 {
2993 if (!ix86_tune_ctrl_string)
2994 return;
2995
2996 char *next_feature_string = NULL;
2997 char *curr_feature_string = xstrdup (ix86_tune_ctrl_string);
2998 char *orig = curr_feature_string;
2999 int i;
3000 do
3001 {
3002 bool clear = false;
3003
3004 next_feature_string = strchr (curr_feature_string, ',');
3005 if (next_feature_string)
3006 *next_feature_string++ = '\0';
3007 if (*curr_feature_string == '^')
3008 {
3009 curr_feature_string++;
3010 clear = true;
3011 }
3012 for (i = 0; i < X86_TUNE_LAST; i++)
3013 {
3014 if (!strcmp (curr_feature_string, ix86_tune_feature_names[i]))
3015 {
3016 ix86_tune_features[i] = !clear;
3017 if (dump)
3018 fprintf (stderr, "Explicitly %s feature %s\n",
3019 clear ? "clear" : "set", ix86_tune_feature_names[i]);
3020 break;
3021 }
3022 }
3023 if (i == X86_TUNE_LAST)
3024 error ("unknown parameter to option -mtune-ctrl: %s",
3025 clear ? curr_feature_string - 1 : curr_feature_string);
3026 curr_feature_string = next_feature_string;
3027 }
3028 while (curr_feature_string);
3029 free (orig);
3030 }
3031
3032 /* Helper function to set ix86_tune_features. IX86_TUNE is the
3033 processor type. */
3034
3035 static void
3036 set_ix86_tune_features (enum processor_type ix86_tune, bool dump)
3037 {
3038 unsigned int ix86_tune_mask = 1u << ix86_tune;
3039 int i;
3040
3041 for (i = 0; i < X86_TUNE_LAST; ++i)
3042 {
3043 if (ix86_tune_no_default)
3044 ix86_tune_features[i] = 0;
3045 else
3046 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3047 }
3048
3049 if (dump)
3050 {
3051 fprintf (stderr, "List of x86 specific tuning parameter names:\n");
3052 for (i = 0; i < X86_TUNE_LAST; i++)
3053 fprintf (stderr, "%s : %s\n", ix86_tune_feature_names[i],
3054 ix86_tune_features[i] ? "on" : "off");
3055 }
3056
3057 parse_mtune_ctrl_str (dump);
3058 }
3059
3060
3061 /* Override various settings based on options. If MAIN_ARGS_P, the
3062 options are from the command line, otherwise they are from
3063 attributes. */
3064
3065 static void
3066 ix86_option_override_internal (bool main_args_p,
3067 struct gcc_options *opts,
3068 struct gcc_options *opts_set)
3069 {
3070 int i;
3071 unsigned int ix86_arch_mask;
3072 const bool ix86_tune_specified = (opts->x_ix86_tune_string != NULL);
3073 const char *prefix;
3074 const char *suffix;
3075 const char *sw;
3076
3077 #define PTA_3DNOW (HOST_WIDE_INT_1 << 0)
3078 #define PTA_3DNOW_A (HOST_WIDE_INT_1 << 1)
3079 #define PTA_64BIT (HOST_WIDE_INT_1 << 2)
3080 #define PTA_ABM (HOST_WIDE_INT_1 << 3)
3081 #define PTA_AES (HOST_WIDE_INT_1 << 4)
3082 #define PTA_AVX (HOST_WIDE_INT_1 << 5)
3083 #define PTA_BMI (HOST_WIDE_INT_1 << 6)
3084 #define PTA_CX16 (HOST_WIDE_INT_1 << 7)
3085 #define PTA_F16C (HOST_WIDE_INT_1 << 8)
3086 #define PTA_FMA (HOST_WIDE_INT_1 << 9)
3087 #define PTA_FMA4 (HOST_WIDE_INT_1 << 10)
3088 #define PTA_FSGSBASE (HOST_WIDE_INT_1 << 11)
3089 #define PTA_LWP (HOST_WIDE_INT_1 << 12)
3090 #define PTA_LZCNT (HOST_WIDE_INT_1 << 13)
3091 #define PTA_MMX (HOST_WIDE_INT_1 << 14)
3092 #define PTA_MOVBE (HOST_WIDE_INT_1 << 15)
3093 #define PTA_NO_SAHF (HOST_WIDE_INT_1 << 16)
3094 #define PTA_PCLMUL (HOST_WIDE_INT_1 << 17)
3095 #define PTA_POPCNT (HOST_WIDE_INT_1 << 18)
3096 #define PTA_PREFETCH_SSE (HOST_WIDE_INT_1 << 19)
3097 #define PTA_RDRND (HOST_WIDE_INT_1 << 20)
3098 #define PTA_SSE (HOST_WIDE_INT_1 << 21)
3099 #define PTA_SSE2 (HOST_WIDE_INT_1 << 22)
3100 #define PTA_SSE3 (HOST_WIDE_INT_1 << 23)
3101 #define PTA_SSE4_1 (HOST_WIDE_INT_1 << 24)
3102 #define PTA_SSE4_2 (HOST_WIDE_INT_1 << 25)
3103 #define PTA_SSE4A (HOST_WIDE_INT_1 << 26)
3104 #define PTA_SSSE3 (HOST_WIDE_INT_1 << 27)
3105 #define PTA_TBM (HOST_WIDE_INT_1 << 28)
3106 #define PTA_XOP (HOST_WIDE_INT_1 << 29)
3107 #define PTA_AVX2 (HOST_WIDE_INT_1 << 30)
3108 #define PTA_BMI2 (HOST_WIDE_INT_1 << 31)
3109 #define PTA_RTM (HOST_WIDE_INT_1 << 32)
3110 #define PTA_HLE (HOST_WIDE_INT_1 << 33)
3111 #define PTA_PRFCHW (HOST_WIDE_INT_1 << 34)
3112 #define PTA_RDSEED (HOST_WIDE_INT_1 << 35)
3113 #define PTA_ADX (HOST_WIDE_INT_1 << 36)
3114 #define PTA_FXSR (HOST_WIDE_INT_1 << 37)
3115 #define PTA_XSAVE (HOST_WIDE_INT_1 << 38)
3116 #define PTA_XSAVEOPT (HOST_WIDE_INT_1 << 39)
3117 #define PTA_AVX512F (HOST_WIDE_INT_1 << 40)
3118 #define PTA_AVX512ER (HOST_WIDE_INT_1 << 41)
3119 #define PTA_AVX512PF (HOST_WIDE_INT_1 << 42)
3120 #define PTA_AVX512CD (HOST_WIDE_INT_1 << 43)
3121 #define PTA_SHA (HOST_WIDE_INT_1 << 45)
3122 #define PTA_PREFETCHWT1 (HOST_WIDE_INT_1 << 46)
3123 #define PTA_CLFLUSHOPT (HOST_WIDE_INT_1 << 47)
3124 #define PTA_XSAVEC (HOST_WIDE_INT_1 << 48)
3125 #define PTA_XSAVES (HOST_WIDE_INT_1 << 49)
3126 #define PTA_AVX512DQ (HOST_WIDE_INT_1 << 50)
3127
3128 #define PTA_CORE2 \
3129 (PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3 \
3130 | PTA_CX16 | PTA_FXSR)
3131 #define PTA_NEHALEM \
3132 (PTA_CORE2 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_POPCNT)
3133 #define PTA_WESTMERE \
3134 (PTA_NEHALEM | PTA_AES | PTA_PCLMUL)
3135 #define PTA_SANDYBRIDGE \
3136 (PTA_WESTMERE | PTA_AVX | PTA_XSAVE | PTA_XSAVEOPT)
3137 #define PTA_IVYBRIDGE \
3138 (PTA_SANDYBRIDGE | PTA_FSGSBASE | PTA_RDRND | PTA_F16C)
3139 #define PTA_HASWELL \
3140 (PTA_IVYBRIDGE | PTA_AVX2 | PTA_BMI | PTA_BMI2 | PTA_LZCNT \
3141 | PTA_FMA | PTA_MOVBE | PTA_HLE)
3142 #define PTA_BROADWELL \
3143 (PTA_HASWELL | PTA_ADX | PTA_PRFCHW | PTA_RDSEED)
3144 #define PTA_BONNELL \
3145 (PTA_CORE2 | PTA_MOVBE)
3146 #define PTA_SILVERMONT \
3147 (PTA_WESTMERE | PTA_MOVBE)
3148
3149 /* If this reaches 64, we need to widen the struct pta flags below. */
3150
3151 static struct pta
3152 {
3153 const char *const name; /* processor name or nickname. */
3154 const enum processor_type processor;
3155 const enum attr_cpu schedule;
3156 const unsigned HOST_WIDE_INT flags;
3157 }
3158 const processor_alias_table[] =
3159 {
3160 {"i386", PROCESSOR_I386, CPU_NONE, 0},
3161 {"i486", PROCESSOR_I486, CPU_NONE, 0},
3162 {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3163 {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3164 {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
3165 {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
3166 {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3167 {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3168 {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3169 PTA_MMX | PTA_SSE | PTA_FXSR},
3170 {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3171 {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3172 {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_FXSR},
3173 {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3174 PTA_MMX | PTA_SSE | PTA_FXSR},
3175 {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3176 PTA_MMX | PTA_SSE | PTA_FXSR},
3177 {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3178 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3179 {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
3180 PTA_MMX |PTA_SSE | PTA_SSE2 | PTA_FXSR},
3181 {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
3182 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3183 {"prescott", PROCESSOR_NOCONA, CPU_NONE,
3184 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_FXSR},
3185 {"nocona", PROCESSOR_NOCONA, CPU_NONE,
3186 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3187 | PTA_CX16 | PTA_NO_SAHF | PTA_FXSR},
3188 {"core2", PROCESSOR_CORE2, CPU_CORE2, PTA_CORE2},
3189 {"nehalem", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_NEHALEM},
3190 {"corei7", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_NEHALEM},
3191 {"westmere", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_WESTMERE},
3192 {"sandybridge", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3193 PTA_SANDYBRIDGE},
3194 {"corei7-avx", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3195 PTA_SANDYBRIDGE},
3196 {"ivybridge", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3197 PTA_IVYBRIDGE},
3198 {"core-avx-i", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3199 PTA_IVYBRIDGE},
3200 {"haswell", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_HASWELL},
3201 {"core-avx2", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_HASWELL},
3202 {"broadwell", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_BROADWELL},
3203 {"bonnell", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL},
3204 {"atom", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL},
3205 {"silvermont", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
3206 {"slm", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
3207 {"intel", PROCESSOR_INTEL, CPU_SLM, PTA_NEHALEM},
3208 {"geode", PROCESSOR_GEODE, CPU_GEODE,
3209 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3210 {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
3211 {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3212 {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3213 {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
3214 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3215 {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
3216 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3217 {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
3218 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3219 {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
3220 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3221 {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
3222 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3223 {"x86-64", PROCESSOR_K8, CPU_K8,
3224 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF | PTA_FXSR},
3225 {"k8", PROCESSOR_K8, CPU_K8,
3226 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3227 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3228 {"k8-sse3", PROCESSOR_K8, CPU_K8,
3229 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3230 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3231 {"opteron", PROCESSOR_K8, CPU_K8,
3232 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3233 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3234 {"opteron-sse3", PROCESSOR_K8, CPU_K8,
3235 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3236 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3237 {"athlon64", PROCESSOR_K8, CPU_K8,
3238 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3239 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3240 {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
3241 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3242 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3243 {"athlon-fx", PROCESSOR_K8, CPU_K8,
3244 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3245 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3246 {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3247 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
3248 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
3249 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3250 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
3251 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
3252 {"bdver1", PROCESSOR_BDVER1, CPU_BDVER1,
3253 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3254 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3255 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3256 | PTA_XOP | PTA_LWP | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
3257 {"bdver2", PROCESSOR_BDVER2, CPU_BDVER2,
3258 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3259 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3260 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3261 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
3262 | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
3263 {"bdver3", PROCESSOR_BDVER3, CPU_BDVER3,
3264 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3265 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3266 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3267 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
3268 | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE
3269 | PTA_XSAVEOPT | PTA_FSGSBASE},
3270 {"bdver4", PROCESSOR_BDVER4, CPU_BDVER4,
3271 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3272 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3273 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_AVX2
3274 | PTA_FMA4 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_BMI2
3275 | PTA_TBM | PTA_F16C | PTA_FMA | PTA_PRFCHW | PTA_FXSR
3276 | PTA_XSAVE | PTA_XSAVEOPT | PTA_FSGSBASE | PTA_RDRND
3277 | PTA_MOVBE},
3278 {"btver1", PROCESSOR_BTVER1, CPU_GENERIC,
3279 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3280 | PTA_SSSE3 | PTA_SSE4A |PTA_ABM | PTA_CX16 | PTA_PRFCHW
3281 | PTA_FXSR | PTA_XSAVE},
3282 {"btver2", PROCESSOR_BTVER2, CPU_BTVER2,
3283 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3284 | PTA_SSSE3 | PTA_SSE4A |PTA_ABM | PTA_CX16 | PTA_SSE4_1
3285 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX
3286 | PTA_BMI | PTA_F16C | PTA_MOVBE | PTA_PRFCHW
3287 | PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT},
3288
3289 {"generic", PROCESSOR_GENERIC, CPU_GENERIC,
3290 PTA_64BIT
3291 | PTA_HLE /* flags are only used for -march switch. */ },
3292 };
3293
3294 /* -mrecip options. */
3295 static struct
3296 {
3297 const char *string; /* option name */
3298 unsigned int mask; /* mask bits to set */
3299 }
3300 const recip_options[] =
3301 {
3302 { "all", RECIP_MASK_ALL },
3303 { "none", RECIP_MASK_NONE },
3304 { "div", RECIP_MASK_DIV },
3305 { "sqrt", RECIP_MASK_SQRT },
3306 { "vec-div", RECIP_MASK_VEC_DIV },
3307 { "vec-sqrt", RECIP_MASK_VEC_SQRT },
3308 };
3309
3310 int const pta_size = ARRAY_SIZE (processor_alias_table);
3311
3312 /* Set up prefix/suffix so the error messages refer to either the command
3313 line argument, or the attribute(target). */
3314 if (main_args_p)
3315 {
3316 prefix = "-m";
3317 suffix = "";
3318 sw = "switch";
3319 }
3320 else
3321 {
3322 prefix = "option(\"";
3323 suffix = "\")";
3324 sw = "attribute";
3325 }
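/* With these settings, a diagnostic such as the "bad value (%s) for
   %sarch=%s %s" error below reads "... for -march= switch" when the
   options came from the command line, and "... for option("arch=")
   attribute" when they came from attribute((target)).  */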
3326
3327 /* Turn off both OPTION_MASK_ABI_64 and OPTION_MASK_ABI_X32 if
3328 TARGET_64BIT_DEFAULT is true and TARGET_64BIT is false. */
3329 if (TARGET_64BIT_DEFAULT && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
3330 opts->x_ix86_isa_flags &= ~(OPTION_MASK_ABI_64 | OPTION_MASK_ABI_X32);
3331 #ifdef TARGET_BI_ARCH
3332 else
3333 {
3334 #if TARGET_BI_ARCH == 1
3335 /* When TARGET_BI_ARCH == 1, by default, OPTION_MASK_ABI_64
3336 is on and OPTION_MASK_ABI_X32 is off. We turn off
3337 OPTION_MASK_ABI_64 if OPTION_MASK_ABI_X32 is turned on by
3338 -mx32. */
3339 if (TARGET_X32_P (opts->x_ix86_isa_flags))
3340 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64;
3341 #else
3342 /* When TARGET_BI_ARCH == 2, by default, OPTION_MASK_ABI_X32 is
3343 on and OPTION_MASK_ABI_64 is off. We turn off
3344 OPTION_MASK_ABI_X32 if OPTION_MASK_ABI_64 is turned on by
3345 -m64. */
3346 if (TARGET_LP64_P (opts->x_ix86_isa_flags))
3347 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
3348 #endif
3349 }
3350 #endif
3351
3352 if (TARGET_X32_P (opts->x_ix86_isa_flags))
3353 {
3354 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3355 OPTION_MASK_ABI_64 for TARGET_X32. */
3356 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
3357 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64;
3358 }
3359 else if (TARGET_16BIT_P (opts->x_ix86_isa_flags))
3360 opts->x_ix86_isa_flags &= ~(OPTION_MASK_ISA_64BIT
3361 | OPTION_MASK_ABI_X32
3362 | OPTION_MASK_ABI_64);
3363 else if (TARGET_LP64_P (opts->x_ix86_isa_flags))
3364 {
3365 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3366 OPTION_MASK_ABI_X32 for TARGET_LP64. */
3367 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
3368 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
3369 }
3370
3371 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3372 SUBTARGET_OVERRIDE_OPTIONS;
3373 #endif
3374
3375 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
3376 SUBSUBTARGET_OVERRIDE_OPTIONS;
3377 #endif
3378
3379 /* -fPIC is the default for x86_64. */
3380 if (TARGET_MACHO && TARGET_64BIT_P (opts->x_ix86_isa_flags))
3381 opts->x_flag_pic = 2;
3382
3383 /* Need to check -mtune=generic first. */
3384 if (opts->x_ix86_tune_string)
3385 {
3386 /* As special support for cross compilers we read -mtune=native
3387 as -mtune=generic. With native compilers we won't see
3388 -mtune=native, as it has already been rewritten by the driver. */
3389 if (!strcmp (opts->x_ix86_tune_string, "native"))
3390 {
3391 opts->x_ix86_tune_string = "generic";
3392 }
3393 else if (!strcmp (opts->x_ix86_tune_string, "x86-64"))
3394 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated; use "
3395 "%stune=k8%s or %stune=generic%s instead as appropriate",
3396 prefix, suffix, prefix, suffix, prefix, suffix);
3397 }
3398 else
3399 {
3400 if (opts->x_ix86_arch_string)
3401 opts->x_ix86_tune_string = opts->x_ix86_arch_string;
3402 if (!opts->x_ix86_tune_string)
3403 {
3404 opts->x_ix86_tune_string
3405 = processor_target_table[TARGET_CPU_DEFAULT].name;
3406 ix86_tune_defaulted = 1;
3407 }
3408
3409 /* opts->x_ix86_tune_string is set to opts->x_ix86_arch_string
3410 or defaulted. We need to use a sensible tune option. */
3411 if (!strcmp (opts->x_ix86_tune_string, "x86-64"))
3412 {
3413 opts->x_ix86_tune_string = "generic";
3414 }
3415 }
3416
3417 if (opts->x_ix86_stringop_alg == rep_prefix_8_byte
3418 && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
3419 {
3420 /* rep; movq isn't available in 32-bit code. */
3421 error ("-mstringop-strategy=rep_8byte not supported for 32-bit code");
3422 opts->x_ix86_stringop_alg = no_stringop;
3423 }
3424
3425 if (!opts->x_ix86_arch_string)
3426 opts->x_ix86_arch_string
3427 = TARGET_64BIT_P (opts->x_ix86_isa_flags)
3428 ? "x86-64" : SUBTARGET32_DEFAULT_CPU;
3429 else
3430 ix86_arch_specified = 1;
3431
3432 if (opts_set->x_ix86_pmode)
3433 {
3434 if ((TARGET_LP64_P (opts->x_ix86_isa_flags)
3435 && opts->x_ix86_pmode == PMODE_SI)
3436 || (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
3437 && opts->x_ix86_pmode == PMODE_DI))
3438 error ("address mode %qs not supported in the %s bit mode",
3439 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "short" : "long",
3440 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "64" : "32");
3441 }
3442 else
3443 opts->x_ix86_pmode = TARGET_LP64_P (opts->x_ix86_isa_flags)
3444 ? PMODE_DI : PMODE_SI;
3445
3446 if (!opts_set->x_ix86_abi)
3447 opts->x_ix86_abi = DEFAULT_ABI;
3448
3449 /* For targets using the MS ABI, enable ms-extensions unless it was
3450 explicitly turned off. For non-MS ABIs we turn this option
3451 off. */
3452 if (!opts_set->x_flag_ms_extensions)
3453 opts->x_flag_ms_extensions = (MS_ABI == DEFAULT_ABI);
3454
3455 if (opts_set->x_ix86_cmodel)
3456 {
3457 switch (opts->x_ix86_cmodel)
3458 {
3459 case CM_SMALL:
3460 case CM_SMALL_PIC:
3461 if (opts->x_flag_pic)
3462 opts->x_ix86_cmodel = CM_SMALL_PIC;
3463 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3464 error ("code model %qs not supported in the %s bit mode",
3465 "small", "32");
3466 break;
3467
3468 case CM_MEDIUM:
3469 case CM_MEDIUM_PIC:
3470 if (opts->x_flag_pic)
3471 opts->x_ix86_cmodel = CM_MEDIUM_PIC;
3472 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3473 error ("code model %qs not supported in the %s bit mode",
3474 "medium", "32");
3475 else if (TARGET_X32_P (opts->x_ix86_isa_flags))
3476 error ("code model %qs not supported in x32 mode",
3477 "medium");
3478 break;
3479
3480 case CM_LARGE:
3481 case CM_LARGE_PIC:
3482 if (opts->x_flag_pic)
3483 opts->x_ix86_cmodel = CM_LARGE_PIC;
3484 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3485 error ("code model %qs not supported in the %s bit mode",
3486 "large", "32");
3487 else if (TARGET_X32_P (opts->x_ix86_isa_flags))
3488 error ("code model %qs not supported in x32 mode",
3489 "large");
3490 break;
3491
3492 case CM_32:
3493 if (opts->x_flag_pic)
3494 error ("code model %s does not support PIC mode", "32");
3495 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3496 error ("code model %qs not supported in the %s bit mode",
3497 "32", "64");
3498 break;
3499
3500 case CM_KERNEL:
3501 if (opts->x_flag_pic)
3502 {
3503 error ("code model %s does not support PIC mode", "kernel");
3504 opts->x_ix86_cmodel = CM_32;
3505 }
3506 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3507 error ("code model %qs not supported in the %s bit mode",
3508 "kernel", "32");
3509 break;
3510
3511 default:
3512 gcc_unreachable ();
3513 }
3514 }
3515 else
3516 {
3517 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
3518 use of rip-relative addressing. This eliminates fixups that
3519 would otherwise be needed if this object is to be placed in a
3520 DLL, and is essentially just as efficient as direct addressing. */
3521 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3522 && (TARGET_RDOS || TARGET_PECOFF))
3523 opts->x_ix86_cmodel = CM_MEDIUM_PIC, opts->x_flag_pic = 1;
3524 else if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3525 opts->x_ix86_cmodel = opts->x_flag_pic ? CM_SMALL_PIC : CM_SMALL;
3526 else
3527 opts->x_ix86_cmodel = CM_32;
3528 }
3529 if (TARGET_MACHO && opts->x_ix86_asm_dialect == ASM_INTEL)
3530 {
3531 error ("-masm=intel not supported in this configuration");
3532 opts->x_ix86_asm_dialect = ASM_ATT;
3533 }
3534 if ((TARGET_64BIT_P (opts->x_ix86_isa_flags) != 0)
3535 != ((opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
3536 sorry ("%i-bit mode not compiled in",
3537 (opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
3538
3539 for (i = 0; i < pta_size; i++)
3540 if (! strcmp (opts->x_ix86_arch_string, processor_alias_table[i].name))
3541 {
3542 ix86_schedule = processor_alias_table[i].schedule;
3543 ix86_arch = processor_alias_table[i].processor;
3544 /* Default cpu tuning to the architecture. */
3545 ix86_tune = ix86_arch;
3546
3547 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3548 && !(processor_alias_table[i].flags & PTA_64BIT))
3549 error ("CPU you selected does not support x86-64 "
3550 "instruction set");
3551
3552 if (processor_alias_table[i].flags & PTA_MMX
3553 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
3554 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MMX;
3555 if (processor_alias_table[i].flags & PTA_3DNOW
3556 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
3557 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
3558 if (processor_alias_table[i].flags & PTA_3DNOW_A
3559 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
3560 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
3561 if (processor_alias_table[i].flags & PTA_SSE
3562 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
3563 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE;
3564 if (processor_alias_table[i].flags & PTA_SSE2
3565 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
3566 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
3567 if (processor_alias_table[i].flags & PTA_SSE3
3568 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
3569 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
3570 if (processor_alias_table[i].flags & PTA_SSSE3
3571 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
3572 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
3573 if (processor_alias_table[i].flags & PTA_SSE4_1
3574 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
3575 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
3576 if (processor_alias_table[i].flags & PTA_SSE4_2
3577 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
3578 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
3579 if (processor_alias_table[i].flags & PTA_AVX
3580 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
3581 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX;
3582 if (processor_alias_table[i].flags & PTA_AVX2
3583 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX2))
3584 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX2;
3585 if (processor_alias_table[i].flags & PTA_FMA
3586 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
3587 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA;
3588 if (processor_alias_table[i].flags & PTA_SSE4A
3589 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
3590 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
3591 if (processor_alias_table[i].flags & PTA_FMA4
3592 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4))
3593 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA4;
3594 if (processor_alias_table[i].flags & PTA_XOP
3595 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP))
3596 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XOP;
3597 if (processor_alias_table[i].flags & PTA_LWP
3598 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP))
3599 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LWP;
3600 if (processor_alias_table[i].flags & PTA_ABM
3601 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
3602 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ABM;
3603 if (processor_alias_table[i].flags & PTA_BMI
3604 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI))
3605 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI;
3606 if (processor_alias_table[i].flags & (PTA_LZCNT | PTA_ABM)
3607 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LZCNT))
3608 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LZCNT;
3609 if (processor_alias_table[i].flags & PTA_TBM
3610 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_TBM))
3611 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_TBM;
3612 if (processor_alias_table[i].flags & PTA_BMI2
3613 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI2))
3614 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI2;
3615 if (processor_alias_table[i].flags & PTA_CX16
3616 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
3617 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CX16;
3618 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
3619 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
3620 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
3621 if (!(TARGET_64BIT_P (opts->x_ix86_isa_flags)
3622 && (processor_alias_table[i].flags & PTA_NO_SAHF))
3623 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
3624 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
3625 if (processor_alias_table[i].flags & PTA_MOVBE
3626 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
3627 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
3628 if (processor_alias_table[i].flags & PTA_AES
3629 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
3630 ix86_isa_flags |= OPTION_MASK_ISA_AES;
3631 if (processor_alias_table[i].flags & PTA_SHA
3632 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SHA))
3633 ix86_isa_flags |= OPTION_MASK_ISA_SHA;
3634 if (processor_alias_table[i].flags & PTA_PCLMUL
3635 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
3636 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
3637 if (processor_alias_table[i].flags & PTA_FSGSBASE
3638 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FSGSBASE))
3639 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE;
3640 if (processor_alias_table[i].flags & PTA_RDRND
3641 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDRND))
3642 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDRND;
3643 if (processor_alias_table[i].flags & PTA_F16C
3644 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_F16C))
3645 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_F16C;
3646 if (processor_alias_table[i].flags & PTA_RTM
3647 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RTM))
3648 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RTM;
3649 if (processor_alias_table[i].flags & PTA_HLE
3650 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_HLE))
3651 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_HLE;
3652 if (processor_alias_table[i].flags & PTA_PRFCHW
3653 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PRFCHW))
3654 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PRFCHW;
3655 if (processor_alias_table[i].flags & PTA_RDSEED
3656 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDSEED))
3657 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDSEED;
3658 if (processor_alias_table[i].flags & PTA_ADX
3659 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ADX))
3660 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ADX;
3661 if (processor_alias_table[i].flags & PTA_FXSR
3662 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FXSR))
3663 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FXSR;
3664 if (processor_alias_table[i].flags & PTA_XSAVE
3665 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVE))
3666 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVE;
3667 if (processor_alias_table[i].flags & PTA_XSAVEOPT
3668 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEOPT))
3669 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEOPT;
3670 if (processor_alias_table[i].flags & PTA_AVX512F
3671 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512F))
3672 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512F;
3673 if (processor_alias_table[i].flags & PTA_AVX512ER
3674 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512ER))
3675 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512ER;
3676 if (processor_alias_table[i].flags & PTA_AVX512PF
3677 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512PF))
3678 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512PF;
3679 if (processor_alias_table[i].flags & PTA_AVX512CD
3680 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512CD))
3681 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512CD;
3682 if (processor_alias_table[i].flags & PTA_PREFETCHWT1
3683 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PREFETCHWT1))
3684 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PREFETCHWT1;
3685 if (processor_alias_table[i].flags & PTA_CLFLUSHOPT
3686 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CLFLUSHOPT))
3687 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CLFLUSHOPT;
3688 if (processor_alias_table[i].flags & PTA_XSAVEC
3689 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEC))
3690 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEC;
3691 if (processor_alias_table[i].flags & PTA_XSAVES
3692 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVES))
3693 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVES;
3694 if (processor_alias_table[i].flags & PTA_AVX512DQ
3695 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512DQ))
3696 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512DQ;
3697 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
3698 x86_prefetch_sse = true;
3699
3700 break;
3701 }
3702
3703 if (!strcmp (opts->x_ix86_arch_string, "generic"))
3704 error ("generic CPU can be used only for %stune=%s %s",
3705 prefix, suffix, sw);
3706 else if (!strcmp (opts->x_ix86_arch_string, "intel"))
3707 error ("intel CPU can be used only for %stune=%s %s",
3708 prefix, suffix, sw);
3709 else if (i == pta_size)
3710 error ("bad value (%s) for %sarch=%s %s",
3711 opts->x_ix86_arch_string, prefix, suffix, sw);
3712
3713 ix86_arch_mask = 1u << ix86_arch;
3714 for (i = 0; i < X86_ARCH_LAST; ++i)
3715 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3716
3717 for (i = 0; i < pta_size; i++)
3718 if (! strcmp (opts->x_ix86_tune_string, processor_alias_table[i].name))
3719 {
3720 ix86_schedule = processor_alias_table[i].schedule;
3721 ix86_tune = processor_alias_table[i].processor;
3722 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3723 {
3724 if (!(processor_alias_table[i].flags & PTA_64BIT))
3725 {
3726 if (ix86_tune_defaulted)
3727 {
3728 opts->x_ix86_tune_string = "x86-64";
3729 for (i = 0; i < pta_size; i++)
3730 if (! strcmp (opts->x_ix86_tune_string,
3731 processor_alias_table[i].name))
3732 break;
3733 ix86_schedule = processor_alias_table[i].schedule;
3734 ix86_tune = processor_alias_table[i].processor;
3735 }
3736 else
3737 error ("CPU you selected does not support x86-64 "
3738 "instruction set");
3739 }
3740 }
3741 /* Intel CPUs have always interpreted SSE prefetch instructions as
3742 NOPs; so, we can enable SSE prefetch instructions even when
3743 -mtune (rather than -march) points us to a processor that has them.
3744 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
3745 higher processors. */
3746 if (TARGET_CMOV
3747 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
3748 x86_prefetch_sse = true;
3749 break;
3750 }
3751
3752 if (ix86_tune_specified && i == pta_size)
3753 error ("bad value (%s) for %stune=%s %s",
3754 opts->x_ix86_tune_string, prefix, suffix, sw);
3755
3756 set_ix86_tune_features (ix86_tune, opts->x_ix86_dump_tunes);
3757
3758 #ifndef USE_IX86_FRAME_POINTER
3759 #define USE_IX86_FRAME_POINTER 0
3760 #endif
3761
3762 #ifndef USE_X86_64_FRAME_POINTER
3763 #define USE_X86_64_FRAME_POINTER 0
3764 #endif
3765
3766 /* Set the default values for switches whose default depends on TARGET_64BIT
3767 in case they weren't overwritten by command line options. */
3768 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3769 {
3770 if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer)
3771 opts->x_flag_omit_frame_pointer = !USE_X86_64_FRAME_POINTER;
3772 if (opts->x_flag_asynchronous_unwind_tables
3773 && !opts_set->x_flag_unwind_tables
3774 && TARGET_64BIT_MS_ABI)
3775 opts->x_flag_unwind_tables = 1;
3776 if (opts->x_flag_asynchronous_unwind_tables == 2)
3777 opts->x_flag_unwind_tables
3778 = opts->x_flag_asynchronous_unwind_tables = 1;
3779 if (opts->x_flag_pcc_struct_return == 2)
3780 opts->x_flag_pcc_struct_return = 0;
3781 }
3782 else
3783 {
3784 if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer)
3785 opts->x_flag_omit_frame_pointer
3786 = !(USE_IX86_FRAME_POINTER || opts->x_optimize_size);
3787 if (opts->x_flag_asynchronous_unwind_tables == 2)
3788 opts->x_flag_asynchronous_unwind_tables = !USE_IX86_FRAME_POINTER;
3789 if (opts->x_flag_pcc_struct_return == 2)
3790 opts->x_flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
3791 }
3792
3793 ix86_tune_cost = processor_target_table[ix86_tune].cost;
3794 if (opts->x_optimize_size)
3795 ix86_cost = &ix86_size_cost;
3796 else
3797 ix86_cost = ix86_tune_cost;
3798
3799 /* Arrange to set up i386_stack_locals for all functions. */
3800 init_machine_status = ix86_init_machine_status;
3801
3802 /* Validate -mregparm= value. */
3803 if (opts_set->x_ix86_regparm)
3804 {
3805 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3806 warning (0, "-mregparm is ignored in 64-bit mode");
3807 if (opts->x_ix86_regparm > REGPARM_MAX)
3808 {
3809 error ("-mregparm=%d is not between 0 and %d",
3810 opts->x_ix86_regparm, REGPARM_MAX);
3811 opts->x_ix86_regparm = 0;
3812 }
3813 }
3814 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3815 opts->x_ix86_regparm = REGPARM_MAX;
3816
3817 /* Default align_* from the processor table. */
3818 if (opts->x_align_loops == 0)
3819 {
3820 opts->x_align_loops = processor_target_table[ix86_tune].align_loop;
3821 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
3822 }
3823 if (opts->x_align_jumps == 0)
3824 {
3825 opts->x_align_jumps = processor_target_table[ix86_tune].align_jump;
3826 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
3827 }
3828 if (opts->x_align_functions == 0)
3829 {
3830 opts->x_align_functions = processor_target_table[ix86_tune].align_func;
3831 }
3832
3833 /* Provide default for -mbranch-cost= value. */
3834 if (!opts_set->x_ix86_branch_cost)
3835 opts->x_ix86_branch_cost = ix86_cost->branch_cost;
3836
3837 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3838 {
3839 opts->x_target_flags
3840 |= TARGET_SUBTARGET64_DEFAULT & ~opts_set->x_target_flags;
3841
3842 /* Enable by default the SSE and MMX builtins. Do allow the user to
3843 explicitly disable any of these. In particular, disabling SSE and
3844 MMX for kernel code is extremely useful. */
3845 if (!ix86_arch_specified)
3846 opts->x_ix86_isa_flags
3847 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
3848 | TARGET_SUBTARGET64_ISA_DEFAULT)
3849 & ~opts->x_ix86_isa_flags_explicit);
3850
3851 if (TARGET_RTD_P (opts->x_target_flags))
3852 warning (0, "%srtd%s is ignored in 64bit mode", prefix, suffix);
3853 }
3854 else
3855 {
3856 opts->x_target_flags
3857 |= TARGET_SUBTARGET32_DEFAULT & ~opts_set->x_target_flags;
3858
3859 if (!ix86_arch_specified)
3860 opts->x_ix86_isa_flags
3861 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~opts->x_ix86_isa_flags_explicit;
3862
3863 /* The i386 ABI does not specify a red zone. It still makes sense to use
3864 one when the programmer takes care to keep the stack from being destroyed. */
3865 if (!(opts_set->x_target_flags & MASK_NO_RED_ZONE))
3866 opts->x_target_flags |= MASK_NO_RED_ZONE;
3867 }
3868
3869 /* Keep nonleaf frame pointers. */
3870 if (opts->x_flag_omit_frame_pointer)
3871 opts->x_target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
3872 else if (TARGET_OMIT_LEAF_FRAME_POINTER_P (opts->x_target_flags))
3873 opts->x_flag_omit_frame_pointer = 1;
3874
3875 /* If we're doing fast math, we don't care about comparison order
3876 wrt NaNs. This lets us use a shorter comparison sequence. */
3877 if (opts->x_flag_finite_math_only)
3878 opts->x_target_flags &= ~MASK_IEEE_FP;
3879
3880 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
3881 since the insns won't need emulation. */
3882 if (ix86_tune_features [X86_TUNE_ALWAYS_FANCY_MATH_387])
3883 opts->x_target_flags &= ~MASK_NO_FANCY_MATH_387;
3884
3885 /* Likewise, if the target doesn't have a 387, or we've specified
3886 software floating point, don't use 387 inline intrinsics. */
3887 if (!TARGET_80387_P (opts->x_target_flags))
3888 opts->x_target_flags |= MASK_NO_FANCY_MATH_387;
3889
3890 /* Turn on MMX builtins for -msse. */
3891 if (TARGET_SSE_P (opts->x_ix86_isa_flags))
3892 opts->x_ix86_isa_flags
3893 |= OPTION_MASK_ISA_MMX & ~opts->x_ix86_isa_flags_explicit;
3894
3895 /* Enable SSE prefetch. */
3896 if (TARGET_SSE_P (opts->x_ix86_isa_flags)
3897 || (TARGET_PRFCHW && !TARGET_3DNOW_P (opts->x_ix86_isa_flags)))
3898 x86_prefetch_sse = true;
3899
3900 /* Enable prefetch{,w} instructions for -m3dnow and -mprefetchwt1. */
3901 if (TARGET_3DNOW_P (opts->x_ix86_isa_flags)
3902 || TARGET_PREFETCHWT1_P (opts->x_ix86_isa_flags))
3903 opts->x_ix86_isa_flags
3904 |= OPTION_MASK_ISA_PRFCHW & ~opts->x_ix86_isa_flags_explicit;
3905
3906 /* Enable popcnt instruction for -msse4.2 or -mabm. */
3907 if (TARGET_SSE4_2_P (opts->x_ix86_isa_flags)
3908 || TARGET_ABM_P (opts->x_ix86_isa_flags))
3909 opts->x_ix86_isa_flags
3910 |= OPTION_MASK_ISA_POPCNT & ~opts->x_ix86_isa_flags_explicit;
3911
3912 /* Enable lzcnt instruction for -mabm. */
3913 if (TARGET_ABM_P(opts->x_ix86_isa_flags))
3914 opts->x_ix86_isa_flags
3915 |= OPTION_MASK_ISA_LZCNT & ~opts->x_ix86_isa_flags_explicit;
3916
3917 /* Validate -mpreferred-stack-boundary= value or default it to
3918 PREFERRED_STACK_BOUNDARY_DEFAULT. */
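/* The argument is the log2 of the boundary in bytes; e.g.
   -mpreferred-stack-boundary=4 requests 2**4 == 16-byte (128-bit)
   alignment.  */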
3919 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
3920 if (opts_set->x_ix86_preferred_stack_boundary_arg)
3921 {
3922 int min = (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3923 ? (TARGET_SSE_P (opts->x_ix86_isa_flags) ? 4 : 3) : 2);
3924 int max = (TARGET_SEH ? 4 : 12);
3925
3926 if (opts->x_ix86_preferred_stack_boundary_arg < min
3927 || opts->x_ix86_preferred_stack_boundary_arg > max)
3928 {
3929 if (min == max)
3930 error ("-mpreferred-stack-boundary is not supported "
3931 "for this target");
3932 else
3933 error ("-mpreferred-stack-boundary=%d is not between %d and %d",
3934 opts->x_ix86_preferred_stack_boundary_arg, min, max);
3935 }
3936 else
3937 ix86_preferred_stack_boundary
3938 = (1 << opts->x_ix86_preferred_stack_boundary_arg) * BITS_PER_UNIT;
3939 }
3940
3941 /* Set the default value for -mstackrealign. */
3942 if (opts->x_ix86_force_align_arg_pointer == -1)
3943 opts->x_ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
3944
3945 ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
3946
3947 /* Validate -mincoming-stack-boundary= value or default it to
3948 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
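/* As with -mpreferred-stack-boundary=, the argument is the log2 of the
   boundary in bytes; in 64-bit mode the smallest accepted value is 4
   (16-byte alignment), in 32-bit mode it is 2 (4-byte alignment).  */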
3949 ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
3950 if (opts_set->x_ix86_incoming_stack_boundary_arg)
3951 {
3952 if (opts->x_ix86_incoming_stack_boundary_arg
3953 < (TARGET_64BIT_P (opts->x_ix86_isa_flags) ? 4 : 2)
3954 || opts->x_ix86_incoming_stack_boundary_arg > 12)
3955 error ("-mincoming-stack-boundary=%d is not between %d and 12",
3956 opts->x_ix86_incoming_stack_boundary_arg,
3957 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? 4 : 2);
3958 else
3959 {
3960 ix86_user_incoming_stack_boundary
3961 = (1 << opts->x_ix86_incoming_stack_boundary_arg) * BITS_PER_UNIT;
3962 ix86_incoming_stack_boundary
3963 = ix86_user_incoming_stack_boundary;
3964 }
3965 }
3966
3967 /* Accept -msseregparm only if at least SSE support is enabled. */
3968 if (TARGET_SSEREGPARM_P (opts->x_target_flags)
3969 && ! TARGET_SSE_P (opts->x_ix86_isa_flags))
3970 error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
3971
3972 if (opts_set->x_ix86_fpmath)
3973 {
3974 if (opts->x_ix86_fpmath & FPMATH_SSE)
3975 {
3976 if (!TARGET_SSE_P (opts->x_ix86_isa_flags))
3977 {
3978 warning (0, "SSE instruction set disabled, using 387 arithmetics");
3979 opts->x_ix86_fpmath = FPMATH_387;
3980 }
3981 else if ((opts->x_ix86_fpmath & FPMATH_387)
3982 && !TARGET_80387_P (opts->x_target_flags))
3983 {
3984 warning (0, "387 instruction set disabled, using SSE arithmetics");
3985 opts->x_ix86_fpmath = FPMATH_SSE;
3986 }
3987 }
3988 }
3989 /* For all chips supporting SSE2, -mfpmath=sse performs better than
3990 fpmath=387. The latter is, however, the default on many targets since the
3991 extra 80-bit precision of temporaries is considered to be part of the ABI.
3992 Overwrite the default at least for -ffast-math.
3993 TODO: -mfpmath=both seems to produce code that performs the same with
3994 slightly smaller binaries. It is, however, not clear whether register
3995 allocation is ready for this setting.
3996 Also, -mfpmath=387 is overall a lot more compact (about 4-5%) than SSE
3997 codegen. We may switch to 387 with -ffast-math for size-optimized
3998 functions. */
3999 else if (fast_math_flags_set_p (&global_options)
4000 && TARGET_SSE2_P (opts->x_ix86_isa_flags))
4001 opts->x_ix86_fpmath = FPMATH_SSE;
4002 else
4003 opts->x_ix86_fpmath = TARGET_FPMATH_DEFAULT_P (opts->x_ix86_isa_flags);
4004
4005 /* If the i387 is disabled, then do not return values in it. */
4006 if (!TARGET_80387_P (opts->x_target_flags))
4007 opts->x_target_flags &= ~MASK_FLOAT_RETURNS;
4008
4009 /* Use an external vectorized library (SVML or ACML) when vectorizing calls to built-in math functions. */
4010 if (opts_set->x_ix86_veclibabi_type)
4011 switch (opts->x_ix86_veclibabi_type)
4012 {
4013 case ix86_veclibabi_type_svml:
4014 ix86_veclib_handler = ix86_veclibabi_svml;
4015 break;
4016
4017 case ix86_veclibabi_type_acml:
4018 ix86_veclib_handler = ix86_veclibabi_acml;
4019 break;
4020
4021 default:
4022 gcc_unreachable ();
4023 }
4024
4025 if (ix86_tune_features [X86_TUNE_ACCUMULATE_OUTGOING_ARGS]
4026 && !(opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)
4027 && !opts->x_optimize_size)
4028 opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4029
4030 /* If stack probes are required, the space used for large function
4031 arguments on the stack must also be probed; enable
4032 -maccumulate-outgoing-args so that this happens in the prologue. */
4033 if (TARGET_STACK_PROBE_P (opts->x_target_flags)
4034 && !(opts->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
4035 {
4036 if (opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)
4037 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
4038 "for correctness", prefix, suffix);
4039 opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4040 }
4041
4042 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
4043 {
4044 char *p;
4045 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
4046 p = strchr (internal_label_prefix, 'X');
4047 internal_label_prefix_len = p - internal_label_prefix;
4048 *p = '\0';
4049 }
4050
4051 /* When a scheduling description is not available, disable the scheduler
4052 pass so it won't slow down compilation or make x87 code slower. */
4053 if (!TARGET_SCHEDULE)
4054 opts->x_flag_schedule_insns_after_reload = opts->x_flag_schedule_insns = 0;
4055
4056 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
4057 ix86_tune_cost->simultaneous_prefetches,
4058 opts->x_param_values,
4059 opts_set->x_param_values);
4060 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
4061 ix86_tune_cost->prefetch_block,
4062 opts->x_param_values,
4063 opts_set->x_param_values);
4064 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
4065 ix86_tune_cost->l1_cache_size,
4066 opts->x_param_values,
4067 opts_set->x_param_values);
4068 maybe_set_param_value (PARAM_L2_CACHE_SIZE,
4069 ix86_tune_cost->l2_cache_size,
4070 opts->x_param_values,
4071 opts_set->x_param_values);
4072
4073 /* Enable software prefetching at -O3 for CPUs where prefetching is beneficial. */
4074 if (opts->x_flag_prefetch_loop_arrays < 0
4075 && HAVE_prefetch
4076 && (opts->x_optimize >= 3 || opts->x_flag_profile_use)
4077 && TARGET_SOFTWARE_PREFETCHING_BENEFICIAL)
4078 opts->x_flag_prefetch_loop_arrays = 1;
4079
4080 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
4081 can be optimized to ap = __builtin_next_arg (0). */
4082 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && !opts->x_flag_split_stack)
4083 targetm.expand_builtin_va_start = NULL;
4084
4085 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
4086 {
4087 ix86_gen_leave = gen_leave_rex64;
4088 if (Pmode == DImode)
4089 {
4090 ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_di;
4091 ix86_gen_tls_local_dynamic_base_64
4092 = gen_tls_local_dynamic_base_64_di;
4093 }
4094 else
4095 {
4096 ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_si;
4097 ix86_gen_tls_local_dynamic_base_64
4098 = gen_tls_local_dynamic_base_64_si;
4099 }
4100 }
4101 else
4102 ix86_gen_leave = gen_leave;
4103
4104 if (Pmode == DImode)
4105 {
4106 ix86_gen_add3 = gen_adddi3;
4107 ix86_gen_sub3 = gen_subdi3;
4108 ix86_gen_sub3_carry = gen_subdi3_carry;
4109 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
4110 ix86_gen_andsp = gen_anddi3;
4111 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_di;
4112 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi;
4113 ix86_gen_probe_stack_range = gen_probe_stack_rangedi;
4114 ix86_gen_monitor = gen_sse3_monitor_di;
4115 }
4116 else
4117 {
4118 ix86_gen_add3 = gen_addsi3;
4119 ix86_gen_sub3 = gen_subsi3;
4120 ix86_gen_sub3_carry = gen_subsi3_carry;
4121 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
4122 ix86_gen_andsp = gen_andsi3;
4123 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_si;
4124 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi;
4125 ix86_gen_probe_stack_range = gen_probe_stack_rangesi;
4126 ix86_gen_monitor = gen_sse3_monitor_si;
4127 }
4128
4129 #ifdef USE_IX86_CLD
4130 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
4131 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
4132 opts->x_target_flags |= MASK_CLD & ~opts_set->x_target_flags;
4133 #endif
4134
4135 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && opts->x_flag_pic)
4136 {
4137 if (opts->x_flag_fentry > 0)
4138 sorry ("-mfentry isn%'t supported for 32-bit in combination "
4139 "with -fpic");
4140 opts->x_flag_fentry = 0;
4141 }
4142 else if (TARGET_SEH)
4143 {
4144 if (opts->x_flag_fentry == 0)
4145 sorry ("-mno-fentry isn%'t compatible with SEH");
4146 opts->x_flag_fentry = 1;
4147 }
4148 else if (opts->x_flag_fentry < 0)
4149 {
4150 #if defined(PROFILE_BEFORE_PROLOGUE)
4151 opts->x_flag_fentry = 1;
4152 #else
4153 opts->x_flag_fentry = 0;
4154 #endif
4155 }
4156
4157 /* When not optimizing for size, enable vzeroupper optimization for
4158 TARGET_AVX with -fexpensive-optimizations and split 32-byte
4159 AVX unaligned loads/stores. */
4160 if (!opts->x_optimize_size)
4161 {
4162 if (flag_expensive_optimizations
4163 && !(opts_set->x_target_flags & MASK_VZEROUPPER))
4164 opts->x_target_flags |= MASK_VZEROUPPER;
4165 if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_LOAD_OPTIMAL]
4166 && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_LOAD))
4167 opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD;
4168 if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_STORE_OPTIMAL]
4169 && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_STORE))
4170 opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_STORE;
4171 /* Enable 128-bit AVX instruction generation
4172 for the auto-vectorizer. */
4173 if (TARGET_AVX128_OPTIMAL
4174 && !(opts_set->x_target_flags & MASK_PREFER_AVX128))
4175 opts->x_target_flags |= MASK_PREFER_AVX128;
4176 }
4177
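/* Parse -mrecip=opt[,opt,...].  Each opt is one of the names in
   recip_options above or "default" (an alias for "all"); prefixing an
   entry with '!' disables that subset instead of enabling it, e.g.
   -mrecip=all,!sqrt enables every reciprocal approximation except the
   scalar square-root one.  */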
4178 if (opts->x_ix86_recip_name)
4179 {
4180 char *p = ASTRDUP (opts->x_ix86_recip_name);
4181 char *q;
4182 unsigned int mask, i;
4183 bool invert;
4184
4185 while ((q = strtok (p, ",")) != NULL)
4186 {
4187 p = NULL;
4188 if (*q == '!')
4189 {
4190 invert = true;
4191 q++;
4192 }
4193 else
4194 invert = false;
4195
4196 if (!strcmp (q, "default"))
4197 mask = RECIP_MASK_ALL;
4198 else
4199 {
4200 for (i = 0; i < ARRAY_SIZE (recip_options); i++)
4201 if (!strcmp (q, recip_options[i].string))
4202 {
4203 mask = recip_options[i].mask;
4204 break;
4205 }
4206
4207 if (i == ARRAY_SIZE (recip_options))
4208 {
4209 error ("unknown option for -mrecip=%s", q);
4210 invert = false;
4211 mask = RECIP_MASK_NONE;
4212 }
4213 }
4214
4215 opts->x_recip_mask_explicit |= mask;
4216 if (invert)
4217 opts->x_recip_mask &= ~mask;
4218 else
4219 opts->x_recip_mask |= mask;
4220 }
4221 }
4222
4223 if (TARGET_RECIP_P (opts->x_target_flags))
4224 opts->x_recip_mask |= RECIP_MASK_ALL & ~opts->x_recip_mask_explicit;
4225 else if (opts_set->x_target_flags & MASK_RECIP)
4226 opts->x_recip_mask &= ~(RECIP_MASK_ALL & ~opts->x_recip_mask_explicit);
4227
4228 /* Default long double to 64-bit for 32-bit Bionic and to __float128
4229 for 64-bit Bionic. */
4230 if (TARGET_HAS_BIONIC
4231 && !(opts_set->x_target_flags
4232 & (MASK_LONG_DOUBLE_64 | MASK_LONG_DOUBLE_128)))
4233 opts->x_target_flags |= (TARGET_64BIT
4234 ? MASK_LONG_DOUBLE_128
4235 : MASK_LONG_DOUBLE_64);
4236
4237 /* Only one of them can be active. */
4238 gcc_assert ((opts->x_target_flags & MASK_LONG_DOUBLE_64) == 0
4239 || (opts->x_target_flags & MASK_LONG_DOUBLE_128) == 0);
4240
4241 /* Save the initial options in case the user does function specific
4242 options. */
4243 if (main_args_p)
4244 target_option_default_node = target_option_current_node
4245 = build_target_option_node (opts);
4246
4247 /* Handle stack protector */
4248 if (!opts_set->x_ix86_stack_protector_guard)
4249 opts->x_ix86_stack_protector_guard
4250 = TARGET_HAS_BIONIC ? SSP_GLOBAL : SSP_TLS;
4251
4252 /* Handle -mmemcpy-strategy= and -mmemset-strategy= */
4253 if (opts->x_ix86_tune_memcpy_strategy)
4254 {
4255 char *str = xstrdup (opts->x_ix86_tune_memcpy_strategy);
4256 ix86_parse_stringop_strategy_string (str, false);
4257 free (str);
4258 }
4259
4260 if (opts->x_ix86_tune_memset_strategy)
4261 {
4262 char *str = xstrdup (opts->x_ix86_tune_memset_strategy);
4263 ix86_parse_stringop_strategy_string (str, true);
4264 free (str);
4265 }
4266 }
4267
4268 /* Implement the TARGET_OPTION_OVERRIDE hook. */
4269
4270 static void
4271 ix86_option_override (void)
4272 {
4273 opt_pass *pass_insert_vzeroupper = make_pass_insert_vzeroupper (g);
4274 static struct register_pass_info insert_vzeroupper_info
4275 = { pass_insert_vzeroupper, "reload",
4276 1, PASS_POS_INSERT_AFTER
4277 };
4278
4279 ix86_option_override_internal (true, &global_options, &global_options_set);
4280
4281
4282 /* This needs to be done at start up. It's convenient to do it here. */
4283 register_pass (&insert_vzeroupper_info);
4284 }
4285
4286 /* Update register usage after having seen the compiler flags. */
4287
4288 static void
4289 ix86_conditional_register_usage (void)
4290 {
4291 int i, c_mask;
4292 unsigned int j;
4293
4294 /* The PIC register, if it exists, is fixed. */
4295 j = PIC_OFFSET_TABLE_REGNUM;
4296 if (j != INVALID_REGNUM)
4297 fixed_regs[j] = call_used_regs[j] = 1;
4298
4299 /* For 32-bit targets, squash the REX registers. */
4300 if (! TARGET_64BIT)
4301 {
4302 for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
4303 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4304 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
4305 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4306 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
4307 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4308 }
4309
4310 /* See the definition of CALL_USED_REGISTERS in i386.h. */
4311 c_mask = (TARGET_64BIT_MS_ABI ? (1 << 3)
4312 : TARGET_64BIT ? (1 << 2)
4313 : (1 << 1));
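/* The mask selects which conditional bit of a CALL_USED_REGISTERS entry
   applies: bit 1 for 32-bit code, bit 2 for the 64-bit SysV ABI and
   bit 3 for the 64-bit MS ABI.  */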
4314
4315 CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
4316
4317 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4318 {
4319 /* Set/reset conditionally defined registers from
4320 CALL_USED_REGISTERS initializer. */
4321 if (call_used_regs[i] > 1)
4322 call_used_regs[i] = !!(call_used_regs[i] & c_mask);
4323
4324 /* Calculate registers of CLOBBERED_REGS register set
4325 as call used registers from GENERAL_REGS register set. */
4326 if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
4327 && call_used_regs[i])
4328 SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
4329 }
4330
4331 /* If MMX is disabled, squash the registers. */
4332 if (! TARGET_MMX)
4333 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4334 if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i))
4335 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4336
4337 /* If SSE is disabled, squash the registers. */
4338 if (! TARGET_SSE)
4339 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4340 if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i))
4341 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4342
4343 /* If the FPU is disabled, squash the registers. */
4344 if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
4345 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4346 if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
4347 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4348
4349 /* If AVX512F is disabled, squash the registers. */
4350 if (! TARGET_AVX512F)
4351 {
4352 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
4353 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4354
4355 for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
4356 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4357 }
4358 }
4359
4360 \f
4361 /* Save the current options */
4362
4363 static void
4364 ix86_function_specific_save (struct cl_target_option *ptr,
4365 struct gcc_options *opts)
4366 {
4367 ptr->arch = ix86_arch;
4368 ptr->schedule = ix86_schedule;
4369 ptr->tune = ix86_tune;
4370 ptr->branch_cost = ix86_branch_cost;
4371 ptr->tune_defaulted = ix86_tune_defaulted;
4372 ptr->arch_specified = ix86_arch_specified;
4373 ptr->x_ix86_isa_flags_explicit = opts->x_ix86_isa_flags_explicit;
4374 ptr->x_ix86_target_flags_explicit = opts->x_ix86_target_flags_explicit;
4375 ptr->x_recip_mask_explicit = opts->x_recip_mask_explicit;
4376 ptr->x_ix86_arch_string = opts->x_ix86_arch_string;
4377 ptr->x_ix86_tune_string = opts->x_ix86_tune_string;
4378 ptr->x_ix86_cmodel = opts->x_ix86_cmodel;
4379 ptr->x_ix86_abi = opts->x_ix86_abi;
4380 ptr->x_ix86_asm_dialect = opts->x_ix86_asm_dialect;
4381 ptr->x_ix86_branch_cost = opts->x_ix86_branch_cost;
4382 ptr->x_ix86_dump_tunes = opts->x_ix86_dump_tunes;
4383 ptr->x_ix86_force_align_arg_pointer = opts->x_ix86_force_align_arg_pointer;
4384 ptr->x_ix86_force_drap = opts->x_ix86_force_drap;
4385 ptr->x_ix86_incoming_stack_boundary_arg = opts->x_ix86_incoming_stack_boundary_arg;
4386 ptr->x_ix86_pmode = opts->x_ix86_pmode;
4387 ptr->x_ix86_preferred_stack_boundary_arg = opts->x_ix86_preferred_stack_boundary_arg;
4388 ptr->x_ix86_recip_name = opts->x_ix86_recip_name;
4389 ptr->x_ix86_regparm = opts->x_ix86_regparm;
4390 ptr->x_ix86_section_threshold = opts->x_ix86_section_threshold;
4391 ptr->x_ix86_sse2avx = opts->x_ix86_sse2avx;
4392 ptr->x_ix86_stack_protector_guard = opts->x_ix86_stack_protector_guard;
4393 ptr->x_ix86_stringop_alg = opts->x_ix86_stringop_alg;
4394 ptr->x_ix86_tls_dialect = opts->x_ix86_tls_dialect;
4395 ptr->x_ix86_tune_ctrl_string = opts->x_ix86_tune_ctrl_string;
4396 ptr->x_ix86_tune_memcpy_strategy = opts->x_ix86_tune_memcpy_strategy;
4397 ptr->x_ix86_tune_memset_strategy = opts->x_ix86_tune_memset_strategy;
4398 ptr->x_ix86_tune_no_default = opts->x_ix86_tune_no_default;
4399 ptr->x_ix86_veclibabi_type = opts->x_ix86_veclibabi_type;
4400
4401 /* The fields are char but the variables are not; make sure the
4402 values fit in the fields. */
4403 gcc_assert (ptr->arch == ix86_arch);
4404 gcc_assert (ptr->schedule == ix86_schedule);
4405 gcc_assert (ptr->tune == ix86_tune);
4406 gcc_assert (ptr->branch_cost == ix86_branch_cost);
4407 }
4408
4409 /* Restore the current options */
4410
4411 static void
4412 ix86_function_specific_restore (struct gcc_options *opts,
4413 struct cl_target_option *ptr)
4414 {
4415 enum processor_type old_tune = ix86_tune;
4416 enum processor_type old_arch = ix86_arch;
4417 unsigned int ix86_arch_mask;
4418 int i;
4419
4420 /* We don't change -fPIC. */
4421 opts->x_flag_pic = flag_pic;
4422
4423 ix86_arch = (enum processor_type) ptr->arch;
4424 ix86_schedule = (enum attr_cpu) ptr->schedule;
4425 ix86_tune = (enum processor_type) ptr->tune;
4426 opts->x_ix86_branch_cost = ptr->branch_cost;
4427 ix86_tune_defaulted = ptr->tune_defaulted;
4428 ix86_arch_specified = ptr->arch_specified;
4429 opts->x_ix86_isa_flags_explicit = ptr->x_ix86_isa_flags_explicit;
4430 opts->x_ix86_target_flags_explicit = ptr->x_ix86_target_flags_explicit;
4431 opts->x_recip_mask_explicit = ptr->x_recip_mask_explicit;
4432 opts->x_ix86_arch_string = ptr->x_ix86_arch_string;
4433 opts->x_ix86_tune_string = ptr->x_ix86_tune_string;
4434 opts->x_ix86_cmodel = ptr->x_ix86_cmodel;
4435 opts->x_ix86_abi = ptr->x_ix86_abi;
4436 opts->x_ix86_asm_dialect = ptr->x_ix86_asm_dialect;
4437 opts->x_ix86_branch_cost = ptr->x_ix86_branch_cost;
4438 opts->x_ix86_dump_tunes = ptr->x_ix86_dump_tunes;
4439 opts->x_ix86_force_align_arg_pointer = ptr->x_ix86_force_align_arg_pointer;
4440 opts->x_ix86_force_drap = ptr->x_ix86_force_drap;
4441 opts->x_ix86_incoming_stack_boundary_arg = ptr->x_ix86_incoming_stack_boundary_arg;
4442 opts->x_ix86_pmode = ptr->x_ix86_pmode;
4443 opts->x_ix86_preferred_stack_boundary_arg = ptr->x_ix86_preferred_stack_boundary_arg;
4444 opts->x_ix86_recip_name = ptr->x_ix86_recip_name;
4445 opts->x_ix86_regparm = ptr->x_ix86_regparm;
4446 opts->x_ix86_section_threshold = ptr->x_ix86_section_threshold;
4447 opts->x_ix86_sse2avx = ptr->x_ix86_sse2avx;
4448 opts->x_ix86_stack_protector_guard = ptr->x_ix86_stack_protector_guard;
4449 opts->x_ix86_stringop_alg = ptr->x_ix86_stringop_alg;
4450 opts->x_ix86_tls_dialect = ptr->x_ix86_tls_dialect;
4451 opts->x_ix86_tune_ctrl_string = ptr->x_ix86_tune_ctrl_string;
4452 opts->x_ix86_tune_memcpy_strategy = ptr->x_ix86_tune_memcpy_strategy;
4453 opts->x_ix86_tune_memset_strategy = ptr->x_ix86_tune_memset_strategy;
4454 opts->x_ix86_tune_no_default = ptr->x_ix86_tune_no_default;
4455 opts->x_ix86_veclibabi_type = ptr->x_ix86_veclibabi_type;
4456
4457 /* Recreate the arch feature tests if the arch changed */
4458 if (old_arch != ix86_arch)
4459 {
4460 ix86_arch_mask = 1u << ix86_arch;
4461 for (i = 0; i < X86_ARCH_LAST; ++i)
4462 ix86_arch_features[i]
4463 = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
4464 }
4465
4466 /* Recreate the tune optimization tests */
4467 if (old_tune != ix86_tune)
4468 set_ix86_tune_features (ix86_tune, false);
4469 }
4470
4471 /* Print the current options */
4472
4473 static void
4474 ix86_function_specific_print (FILE *file, int indent,
4475 struct cl_target_option *ptr)
4476 {
4477 char *target_string
4478 = ix86_target_string (ptr->x_ix86_isa_flags, ptr->x_target_flags,
4479 NULL, NULL, ptr->x_ix86_fpmath, false);
4480
4481 gcc_assert (ptr->arch < PROCESSOR_max);
4482 fprintf (file, "%*sarch = %d (%s)\n",
4483 indent, "",
4484 ptr->arch, processor_target_table[ptr->arch].name);
4485
4486 gcc_assert (ptr->tune < PROCESSOR_max);
4487 fprintf (file, "%*stune = %d (%s)\n",
4488 indent, "",
4489 ptr->tune, processor_target_table[ptr->tune].name);
4490
4491 fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
4492
4493 if (target_string)
4494 {
4495 fprintf (file, "%*s%s\n", indent, "", target_string);
4496 free (target_string);
4497 }
4498 }
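/* Assumed shape of the dump emitted by ix86_function_specific_print
   above (indent 2; the numeric indices and the option string depend on
   the active options and are illustrative only):

     arch = 3 (core2)
     tune = 3 (core2)
     branch_cost = 3
     -m64 -march=core2 -mtune=core2
*/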
4499
4500 \f
4501 /* Inner function to process the attribute((target(...))): take an argument and
4502 set the current options from that argument.  If we have a list, recursively go
4503 over the list. */
4504
4505 static bool
4506 ix86_valid_target_attribute_inner_p (tree args, char *p_strings[],
4507 struct gcc_options *opts,
4508 struct gcc_options *opts_set,
4509 struct gcc_options *enum_opts_set)
4510 {
4511 char *next_optstr;
4512 bool ret = true;
4513
4514 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
4515 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
4516 #define IX86_ATTR_ENUM(S,O) { S, sizeof (S)-1, ix86_opt_enum, O, 0 }
4517 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
4518 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
4519
4520 enum ix86_opt_type
4521 {
4522 ix86_opt_unknown,
4523 ix86_opt_yes,
4524 ix86_opt_no,
4525 ix86_opt_str,
4526 ix86_opt_enum,
4527 ix86_opt_isa
4528 };
4529
4530 static const struct
4531 {
4532 const char *string;
4533 size_t len;
4534 enum ix86_opt_type type;
4535 int opt;
4536 int mask;
4537 } attrs[] = {
4538 /* isa options */
4539 IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
4540 IX86_ATTR_ISA ("abm", OPT_mabm),
4541 IX86_ATTR_ISA ("bmi", OPT_mbmi),
4542 IX86_ATTR_ISA ("bmi2", OPT_mbmi2),
4543 IX86_ATTR_ISA ("lzcnt", OPT_mlzcnt),
4544 IX86_ATTR_ISA ("tbm", OPT_mtbm),
4545 IX86_ATTR_ISA ("aes", OPT_maes),
4546 IX86_ATTR_ISA ("sha", OPT_msha),
4547 IX86_ATTR_ISA ("avx", OPT_mavx),
4548 IX86_ATTR_ISA ("avx2", OPT_mavx2),
4549 IX86_ATTR_ISA ("avx512f", OPT_mavx512f),
4550 IX86_ATTR_ISA ("avx512pf", OPT_mavx512pf),
4551 IX86_ATTR_ISA ("avx512er", OPT_mavx512er),
4552 IX86_ATTR_ISA ("avx512cd", OPT_mavx512cd),
4553 IX86_ATTR_ISA ("avx512dq", OPT_mavx512dq),
4554 IX86_ATTR_ISA ("mmx", OPT_mmmx),
4555 IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
4556 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
4557 IX86_ATTR_ISA ("sse", OPT_msse),
4558 IX86_ATTR_ISA ("sse2", OPT_msse2),
4559 IX86_ATTR_ISA ("sse3", OPT_msse3),
4560 IX86_ATTR_ISA ("sse4", OPT_msse4),
4561 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1),
4562 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2),
4563 IX86_ATTR_ISA ("sse4a", OPT_msse4a),
4564 IX86_ATTR_ISA ("ssse3", OPT_mssse3),
4565 IX86_ATTR_ISA ("fma4", OPT_mfma4),
4566 IX86_ATTR_ISA ("fma", OPT_mfma),
4567 IX86_ATTR_ISA ("xop", OPT_mxop),
4568 IX86_ATTR_ISA ("lwp", OPT_mlwp),
4569 IX86_ATTR_ISA ("fsgsbase", OPT_mfsgsbase),
4570 IX86_ATTR_ISA ("rdrnd", OPT_mrdrnd),
4571 IX86_ATTR_ISA ("f16c", OPT_mf16c),
4572 IX86_ATTR_ISA ("rtm", OPT_mrtm),
4573 IX86_ATTR_ISA ("hle", OPT_mhle),
4574 IX86_ATTR_ISA ("prfchw", OPT_mprfchw),
4575 IX86_ATTR_ISA ("rdseed", OPT_mrdseed),
4576 IX86_ATTR_ISA ("adx", OPT_madx),
4577 IX86_ATTR_ISA ("fxsr", OPT_mfxsr),
4578 IX86_ATTR_ISA ("xsave", OPT_mxsave),
4579 IX86_ATTR_ISA ("xsaveopt", OPT_mxsaveopt),
4580 IX86_ATTR_ISA ("prefetchwt1", OPT_mprefetchwt1),
4581 IX86_ATTR_ISA ("clflushopt", OPT_mclflushopt),
4582 IX86_ATTR_ISA ("xsavec", OPT_mxsavec),
4583 IX86_ATTR_ISA ("xsaves", OPT_mxsaves),
4584
4585 /* enum options */
4586 IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_),
4587
4588 /* string options */
4589 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
4590 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE),
4591
4592 /* flag options */
4593 IX86_ATTR_YES ("cld",
4594 OPT_mcld,
4595 MASK_CLD),
4596
4597 IX86_ATTR_NO ("fancy-math-387",
4598 OPT_mfancy_math_387,
4599 MASK_NO_FANCY_MATH_387),
4600
4601 IX86_ATTR_YES ("ieee-fp",
4602 OPT_mieee_fp,
4603 MASK_IEEE_FP),
4604
4605 IX86_ATTR_YES ("inline-all-stringops",
4606 OPT_minline_all_stringops,
4607 MASK_INLINE_ALL_STRINGOPS),
4608
4609 IX86_ATTR_YES ("inline-stringops-dynamically",
4610 OPT_minline_stringops_dynamically,
4611 MASK_INLINE_STRINGOPS_DYNAMICALLY),
4612
4613 IX86_ATTR_NO ("align-stringops",
4614 OPT_mno_align_stringops,
4615 MASK_NO_ALIGN_STRINGOPS),
4616
4617 IX86_ATTR_YES ("recip",
4618 OPT_mrecip,
4619 MASK_RECIP),
4620
4621 };
4622
4623 /* If this is a list, recurse to get the options. */
4624 if (TREE_CODE (args) == TREE_LIST)
4625 {
4626 bool ret = true;
4627
4628 for (; args; args = TREE_CHAIN (args))
4629 if (TREE_VALUE (args)
4630 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args),
4631 p_strings, opts, opts_set,
4632 enum_opts_set))
4633 ret = false;
4634
4635 return ret;
4636 }
4637
4638 else if (TREE_CODE (args) != STRING_CST)
4639 {
4640 error ("attribute %<target%> argument not a string");
4641 return false;
4642 }
4643
4644 /* Handle multiple arguments separated by commas. */
4645 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
4646
4647 while (next_optstr && *next_optstr != '\0')
4648 {
4649 char *p = next_optstr;
4650 char *orig_p = p;
4651 char *comma = strchr (next_optstr, ',');
4652 const char *opt_string;
4653 size_t len, opt_len;
4654 int opt;
4655 bool opt_set_p;
4656 char ch;
4657 unsigned i;
4658 enum ix86_opt_type type = ix86_opt_unknown;
4659 int mask = 0;
4660
4661 if (comma)
4662 {
4663 *comma = '\0';
4664 len = comma - next_optstr;
4665 next_optstr = comma + 1;
4666 }
4667 else
4668 {
4669 len = strlen (p);
4670 next_optstr = NULL;
4671 }
4672
4673 /* Recognize no-xxx. */
4674 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
4675 {
4676 opt_set_p = false;
4677 p += 3;
4678 len -= 3;
4679 }
4680 else
4681 opt_set_p = true;
4682
4683 /* Find the option. */
4684 ch = *p;
4685 opt = N_OPTS;
4686 for (i = 0; i < ARRAY_SIZE (attrs); i++)
4687 {
4688 type = attrs[i].type;
4689 opt_len = attrs[i].len;
4690 if (ch == attrs[i].string[0]
4691 && ((type != ix86_opt_str && type != ix86_opt_enum)
4692 ? len == opt_len
4693 : len > opt_len)
4694 && memcmp (p, attrs[i].string, opt_len) == 0)
4695 {
4696 opt = attrs[i].opt;
4697 mask = attrs[i].mask;
4698 opt_string = attrs[i].string;
4699 break;
4700 }
4701 }
4702
4703 /* Process the option. */
4704 if (opt == N_OPTS)
4705 {
4706 error ("attribute(target(\"%s\")) is unknown", orig_p);
4707 ret = false;
4708 }
4709
4710 else if (type == ix86_opt_isa)
4711 {
4712 struct cl_decoded_option decoded;
4713
4714 generate_option (opt, NULL, opt_set_p, CL_TARGET, &decoded);
4715 ix86_handle_option (opts, opts_set,
4716 &decoded, input_location);
4717 }
4718
4719 else if (type == ix86_opt_yes || type == ix86_opt_no)
4720 {
4721 if (type == ix86_opt_no)
4722 opt_set_p = !opt_set_p;
4723
4724 if (opt_set_p)
4725 opts->x_target_flags |= mask;
4726 else
4727 opts->x_target_flags &= ~mask;
4728 }
4729
4730 else if (type == ix86_opt_str)
4731 {
4732 if (p_strings[opt])
4733 {
4734 error ("option(\"%s\") was already specified", opt_string);
4735 ret = false;
4736 }
4737 else
4738 p_strings[opt] = xstrdup (p + opt_len);
4739 }
4740
4741 else if (type == ix86_opt_enum)
4742 {
4743 bool arg_ok;
4744 int value;
4745
4746 arg_ok = opt_enum_arg_to_value (opt, p + opt_len, &value, CL_TARGET);
4747 if (arg_ok)
4748 set_option (opts, enum_opts_set, opt, value,
4749 p + opt_len, DK_UNSPECIFIED, input_location,
4750 global_dc);
4751 else
4752 {
4753 error ("attribute(target(\"%s\")) is unknown", orig_p);
4754 ret = false;
4755 }
4756 }
4757
4758 else
4759 gcc_unreachable ();
4760 }
4761
4762 return ret;
4763 }
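/* Illustrative sketch of what the parser above does (the function name
   and option values are hypothetical, not taken from the sources): a
   declaration such as

     int foo (int x) __attribute__((target("sse4.2,no-3dnow,arch=core2")));

   arrives here as the string "sse4.2,no-3dnow,arch=core2".  It is split
   at commas; "sse4.2" matches an ISA entry and is enabled through
   ix86_handle_option, "no-3dnow" clears the 3dnow ISA flag because of
   the "no-" prefix, and "arch=core2" is a string option whose value is
   stored in p_strings[IX86_FUNCTION_SPECIFIC_ARCH].  */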
4764
4765 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
4766
4767 tree
4768 ix86_valid_target_attribute_tree (tree args,
4769 struct gcc_options *opts,
4770 struct gcc_options *opts_set)
4771 {
4772 const char *orig_arch_string = opts->x_ix86_arch_string;
4773 const char *orig_tune_string = opts->x_ix86_tune_string;
4774 enum fpmath_unit orig_fpmath_set = opts_set->x_ix86_fpmath;
4775 int orig_tune_defaulted = ix86_tune_defaulted;
4776 int orig_arch_specified = ix86_arch_specified;
4777 char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL };
4778 tree t = NULL_TREE;
4779 int i;
4780 struct cl_target_option *def
4781 = TREE_TARGET_OPTION (target_option_default_node);
4782 struct gcc_options enum_opts_set;
4783
4784 memset (&enum_opts_set, 0, sizeof (enum_opts_set));
4785
4786 /* Process each of the options on the chain. */
4787 if (! ix86_valid_target_attribute_inner_p (args, option_strings, opts,
4788 opts_set, &enum_opts_set))
4789 return error_mark_node;
4790
4791 /* If the changed options are different from the default, rerun
4792 ix86_option_override_internal, and then save the options away.
4793 The string options are attribute options, and will be undone
4794 when we copy the save structure. */
4795 if (opts->x_ix86_isa_flags != def->x_ix86_isa_flags
4796 || opts->x_target_flags != def->x_target_flags
4797 || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
4798 || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
4799 || enum_opts_set.x_ix86_fpmath)
4800 {
4801 /* If we are using the default tune= or arch=, undo the string assigned,
4802 and use the default. */
4803 if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
4804 opts->x_ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
4805 else if (!orig_arch_specified)
4806 opts->x_ix86_arch_string = NULL;
4807
4808 if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
4809 opts->x_ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
4810 else if (orig_tune_defaulted)
4811 opts->x_ix86_tune_string = NULL;
4812
4813 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
4814 if (enum_opts_set.x_ix86_fpmath)
4815 opts_set->x_ix86_fpmath = (enum fpmath_unit) 1;
4816 else if (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
4817 && TARGET_SSE_P (opts->x_ix86_isa_flags))
4818 {
4819 opts->x_ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
4820 opts_set->x_ix86_fpmath = (enum fpmath_unit) 1;
4821 }
4822
4823 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
4824 ix86_option_override_internal (false, opts, opts_set);
4825
4826 /* Add any builtin functions with the new isa if any. */
4827 ix86_add_new_builtins (opts->x_ix86_isa_flags);
4828
4829 /* Save the current options unless we are validating options for
4830 #pragma. */
4831 t = build_target_option_node (opts);
4832
4833 opts->x_ix86_arch_string = orig_arch_string;
4834 opts->x_ix86_tune_string = orig_tune_string;
4835 opts_set->x_ix86_fpmath = orig_fpmath_set;
4836
4837 /* Free up memory allocated to hold the strings */
4838 for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
4839 free (option_strings[i]);
4840 }
4841
4842 return t;
4843 }
4844
4845 /* Hook to validate attribute((target("string"))). */
4846
4847 static bool
4848 ix86_valid_target_attribute_p (tree fndecl,
4849 tree ARG_UNUSED (name),
4850 tree args,
4851 int ARG_UNUSED (flags))
4852 {
4853 struct gcc_options func_options;
4854 tree new_target, new_optimize;
4855 bool ret = true;
4856
4857 /* attribute((target("default"))) does nothing, beyond
4858 affecting multi-versioning. */
4859 if (TREE_VALUE (args)
4860 && TREE_CODE (TREE_VALUE (args)) == STRING_CST
4861 && TREE_CHAIN (args) == NULL_TREE
4862 && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0)
4863 return true;
4864
4865 tree old_optimize = build_optimization_node (&global_options);
4866
4867 /* Get the optimization options of the current function. */
4868 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
4869
4870 if (!func_optimize)
4871 func_optimize = old_optimize;
4872
4873 /* Init func_options. */
4874 memset (&func_options, 0, sizeof (func_options));
4875 init_options_struct (&func_options, NULL);
4876 lang_hooks.init_options_struct (&func_options);
4877
4878 cl_optimization_restore (&func_options,
4879 TREE_OPTIMIZATION (func_optimize));
4880
4881 /* Initialize func_options to the default before its target options can
4882 be set. */
4883 cl_target_option_restore (&func_options,
4884 TREE_TARGET_OPTION (target_option_default_node));
4885
4886 new_target = ix86_valid_target_attribute_tree (args, &func_options,
4887 &global_options_set);
4888
4889 new_optimize = build_optimization_node (&func_options);
4890
4891 if (new_target == error_mark_node)
4892 ret = false;
4893
4894 else if (fndecl && new_target)
4895 {
4896 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
4897
4898 if (old_optimize != new_optimize)
4899 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
4900 }
4901
4902 return ret;
4903 }
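/* For context (illustrative only): the special-cased "default" string
   above matters for function multi-versioning, where a C++ translation
   unit may define the same function several times, e.g.

     __attribute__((target("default"))) int dispatch_me (void) { return 0; }
     __attribute__((target("avx2")))    int dispatch_me (void) { return 1; }

   The "default" version carries no extra target options, so the hook
   accepts it without rebuilding any option nodes.  The function name is
   hypothetical.  */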
4904
4905 \f
4906 /* Hook to determine if one function can safely inline another. */
4907
4908 static bool
4909 ix86_can_inline_p (tree caller, tree callee)
4910 {
4911 bool ret = false;
4912 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
4913 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
4914
4915 /* If callee has no option attributes, then it is ok to inline. */
4916 if (!callee_tree)
4917 ret = true;
4918
4919 /* If caller has no option attributes, but callee does then it is not ok to
4920 inline. */
4921 else if (!caller_tree)
4922 ret = false;
4923
4924 else
4925 {
4926 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
4927 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
4928
4929 /* Callee's isa options should be a subset of the caller's, i.e. an SSE4 function
4930 can inline an SSE2 function but an SSE2 function can't inline an SSE4
4931 function. */
4932 if ((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
4933 != callee_opts->x_ix86_isa_flags)
4934 ret = false;
4935
4936 /* See if we have the same non-isa options. */
4937 else if (caller_opts->x_target_flags != callee_opts->x_target_flags)
4938 ret = false;
4939
4940 /* See if arch, tune, etc. are the same. */
4941 else if (caller_opts->arch != callee_opts->arch)
4942 ret = false;
4943
4944 else if (caller_opts->tune != callee_opts->tune)
4945 ret = false;
4946
4947 else if (caller_opts->x_ix86_fpmath != callee_opts->x_ix86_fpmath)
4948 ret = false;
4949
4950 else if (caller_opts->branch_cost != callee_opts->branch_cost)
4951 ret = false;
4952
4953 else
4954 ret = true;
4955 }
4956
4957 return ret;
4958 }
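/* Illustrative example of the subset rule above (hypothetical names):

     __attribute__((target("sse4.2"))) static int callee (int x) { return x + 1; }
     __attribute__((target("avx2"))) int caller (int x) { return callee (x); }

   Here the callee's ISA flags are contained in the caller's (avx2
   implies the sse4.2 chain), so inlining may be allowed; swapping the
   two attributes would make the isa-flags check fail.  */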
4959
4960 \f
4961 /* Remember the last target of ix86_set_current_function. */
4962 static GTY(()) tree ix86_previous_fndecl;
4963
4964 /* Invalidate ix86_previous_fndecl cache. */
4965 void
4966 ix86_reset_previous_fndecl (void)
4967 {
4968 ix86_previous_fndecl = NULL_TREE;
4969 }
4970
4971 /* Establish appropriate back-end context for processing the function
4972 FNDECL. The argument might be NULL to indicate processing at top
4973 level, outside of any function scope. */
4974 static void
4975 ix86_set_current_function (tree fndecl)
4976 {
4977 /* Only change the context if the function changes. This hook is called
4978 several times in the course of compiling a function, and we don't want to
4979 slow things down too much or call target_reinit when it isn't safe. */
4980 if (fndecl && fndecl != ix86_previous_fndecl)
4981 {
4982 tree old_tree = (ix86_previous_fndecl
4983 ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)
4984 : NULL_TREE);
4985
4986 tree new_tree = (fndecl
4987 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
4988 : NULL_TREE);
4989
4990 ix86_previous_fndecl = fndecl;
4991 if (old_tree == new_tree)
4992 ;
4993
4994 else if (new_tree)
4995 {
4996 cl_target_option_restore (&global_options,
4997 TREE_TARGET_OPTION (new_tree));
4998 if (TREE_TARGET_GLOBALS (new_tree))
4999 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
5000 else
5001 TREE_TARGET_GLOBALS (new_tree)
5002 = save_target_globals_default_opts ();
5003 }
5004
5005 else if (old_tree)
5006 {
5007 new_tree = target_option_current_node;
5008 cl_target_option_restore (&global_options,
5009 TREE_TARGET_OPTION (new_tree));
5010 if (TREE_TARGET_GLOBALS (new_tree))
5011 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
5012 else if (new_tree == target_option_default_node)
5013 restore_target_globals (&default_target_globals);
5014 else
5015 TREE_TARGET_GLOBALS (new_tree)
5016 = save_target_globals_default_opts ();
5017 }
5018 }
5019 }
5020
5021 \f
5022 /* Return true if this goes in large data/bss. */
5023
5024 static bool
5025 ix86_in_large_data_p (tree exp)
5026 {
5027 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
5028 return false;
5029
5030 /* Functions are never large data. */
5031 if (TREE_CODE (exp) == FUNCTION_DECL)
5032 return false;
5033
5034 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
5035 {
5036 const char *section = DECL_SECTION_NAME (exp);
5037 if (strcmp (section, ".ldata") == 0
5038 || strcmp (section, ".lbss") == 0)
5039 return true;
5040 return false;
5041 }
5042 else
5043 {
5044 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
5045
5046 /* If this is an incomplete type with size 0, then we can't put it
5047 in data because it might be too big when completed. Also,
5048 int_size_in_bytes returns -1 if the size can vary or is larger than
5049 an integer, in which case it is also safer to assume that it goes in
5050 large data.  */
5051 if (size <= 0 || size > ix86_section_threshold)
5052 return true;
5053 }
5054
5055 return false;
5056 }
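/* A minimal illustration (assumed, not from the sources): with
   -mcmodel=medium, an object larger than ix86_section_threshold
   (-mlarge-data-threshold, 65536 by default), for example a 1 MiB
   buffer

     static char big_buffer[1 << 20];

   is classified as large data by the test above and ends up in
   .ldata/.lbss instead of the normal .data/.bss sections.  The buffer
   name is hypothetical.  */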
5057
5058 /* Switch to the appropriate section for output of DECL.
5059 DECL is either a `VAR_DECL' node or a constant of some sort.
5060 RELOC indicates whether forming the initial value of DECL requires
5061 link-time relocations. */
5062
5063 ATTRIBUTE_UNUSED static section *
5064 x86_64_elf_select_section (tree decl, int reloc,
5065 unsigned HOST_WIDE_INT align)
5066 {
5067 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5068 && ix86_in_large_data_p (decl))
5069 {
5070 const char *sname = NULL;
5071 unsigned int flags = SECTION_WRITE;
5072 switch (categorize_decl_for_section (decl, reloc))
5073 {
5074 case SECCAT_DATA:
5075 sname = ".ldata";
5076 break;
5077 case SECCAT_DATA_REL:
5078 sname = ".ldata.rel";
5079 break;
5080 case SECCAT_DATA_REL_LOCAL:
5081 sname = ".ldata.rel.local";
5082 break;
5083 case SECCAT_DATA_REL_RO:
5084 sname = ".ldata.rel.ro";
5085 break;
5086 case SECCAT_DATA_REL_RO_LOCAL:
5087 sname = ".ldata.rel.ro.local";
5088 break;
5089 case SECCAT_BSS:
5090 sname = ".lbss";
5091 flags |= SECTION_BSS;
5092 break;
5093 case SECCAT_RODATA:
5094 case SECCAT_RODATA_MERGE_STR:
5095 case SECCAT_RODATA_MERGE_STR_INIT:
5096 case SECCAT_RODATA_MERGE_CONST:
5097 sname = ".lrodata";
5098 flags = 0;
5099 break;
5100 case SECCAT_SRODATA:
5101 case SECCAT_SDATA:
5102 case SECCAT_SBSS:
5103 gcc_unreachable ();
5104 case SECCAT_TEXT:
5105 case SECCAT_TDATA:
5106 case SECCAT_TBSS:
5107 /* We don't split these for the medium model.  Place them into
5108 default sections and hope for the best. */
5109 break;
5110 }
5111 if (sname)
5112 {
5113 /* We might get called with string constants, but get_named_section
5114 doesn't like them as they are not DECLs. Also, we need to set
5115 flags in that case. */
5116 if (!DECL_P (decl))
5117 return get_section (sname, flags, NULL);
5118 return get_named_section (decl, sname, reloc);
5119 }
5120 }
5121 return default_elf_select_section (decl, reloc, align);
5122 }
5123
5124 /* Select a set of attributes for section NAME based on the properties
5125 of DECL and whether or not RELOC indicates that DECL's initializer
5126 might contain runtime relocations. */
5127
5128 static unsigned int ATTRIBUTE_UNUSED
5129 x86_64_elf_section_type_flags (tree decl, const char *name, int reloc)
5130 {
5131 unsigned int flags = default_section_type_flags (decl, name, reloc);
5132
5133 if (decl == NULL_TREE
5134 && (strcmp (name, ".ldata.rel.ro") == 0
5135 || strcmp (name, ".ldata.rel.ro.local") == 0))
5136 flags |= SECTION_RELRO;
5137
5138 if (strcmp (name, ".lbss") == 0
5139 || strncmp (name, ".lbss.", 6) == 0
5140 || strncmp (name, ".gnu.linkonce.lb.", 17) == 0)
5141 flags |= SECTION_BSS;
5142
5143 return flags;
5144 }
5145
5146 /* Build up a unique section name, expressed as a
5147 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
5148 RELOC indicates whether the initial value of EXP requires
5149 link-time relocations. */
5150
5151 static void ATTRIBUTE_UNUSED
5152 x86_64_elf_unique_section (tree decl, int reloc)
5153 {
5154 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5155 && ix86_in_large_data_p (decl))
5156 {
5157 const char *prefix = NULL;
5158 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
5159 bool one_only = DECL_COMDAT_GROUP (decl) && !HAVE_COMDAT_GROUP;
5160
5161 switch (categorize_decl_for_section (decl, reloc))
5162 {
5163 case SECCAT_DATA:
5164 case SECCAT_DATA_REL:
5165 case SECCAT_DATA_REL_LOCAL:
5166 case SECCAT_DATA_REL_RO:
5167 case SECCAT_DATA_REL_RO_LOCAL:
5168 prefix = one_only ? ".ld" : ".ldata";
5169 break;
5170 case SECCAT_BSS:
5171 prefix = one_only ? ".lb" : ".lbss";
5172 break;
5173 case SECCAT_RODATA:
5174 case SECCAT_RODATA_MERGE_STR:
5175 case SECCAT_RODATA_MERGE_STR_INIT:
5176 case SECCAT_RODATA_MERGE_CONST:
5177 prefix = one_only ? ".lr" : ".lrodata";
5178 break;
5179 case SECCAT_SRODATA:
5180 case SECCAT_SDATA:
5181 case SECCAT_SBSS:
5182 gcc_unreachable ();
5183 case SECCAT_TEXT:
5184 case SECCAT_TDATA:
5185 case SECCAT_TBSS:
5186 /* We don't split these for the medium model.  Place them into
5187 default sections and hope for the best. */
5188 break;
5189 }
5190 if (prefix)
5191 {
5192 const char *name, *linkonce;
5193 char *string;
5194
5195 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
5196 name = targetm.strip_name_encoding (name);
5197
5198 /* If we're using one_only, then there needs to be a .gnu.linkonce
5199 prefix to the section name. */
5200 linkonce = one_only ? ".gnu.linkonce" : "";
5201
5202 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
5203
5204 set_decl_section_name (decl, string);
5205 return;
5206 }
5207 }
5208 default_unique_section (decl, reloc);
5209 }
5210
5211 #ifdef COMMON_ASM_OP
5212 /* This says how to output assembler code to declare an
5213 uninitialized external linkage data object.
5214
5215 For medium model x86-64 we need to use the .largecomm directive for
5216 large objects. */
5217 void
5218 x86_elf_aligned_common (FILE *file,
5219 const char *name, unsigned HOST_WIDE_INT size,
5220 int align)
5221 {
5222 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5223 && size > (unsigned int)ix86_section_threshold)
5224 fputs (".largecomm\t", file);
5225 else
5226 fputs (COMMON_ASM_OP, file);
5227 assemble_name (file, name);
5228 fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
5229 size, align / BITS_PER_UNIT);
5230 }
5231 #endif
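/* Assumed example of the assembly produced by x86_elf_aligned_common
   above for a medium-model object larger than the section threshold
   (symbol name, size and alignment are hypothetical):

     .largecomm	big_common,1048576,32

   Smaller objects go through the usual COMMON_ASM_OP path instead.  */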
5232
5233 /* Utility function for targets to use in implementing
5234 ASM_OUTPUT_ALIGNED_BSS. */
5235
5236 void
5237 x86_output_aligned_bss (FILE *file, tree decl, const char *name,
5238 unsigned HOST_WIDE_INT size, int align)
5239 {
5240 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5241 && size > (unsigned int)ix86_section_threshold)
5242 switch_to_section (get_named_section (decl, ".lbss", 0));
5243 else
5244 switch_to_section (bss_section);
5245 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
5246 #ifdef ASM_DECLARE_OBJECT_NAME
5247 last_assemble_variable_decl = decl;
5248 ASM_DECLARE_OBJECT_NAME (file, name, decl);
5249 #else
5250 /* Standard thing is just output label for the object. */
5251 ASM_OUTPUT_LABEL (file, name);
5252 #endif /* ASM_DECLARE_OBJECT_NAME */
5253 ASM_OUTPUT_SKIP (file, size ? size : 1);
5254 }
5255 \f
5256 /* Decide whether we must probe the stack before any space allocation
5257 on this target. It's essentially TARGET_STACK_PROBE except when
5258 -fstack-check causes the stack to be already probed differently. */
5259
5260 bool
5261 ix86_target_stack_probe (void)
5262 {
5263 /* Do not probe the stack twice if static stack checking is enabled. */
5264 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
5265 return false;
5266
5267 return TARGET_STACK_PROBE;
5268 }
5269 \f
5270 /* Decide whether we can make a sibling call to a function. DECL is the
5271 declaration of the function being targeted by the call and EXP is the
5272 CALL_EXPR representing the call. */
5273
5274 static bool
5275 ix86_function_ok_for_sibcall (tree decl, tree exp)
5276 {
5277 tree type, decl_or_type;
5278 rtx a, b;
5279
5280 /* If we are generating position-independent code, we cannot sibcall
5281 optimize any indirect call, or a direct call to a global function,
5282 as the PLT requires %ebx be live. (Darwin does not have a PLT.) */
5283 if (!TARGET_MACHO
5284 && !TARGET_64BIT
5285 && flag_pic
5286 && (!decl || !targetm.binds_local_p (decl)))
5287 return false;
5288
5289 /* If we need to align the outgoing stack, then sibcalling would
5290 unalign the stack, which may break the called function. */
5291 if (ix86_minimum_incoming_stack_boundary (true)
5292 < PREFERRED_STACK_BOUNDARY)
5293 return false;
5294
5295 if (decl)
5296 {
5297 decl_or_type = decl;
5298 type = TREE_TYPE (decl);
5299 }
5300 else
5301 {
5302 /* We're looking at the CALL_EXPR, we need the type of the function. */
5303 type = CALL_EXPR_FN (exp); /* pointer expression */
5304 type = TREE_TYPE (type); /* pointer type */
5305 type = TREE_TYPE (type); /* function type */
5306 decl_or_type = type;
5307 }
5308
5309 /* Check that the return value locations are the same. Like
5310 if we are returning floats on the 80387 register stack, we cannot
5311 make a sibcall from a function that doesn't return a float to a
5312 function that does or, conversely, from a function that does return
5313 a float to a function that doesn't; the necessary stack adjustment
5314 would not be executed. This is also the place we notice
5315 differences in the return value ABI. Note that it is ok for one
5316 of the functions to have void return type as long as the return
5317 value of the other is passed in a register. */
5318 a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
5319 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
5320 cfun->decl, false);
5321 if (STACK_REG_P (a) || STACK_REG_P (b))
5322 {
5323 if (!rtx_equal_p (a, b))
5324 return false;
5325 }
5326 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
5327 ;
5328 else if (!rtx_equal_p (a, b))
5329 return false;
5330
5331 if (TARGET_64BIT)
5332 {
5333 /* The SYSV ABI has more call-clobbered registers;
5334 disallow sibcalls from MS to SYSV. */
5335 if (cfun->machine->call_abi == MS_ABI
5336 && ix86_function_type_abi (type) == SYSV_ABI)
5337 return false;
5338 }
5339 else
5340 {
5341 /* If this call is indirect, we'll need to be able to use a
5342 call-clobbered register for the address of the target function.
5343 Make sure that all such registers are not used for passing
5344 parameters. Note that DLLIMPORT functions are indirect. */
5345 if (!decl
5346 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
5347 {
5348 if (ix86_function_regparm (type, NULL) >= 3)
5349 {
5350 /* ??? Need to count the actual number of registers to be used,
5351 not the possible number of registers. Fix later. */
5352 return false;
5353 }
5354 }
5355 }
5356
5357 /* Otherwise okay. That also includes certain types of indirect calls. */
5358 return true;
5359 }
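/* Illustrative case for the PIC restriction above (hypothetical names):
   on 32-bit non-Darwin targets compiled with -fPIC, a tail call to a
   global function such as

     extern int helper (int);
     int wrapper (int x) { return helper (x); }

   is rejected, because the call would go through the PLT and the PLT
   sequence needs %ebx to hold the GOT pointer.  */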
5360
5361 /* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
5362 and "sseregparm" calling convention attributes;
5363 arguments as in struct attribute_spec.handler. */
5364
5365 static tree
5366 ix86_handle_cconv_attribute (tree *node, tree name,
5367 tree args,
5368 int,
5369 bool *no_add_attrs)
5370 {
5371 if (TREE_CODE (*node) != FUNCTION_TYPE
5372 && TREE_CODE (*node) != METHOD_TYPE
5373 && TREE_CODE (*node) != FIELD_DECL
5374 && TREE_CODE (*node) != TYPE_DECL)
5375 {
5376 warning (OPT_Wattributes, "%qE attribute only applies to functions",
5377 name);
5378 *no_add_attrs = true;
5379 return NULL_TREE;
5380 }
5381
5382 /* Can combine regparm with all attributes but fastcall, and thiscall. */
5383 if (is_attribute_p ("regparm", name))
5384 {
5385 tree cst;
5386
5387 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5388 {
5389 error ("fastcall and regparm attributes are not compatible");
5390 }
5391
5392 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5393 {
5394 error ("regparam and thiscall attributes are not compatible");
5395 }
5396
5397 cst = TREE_VALUE (args);
5398 if (TREE_CODE (cst) != INTEGER_CST)
5399 {
5400 warning (OPT_Wattributes,
5401 "%qE attribute requires an integer constant argument",
5402 name);
5403 *no_add_attrs = true;
5404 }
5405 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
5406 {
5407 warning (OPT_Wattributes, "argument to %qE attribute larger than %d",
5408 name, REGPARM_MAX);
5409 *no_add_attrs = true;
5410 }
5411
5412 return NULL_TREE;
5413 }
5414
5415 if (TARGET_64BIT)
5416 {
5417 /* Do not warn when emulating the MS ABI. */
5418 if ((TREE_CODE (*node) != FUNCTION_TYPE
5419 && TREE_CODE (*node) != METHOD_TYPE)
5420 || ix86_function_type_abi (*node) != MS_ABI)
5421 warning (OPT_Wattributes, "%qE attribute ignored",
5422 name);
5423 *no_add_attrs = true;
5424 return NULL_TREE;
5425 }
5426
5427 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
5428 if (is_attribute_p ("fastcall", name))
5429 {
5430 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5431 {
5432 error ("fastcall and cdecl attributes are not compatible");
5433 }
5434 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5435 {
5436 error ("fastcall and stdcall attributes are not compatible");
5437 }
5438 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
5439 {
5440 error ("fastcall and regparm attributes are not compatible");
5441 }
5442 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5443 {
5444 error ("fastcall and thiscall attributes are not compatible");
5445 }
5446 }
5447
5448 /* Can combine stdcall with fastcall (redundant), regparm and
5449 sseregparm. */
5450 else if (is_attribute_p ("stdcall", name))
5451 {
5452 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5453 {
5454 error ("stdcall and cdecl attributes are not compatible");
5455 }
5456 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5457 {
5458 error ("stdcall and fastcall attributes are not compatible");
5459 }
5460 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5461 {
5462 error ("stdcall and thiscall attributes are not compatible");
5463 }
5464 }
5465
5466 /* Can combine cdecl with regparm and sseregparm. */
5467 else if (is_attribute_p ("cdecl", name))
5468 {
5469 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5470 {
5471 error ("stdcall and cdecl attributes are not compatible");
5472 }
5473 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5474 {
5475 error ("fastcall and cdecl attributes are not compatible");
5476 }
5477 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5478 {
5479 error ("cdecl and thiscall attributes are not compatible");
5480 }
5481 }
5482 else if (is_attribute_p ("thiscall", name))
5483 {
5484 if (TREE_CODE (*node) != METHOD_TYPE && pedantic)
5485 warning (OPT_Wattributes, "%qE attribute is used for non-class method",
5486 name);
5487 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5488 {
5489 error ("stdcall and thiscall attributes are not compatible");
5490 }
5491 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5492 {
5493 error ("fastcall and thiscall attributes are not compatible");
5494 }
5495 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5496 {
5497 error ("cdecl and thiscall attributes are not compatible");
5498 }
5499 }
5500
5501 /* Can combine sseregparm with all attributes. */
5502
5503 return NULL_TREE;
5504 }
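/* For illustration (hypothetical declarations), the checks above reject
   mixes such as

     int f (int) __attribute__((fastcall, cdecl));    (rejected)
     int g (int) __attribute__((stdcall, fastcall));  (rejected)
     int h (int) __attribute__((regparm (2), cdecl)); (accepted)

   On 64-bit targets the convention attributes other than regparm are
   dropped, with a warning unless the function uses the MS ABI.  */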
5505
5506 /* The transactional memory builtins are implicitly regparm or fastcall
5507 depending on the ABI. Override the generic do-nothing attribute that
5508 these builtins were declared with, and replace it with one of the two
5509 attributes that we expect elsewhere. */
5510
5511 static tree
5512 ix86_handle_tm_regparm_attribute (tree *node, tree, tree,
5513 int flags, bool *no_add_attrs)
5514 {
5515 tree alt;
5516
5517 /* In no case do we want to add the placeholder attribute. */
5518 *no_add_attrs = true;
5519
5520 /* The 64-bit ABI is unchanged for transactional memory. */
5521 if (TARGET_64BIT)
5522 return NULL_TREE;
5523
5524 /* ??? Is there a better way to validate 32-bit windows? We have
5525 cfun->machine->call_abi, but that seems to be set only for 64-bit. */
5526 if (CHECK_STACK_LIMIT > 0)
5527 alt = tree_cons (get_identifier ("fastcall"), NULL, NULL);
5528 else
5529 {
5530 alt = tree_cons (NULL, build_int_cst (NULL, 2), NULL);
5531 alt = tree_cons (get_identifier ("regparm"), alt, NULL);
5532 }
5533 decl_attributes (node, alt, flags);
5534
5535 return NULL_TREE;
5536 }
5537
5538 /* This function determines from TYPE the calling-convention. */
5539
5540 unsigned int
5541 ix86_get_callcvt (const_tree type)
5542 {
5543 unsigned int ret = 0;
5544 bool is_stdarg;
5545 tree attrs;
5546
5547 if (TARGET_64BIT)
5548 return IX86_CALLCVT_CDECL;
5549
5550 attrs = TYPE_ATTRIBUTES (type);
5551 if (attrs != NULL_TREE)
5552 {
5553 if (lookup_attribute ("cdecl", attrs))
5554 ret |= IX86_CALLCVT_CDECL;
5555 else if (lookup_attribute ("stdcall", attrs))
5556 ret |= IX86_CALLCVT_STDCALL;
5557 else if (lookup_attribute ("fastcall", attrs))
5558 ret |= IX86_CALLCVT_FASTCALL;
5559 else if (lookup_attribute ("thiscall", attrs))
5560 ret |= IX86_CALLCVT_THISCALL;
5561
5562 /* Regparm isn't allowed for thiscall and fastcall. */
5563 if ((ret & (IX86_CALLCVT_THISCALL | IX86_CALLCVT_FASTCALL)) == 0)
5564 {
5565 if (lookup_attribute ("regparm", attrs))
5566 ret |= IX86_CALLCVT_REGPARM;
5567 if (lookup_attribute ("sseregparm", attrs))
5568 ret |= IX86_CALLCVT_SSEREGPARM;
5569 }
5570
5571 if (IX86_BASE_CALLCVT (ret) != 0)
5572 return ret;
5573 }
5574
5575 is_stdarg = stdarg_p (type);
5576 if (TARGET_RTD && !is_stdarg)
5577 return IX86_CALLCVT_STDCALL | ret;
5578
5579 if (ret != 0
5580 || is_stdarg
5581 || TREE_CODE (type) != METHOD_TYPE
5582 || ix86_function_type_abi (type) != MS_ABI)
5583 return IX86_CALLCVT_CDECL | ret;
5584
5585 return IX86_CALLCVT_THISCALL;
5586 }
5587
5588 /* Return 0 if the attributes for two types are incompatible, 1 if they
5589 are compatible, and 2 if they are nearly compatible (which causes a
5590 warning to be generated). */
5591
5592 static int
5593 ix86_comp_type_attributes (const_tree type1, const_tree type2)
5594 {
5595 unsigned int ccvt1, ccvt2;
5596
5597 if (TREE_CODE (type1) != FUNCTION_TYPE
5598 && TREE_CODE (type1) != METHOD_TYPE)
5599 return 1;
5600
5601 ccvt1 = ix86_get_callcvt (type1);
5602 ccvt2 = ix86_get_callcvt (type2);
5603 if (ccvt1 != ccvt2)
5604 return 0;
5605 if (ix86_function_regparm (type1, NULL)
5606 != ix86_function_regparm (type2, NULL))
5607 return 0;
5608
5609 return 1;
5610 }
5611 \f
5612 /* Return the regparm value for a function with the indicated TYPE and DECL.
5613 DECL may be NULL when calling function indirectly
5614 or considering a libcall. */
5615
5616 static int
5617 ix86_function_regparm (const_tree type, const_tree decl)
5618 {
5619 tree attr;
5620 int regparm;
5621 unsigned int ccvt;
5622
5623 if (TARGET_64BIT)
5624 return (ix86_function_type_abi (type) == SYSV_ABI
5625 ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
5626 ccvt = ix86_get_callcvt (type);
5627 regparm = ix86_regparm;
5628
5629 if ((ccvt & IX86_CALLCVT_REGPARM) != 0)
5630 {
5631 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
5632 if (attr)
5633 {
5634 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
5635 return regparm;
5636 }
5637 }
5638 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
5639 return 2;
5640 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
5641 return 1;
5642
5643 /* Use register calling convention for local functions when possible. */
5644 if (decl
5645 && TREE_CODE (decl) == FUNCTION_DECL
5646 /* Caller and callee must agree on the calling convention, so
5647 checking just the optimize flag here would mean that with
5648 __attribute__((optimize (...))) the caller could use the regparm
5649 convention and the callee not, or vice versa.  Instead look at
5650 whether the callee is optimized or not. */
5651 && opt_for_fn (decl, optimize)
5652 && !(profile_flag && !flag_fentry))
5653 {
5654 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5655 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE (decl));
5656 if (i && i->local && i->can_change_signature)
5657 {
5658 int local_regparm, globals = 0, regno;
5659
5660 /* Make sure no regparm register is taken by a
5661 fixed register variable. */
5662 for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
5663 if (fixed_regs[local_regparm])
5664 break;
5665
5666 /* We don't want to use regparm(3) for nested functions as
5667 these use a static chain pointer in the third argument. */
5668 if (local_regparm == 3 && DECL_STATIC_CHAIN (decl))
5669 local_regparm = 2;
5670
5671 /* In 32-bit mode save a register for the split stack. */
5672 if (!TARGET_64BIT && local_regparm == 3 && flag_split_stack)
5673 local_regparm = 2;
5674
5675 /* Each fixed register usage increases register pressure,
5676 so fewer registers should be used for argument passing.
5677 This functionality can be overridden by an explicit
5678 regparm value. */
5679 for (regno = AX_REG; regno <= DI_REG; regno++)
5680 if (fixed_regs[regno])
5681 globals++;
5682
5683 local_regparm
5684 = globals < local_regparm ? local_regparm - globals : 0;
5685
5686 if (local_regparm > regparm)
5687 regparm = local_regparm;
5688 }
5689 }
5690
5691 return regparm;
5692 }
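/* Example usage of the regparm value computed above (hypothetical
   prototype): on 32-bit targets

     __attribute__((regparm (3))) int sum3 (int a, int b, int c);

   passes the first three integer arguments in %eax, %edx and %ecx
   instead of on the stack.  */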
5693
5694 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
5695 DFmode (2) arguments in SSE registers for a function with the
5696 indicated TYPE and DECL. DECL may be NULL when calling function
5697 indirectly or considering a libcall. Otherwise return 0. */
5698
5699 static int
5700 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
5701 {
5702 gcc_assert (!TARGET_64BIT);
5703
5704 /* Use SSE registers to pass SFmode and DFmode arguments if requested
5705 by the sseregparm attribute. */
5706 if (TARGET_SSEREGPARM
5707 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
5708 {
5709 if (!TARGET_SSE)
5710 {
5711 if (warn)
5712 {
5713 if (decl)
5714 error ("calling %qD with attribute sseregparm without "
5715 "SSE/SSE2 enabled", decl);
5716 else
5717 error ("calling %qT with attribute sseregparm without "
5718 "SSE/SSE2 enabled", type);
5719 }
5720 return 0;
5721 }
5722
5723 return 2;
5724 }
5725
5726 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
5727 (and DFmode for SSE2) arguments in SSE registers. */
5728 if (decl && TARGET_SSE_MATH && optimize
5729 && !(profile_flag && !flag_fentry))
5730 {
5731 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5732 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
5733 if (i && i->local && i->can_change_signature)
5734 return TARGET_SSE2 ? 2 : 1;
5735 }
5736
5737 return 0;
5738 }
5739
5740 /* Return true if EAX is live at the start of the function. Used by
5741 ix86_expand_prologue to determine if we need special help before
5742 calling allocate_stack_worker. */
5743
5744 static bool
5745 ix86_eax_live_at_start_p (void)
5746 {
5747 /* Cheat. Don't bother working forward from ix86_function_regparm
5748 to the function type to whether an actual argument is located in
5749 eax. Instead just look at cfg info, which is still close enough
5750 to correct at this point. This gives false positives for broken
5751 functions that might use uninitialized data that happens to be
5752 allocated in eax, but who cares? */
5753 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 0);
5754 }
5755
5756 static bool
5757 ix86_keep_aggregate_return_pointer (tree fntype)
5758 {
5759 tree attr;
5760
5761 if (!TARGET_64BIT)
5762 {
5763 attr = lookup_attribute ("callee_pop_aggregate_return",
5764 TYPE_ATTRIBUTES (fntype));
5765 if (attr)
5766 return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0);
5767
5768 /* For 32-bit MS-ABI the default is to keep aggregate
5769 return pointer. */
5770 if (ix86_function_type_abi (fntype) == MS_ABI)
5771 return true;
5772 }
5773 return KEEP_AGGREGATE_RETURN_POINTER != 0;
5774 }
5775
5776 /* Value is the number of bytes of arguments automatically
5777 popped when returning from a subroutine call.
5778 FUNDECL is the declaration node of the function (as a tree),
5779 FUNTYPE is the data type of the function (as a tree),
5780 or for a library call it is an identifier node for the subroutine name.
5781 SIZE is the number of bytes of arguments passed on the stack.
5782
5783 On the 80386, the RTD insn may be used to pop them if the number
5784 of args is fixed, but if the number is variable then the caller
5785 must pop them all. RTD can't be used for library calls now
5786 because the library is compiled with the Unix compiler.
5787 Use of RTD is a selectable option, since it is incompatible with
5788 standard Unix calling sequences. If the option is not selected,
5789 the caller must always pop the args.
5790
5791 The attribute stdcall is equivalent to RTD on a per module basis. */
5792
5793 static int
5794 ix86_return_pops_args (tree fundecl, tree funtype, int size)
5795 {
5796 unsigned int ccvt;
5797
5798 /* None of the 64-bit ABIs pop arguments. */
5799 if (TARGET_64BIT)
5800 return 0;
5801
5802 ccvt = ix86_get_callcvt (funtype);
5803
5804 if ((ccvt & (IX86_CALLCVT_STDCALL | IX86_CALLCVT_FASTCALL
5805 | IX86_CALLCVT_THISCALL)) != 0
5806 && ! stdarg_p (funtype))
5807 return size;
5808
5809 /* Lose any fake structure return argument if it is passed on the stack. */
5810 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
5811 && !ix86_keep_aggregate_return_pointer (funtype))
5812 {
5813 int nregs = ix86_function_regparm (funtype, fundecl);
5814 if (nregs == 0)
5815 return GET_MODE_SIZE (Pmode);
5816 }
5817
5818 return 0;
5819 }
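/* Concrete illustration (assumed prototype): for a 32-bit function

     __attribute__((stdcall)) int f (int a, int b);

   SIZE is 8 and the function is not stdarg, so the value returned above
   is 8 and the callee pops its own arguments (a "ret $8"), whereas a
   plain cdecl function returns 0 here and leaves popping to the caller.  */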
5820
5821 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
5822
5823 static bool
5824 ix86_legitimate_combined_insn (rtx insn)
5825 {
5826 /* Check operand constraints in case hard registers were propagated
5827 into insn pattern. This check prevents combine pass from
5828 generating insn patterns with invalid hard register operands.
5829 These invalid insns can eventually confuse reload to error out
5830 with a spill failure. See also PRs 46829 and 46843. */
5831 if ((INSN_CODE (insn) = recog (PATTERN (insn), insn, 0)) >= 0)
5832 {
5833 int i;
5834
5835 extract_insn (insn);
5836 preprocess_constraints (insn);
5837
5838 int n_operands = recog_data.n_operands;
5839 int n_alternatives = recog_data.n_alternatives;
5840 for (i = 0; i < n_operands; i++)
5841 {
5842 rtx op = recog_data.operand[i];
5843 enum machine_mode mode = GET_MODE (op);
5844 const operand_alternative *op_alt;
5845 int offset = 0;
5846 bool win;
5847 int j;
5848
5849 /* For pre-AVX disallow unaligned loads/stores where the
5850 instructions don't support it. */
5851 if (!TARGET_AVX
5852 && VECTOR_MODE_P (GET_MODE (op))
5853 && misaligned_operand (op, GET_MODE (op)))
5854 {
5855 int min_align = get_attr_ssememalign (insn);
5856 if (min_align == 0)
5857 return false;
5858 }
5859
5860 /* A unary operator may be accepted by the predicate, but it
5861 is irrelevant for matching constraints. */
5862 if (UNARY_P (op))
5863 op = XEXP (op, 0);
5864
5865 if (GET_CODE (op) == SUBREG)
5866 {
5867 if (REG_P (SUBREG_REG (op))
5868 && REGNO (SUBREG_REG (op)) < FIRST_PSEUDO_REGISTER)
5869 offset = subreg_regno_offset (REGNO (SUBREG_REG (op)),
5870 GET_MODE (SUBREG_REG (op)),
5871 SUBREG_BYTE (op),
5872 GET_MODE (op));
5873 op = SUBREG_REG (op);
5874 }
5875
5876 if (!(REG_P (op) && HARD_REGISTER_P (op)))
5877 continue;
5878
5879 op_alt = recog_op_alt;
5880
5881 /* Operand has no constraints, anything is OK. */
5882 win = !n_alternatives;
5883
5884 alternative_mask enabled = recog_data.enabled_alternatives;
5885 for (j = 0; j < n_alternatives; j++, op_alt += n_operands)
5886 {
5887 if (!TEST_BIT (enabled, j))
5888 continue;
5889 if (op_alt[i].anything_ok
5890 || (op_alt[i].matches != -1
5891 && operands_match_p
5892 (recog_data.operand[i],
5893 recog_data.operand[op_alt[i].matches]))
5894 || reg_fits_class_p (op, op_alt[i].cl, offset, mode))
5895 {
5896 win = true;
5897 break;
5898 }
5899 }
5900
5901 if (!win)
5902 return false;
5903 }
5904 }
5905
5906 return true;
5907 }
5908 \f
5909 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
5910
5911 static unsigned HOST_WIDE_INT
5912 ix86_asan_shadow_offset (void)
5913 {
5914 return TARGET_LP64 ? (TARGET_MACHO ? (HOST_WIDE_INT_1 << 44)
5915 : HOST_WIDE_INT_C (0x7fff8000))
5916 : (HOST_WIDE_INT_1 << 29);
5917 }
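/* Sketch of how the offset above is used (the well-known ASan shadow
   mapping, not taken from these sources): on LP64 Linux the shadow byte
   for an address A lives at

     (A >> 3) + 0x7fff8000

   which is the HOST_WIDE_INT_C (0x7fff8000) constant returned above;
   the Darwin and 32-bit cases plug in their respective constants.  */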
5918 \f
5919 /* Argument support functions. */
5920
5921 /* Return true when register may be used to pass function parameters. */
5922 bool
5923 ix86_function_arg_regno_p (int regno)
5924 {
5925 int i;
5926 const int *parm_regs;
5927
5928 if (!TARGET_64BIT)
5929 {
5930 if (TARGET_MACHO)
5931 return (regno < REGPARM_MAX
5932 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
5933 else
5934 return (regno < REGPARM_MAX
5935 || (TARGET_MMX && MMX_REGNO_P (regno)
5936 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
5937 || (TARGET_SSE && SSE_REGNO_P (regno)
5938 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
5939 }
5940
5941 if (TARGET_SSE && SSE_REGNO_P (regno)
5942 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
5943 return true;
5944
5945 /* TODO: The function should depend on current function ABI but
5946 builtins.c would need updating then. Therefore we use the
5947 default ABI. */
5948
5949 /* RAX is used as hidden argument to va_arg functions. */
5950 if (ix86_abi == SYSV_ABI && regno == AX_REG)
5951 return true;
5952
5953 if (ix86_abi == MS_ABI)
5954 parm_regs = x86_64_ms_abi_int_parameter_registers;
5955 else
5956 parm_regs = x86_64_int_parameter_registers;
5957 for (i = 0; i < (ix86_abi == MS_ABI
5958 ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
5959 if (regno == parm_regs[i])
5960 return true;
5961 return false;
5962 }
5963
5964 /* Return if we do not know how to pass TYPE solely in registers. */
5965
5966 static bool
5967 ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
5968 {
5969 if (must_pass_in_stack_var_size_or_pad (mode, type))
5970 return true;
5971
5972 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
5973 The layout_type routine is crafty and tries to trick us into passing
5974 currently unsupported vector types on the stack by using TImode. */
5975 return (!TARGET_64BIT && mode == TImode
5976 && type && TREE_CODE (type) != VECTOR_TYPE);
5977 }
5978
5979 /* Return the size, in bytes, of the area reserved for arguments passed
5980 in registers for the function represented by FNDECL, depending on the
5981 ABI in use. */
5982 int
5983 ix86_reg_parm_stack_space (const_tree fndecl)
5984 {
5985 enum calling_abi call_abi = SYSV_ABI;
5986 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
5987 call_abi = ix86_function_abi (fndecl);
5988 else
5989 call_abi = ix86_function_type_abi (fndecl);
5990 if (TARGET_64BIT && call_abi == MS_ABI)
5991 return 32;
5992 return 0;
5993 }
5994
5995 /* Returns value SYSV_ABI, MS_ABI dependent on fntype, specifying the
5996 call abi used. */
5997 enum calling_abi
5998 ix86_function_type_abi (const_tree fntype)
5999 {
6000 if (fntype != NULL_TREE && TYPE_ATTRIBUTES (fntype) != NULL_TREE)
6001 {
6002 enum calling_abi abi = ix86_abi;
6003 if (abi == SYSV_ABI)
6004 {
6005 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
6006 abi = MS_ABI;
6007 }
6008 else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
6009 abi = SYSV_ABI;
6010 return abi;
6011 }
6012 return ix86_abi;
6013 }
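/* Illustrative use of the attribute handling above (hypothetical name):
   with a SYSV default,

     __attribute__((ms_abi)) void win_style (void);

   makes this routine return MS_ABI for that one type, while sysv_abi on
   an MS-ABI default flips it the other way.  */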
6014
6015 /* We add this as a workaround in order to use libc_has_function
6016 hook in i386.md. */
6017 bool
6018 ix86_libc_has_function (enum function_class fn_class)
6019 {
6020 return targetm.libc_has_function (fn_class);
6021 }
6022
6023 static bool
6024 ix86_function_ms_hook_prologue (const_tree fn)
6025 {
6026 if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
6027 {
6028 if (decl_function_context (fn) != NULL_TREE)
6029 error_at (DECL_SOURCE_LOCATION (fn),
6030 "ms_hook_prologue is not compatible with nested function");
6031 else
6032 return true;
6033 }
6034 return false;
6035 }
6036
6037 static enum calling_abi
6038 ix86_function_abi (const_tree fndecl)
6039 {
6040 if (! fndecl)
6041 return ix86_abi;
6042 return ix86_function_type_abi (TREE_TYPE (fndecl));
6043 }
6044
6045 /* Returns value SYSV_ABI, MS_ABI dependent on cfun, specifying the
6046 call abi used. */
6047 enum calling_abi
6048 ix86_cfun_abi (void)
6049 {
6050 if (! cfun)
6051 return ix86_abi;
6052 return cfun->machine->call_abi;
6053 }
6054
6055 /* Write the extra assembler code needed to declare a function properly. */
6056
6057 void
6058 ix86_asm_output_function_label (FILE *asm_out_file, const char *fname,
6059 tree decl)
6060 {
6061 bool is_ms_hook = ix86_function_ms_hook_prologue (decl);
6062
6063 if (is_ms_hook)
6064 {
6065 int i, filler_count = (TARGET_64BIT ? 32 : 16);
6066 unsigned int filler_cc = 0xcccccccc;
6067
6068 for (i = 0; i < filler_count; i += 4)
6069 fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc);
6070 }
6071
6072 #ifdef SUBTARGET_ASM_UNWIND_INIT
6073 SUBTARGET_ASM_UNWIND_INIT (asm_out_file);
6074 #endif
6075
6076 ASM_OUTPUT_LABEL (asm_out_file, fname);
6077
6078 /* Output magic byte marker, if hot-patch attribute is set. */
6079 if (is_ms_hook)
6080 {
6081 if (TARGET_64BIT)
6082 {
6083 /* leaq [%rsp + 0], %rsp */
6084 asm_fprintf (asm_out_file, ASM_BYTE
6085 "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n");
6086 }
6087 else
6088 {
6089 /* movl.s %edi, %edi
6090 push %ebp
6091 movl.s %esp, %ebp */
6092 asm_fprintf (asm_out_file, ASM_BYTE
6093 "0x8b, 0xff, 0x55, 0x8b, 0xec\n");
6094 }
6095 }
6096 }
6097
6098 /* regclass.c */
6099 extern void init_regs (void);
6100
6101 /* Implementation of call abi switching target hook. Specific to FNDECL
6102 the specific call register sets are set. See also
6103 ix86_conditional_register_usage for more details. */
6104 void
6105 ix86_call_abi_override (const_tree fndecl)
6106 {
6107 if (fndecl == NULL_TREE)
6108 cfun->machine->call_abi = ix86_abi;
6109 else
6110 cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
6111 }
6112
6113 /* The 64-bit MS and SYSV ABIs have different sets of call-used registers.  Avoid
6114 expensive re-initialization of init_regs each time we switch function context
6115 since this is needed only during RTL expansion. */
6116 static void
6117 ix86_maybe_switch_abi (void)
6118 {
6119 if (TARGET_64BIT &&
6120 call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI))
6121 reinit_regs ();
6122 }
6123
6124 /* Initialize a variable CUM of type CUMULATIVE_ARGS
6125 for a call to a function whose data type is FNTYPE.
6126 For a library call, FNTYPE is 0. */
6127
6128 void
6129 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
6130 tree fntype, /* tree ptr for function decl */
6131 rtx libname, /* SYMBOL_REF of library name or 0 */
6132 tree fndecl,
6133 int caller)
6134 {
6135 struct cgraph_local_info *i;
6136
6137 memset (cum, 0, sizeof (*cum));
6138
6139 if (fndecl)
6140 {
6141 i = cgraph_local_info (fndecl);
6142 cum->call_abi = ix86_function_abi (fndecl);
6143 }
6144 else
6145 {
6146 i = NULL;
6147 cum->call_abi = ix86_function_type_abi (fntype);
6148 }
6149
6150 cum->caller = caller;
6151
6152 /* Set up the number of registers to use for passing arguments. */
6153 cum->nregs = ix86_regparm;
6154 if (TARGET_64BIT)
6155 {
6156 cum->nregs = (cum->call_abi == SYSV_ABI
6157 ? X86_64_REGPARM_MAX
6158 : X86_64_MS_REGPARM_MAX);
6159 }
6160 if (TARGET_SSE)
6161 {
6162 cum->sse_nregs = SSE_REGPARM_MAX;
6163 if (TARGET_64BIT)
6164 {
6165 cum->sse_nregs = (cum->call_abi == SYSV_ABI
6166 ? X86_64_SSE_REGPARM_MAX
6167 : X86_64_MS_SSE_REGPARM_MAX);
6168 }
6169 }
6170 if (TARGET_MMX)
6171 cum->mmx_nregs = MMX_REGPARM_MAX;
6172 cum->warn_avx512f = true;
6173 cum->warn_avx = true;
6174 cum->warn_sse = true;
6175 cum->warn_mmx = true;
6176
6177 /* Because the type might mismatch between caller and callee, we need to
6178 use actual type of function for local calls.
6179 FIXME: cgraph_analyze can be told to actually record if function uses
6180 va_start so for local functions maybe_vaarg can be made aggressive
6181 helping K&R code.
6182 FIXME: once the type system is fixed, we won't need this code anymore. */
6183 if (i && i->local && i->can_change_signature)
6184 fntype = TREE_TYPE (fndecl);
6185 cum->maybe_vaarg = (fntype
6186 ? (!prototype_p (fntype) || stdarg_p (fntype))
6187 : !libname);
6188
6189 if (!TARGET_64BIT)
6190 {
6191 /* If there are variable arguments, then we won't pass anything
6192 in registers in 32-bit mode. */
6193 if (stdarg_p (fntype))
6194 {
6195 cum->nregs = 0;
6196 cum->sse_nregs = 0;
6197 cum->mmx_nregs = 0;
6198 cum->warn_avx512f = false;
6199 cum->warn_avx = false;
6200 cum->warn_sse = false;
6201 cum->warn_mmx = false;
6202 return;
6203 }
6204
6205 /* Use ecx and edx registers if function has fastcall attribute,
6206 else look for regparm information. */
6207 if (fntype)
6208 {
6209 unsigned int ccvt = ix86_get_callcvt (fntype);
6210 if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
6211 {
6212 cum->nregs = 1;
6213 cum->fastcall = 1; /* Same first register as in fastcall. */
6214 }
6215 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
6216 {
6217 cum->nregs = 2;
6218 cum->fastcall = 1;
6219 }
6220 else
6221 cum->nregs = ix86_function_regparm (fntype, fndecl);
6222 }
6223
6224 /* Set up the number of SSE registers used for passing SFmode
6225 and DFmode arguments. Warn for mismatching ABI. */
6226 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
6227 }
6228 }
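
/* A rough illustration of the initial state set up above, assuming the
   usual REGPARM_MAX definitions: for

       void f (int a, double b);

   a 64-bit SysV ABI call starts with nregs = 6 (RDI, RSI, RDX, RCX, R8, R9)
   and sse_nregs = 8 (XMM0-XMM7), while a 64-bit MS ABI call starts with
   nregs = 4 and sse_nregs = 4.  In 32-bit mode nregs defaults to
   ix86_regparm (normally 0), so both arguments would go on the stack unless
   a regparm, fastcall or thiscall convention is in effect.  */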
6229
6230 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
6231 But in the case of vector types, it is some vector mode.
6232
6233 When we have only some of our vector isa extensions enabled, then there
6234 are some modes for which vector_mode_supported_p is false. For these
6235 modes, the generic vector support in gcc will choose some non-vector mode
6236 in order to implement the type. By computing the natural mode, we'll
6237 select the proper ABI location for the operand and not depend on whatever
6238 the middle-end decides to do with these vector types.
6239
6240 The middle-end can't deal with vector types wider than 16 bytes.  In that
6241 case, we return the original mode and warn about the ABI change if CUM
6242 isn't NULL.
6243
6244 If IN_RETURN is true, warn about the ABI change if the vector mode isn't
6245 available for the function return value. */
6246
6247 static enum machine_mode
6248 type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum,
6249 bool in_return)
6250 {
6251 enum machine_mode mode = TYPE_MODE (type);
6252
6253 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
6254 {
6255 HOST_WIDE_INT size = int_size_in_bytes (type);
6256 if ((size == 8 || size == 16 || size == 32 || size == 64)
6257 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
6258 && TYPE_VECTOR_SUBPARTS (type) > 1)
6259 {
6260 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
6261
6262 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
6263 mode = MIN_MODE_VECTOR_FLOAT;
6264 else
6265 mode = MIN_MODE_VECTOR_INT;
6266
6267 /* Get the mode which has this inner mode and number of units. */
6268 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
6269 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
6270 && GET_MODE_INNER (mode) == innermode)
6271 {
6272 if (size == 64 && !TARGET_AVX512F)
6273 {
6274 static bool warnedavx512f;
6275 static bool warnedavx512f_ret;
6276
6277 if (cum && cum->warn_avx512f && !warnedavx512f)
6278 {
6279 if (warning (OPT_Wpsabi, "AVX512F vector argument "
6280 "without AVX512F enabled changes the ABI"))
6281 warnedavx512f = true;
6282 }
6283 else if (in_return && !warnedavx512f_ret)
6284 {
6285 if (warning (OPT_Wpsabi, "AVX512F vector return "
6286 "without AVX512F enabled changes the ABI"))
6287 warnedavx512f_ret = true;
6288 }
6289
6290 return TYPE_MODE (type);
6291 }
6292 else if (size == 32 && !TARGET_AVX)
6293 {
6294 static bool warnedavx;
6295 static bool warnedavx_ret;
6296
6297 if (cum && cum->warn_avx && !warnedavx)
6298 {
6299 if (warning (OPT_Wpsabi, "AVX vector argument "
6300 "without AVX enabled changes the ABI"))
6301 warnedavx = true;
6302 }
6303 else if (in_return && !warnedavx_ret)
6304 {
6305 if (warning (OPT_Wpsabi, "AVX vector return "
6306 "without AVX enabled changes the ABI"))
6307 warnedavx_ret = true;
6308 }
6309
6310 return TYPE_MODE (type);
6311 }
6312 else if (((size == 8 && TARGET_64BIT) || size == 16)
6313 && !TARGET_SSE)
6314 {
6315 static bool warnedsse;
6316 static bool warnedsse_ret;
6317
6318 if (cum && cum->warn_sse && !warnedsse)
6319 {
6320 if (warning (OPT_Wpsabi, "SSE vector argument "
6321 "without SSE enabled changes the ABI"))
6322 warnedsse = true;
6323 }
6324 else if (!TARGET_64BIT && in_return && !warnedsse_ret)
6325 {
6326 if (warning (OPT_Wpsabi, "SSE vector return "
6327 "without SSE enabled changes the ABI"))
6328 warnedsse_ret = true;
6329 }
6330 }
6331 else if ((size == 8 && !TARGET_64BIT) && !TARGET_MMX)
6332 {
6333 static bool warnedmmx;
6334 static bool warnedmmx_ret;
6335
6336 if (cum && cum->warn_mmx && !warnedmmx)
6337 {
6338 if (warning (OPT_Wpsabi, "MMX vector argument "
6339 "without MMX enabled changes the ABI"))
6340 warnedmmx = true;
6341 }
6342 else if (in_return && !warnedmmx_ret)
6343 {
6344 if (warning (OPT_Wpsabi, "MMX vector return "
6345 "without MMX enabled changes the ABI"))
6346 warnedmmx_ret = true;
6347 }
6348 }
6349 return mode;
6350 }
6351
6352 gcc_unreachable ();
6353 }
6354 }
6355
6356 return mode;
6357 }
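
/* A minimal illustration of the -Wpsabi warnings above, assuming a generic
   x86-64 target compiled without -mavx:

       typedef int v8si __attribute__ ((vector_size (32)));
       v8si f (v8si x);

   V8SImode is not supported without AVX, so the middle-end gives the type a
   non-vector mode; type_natural_mode still computes V8SImode, warns once
   that the ABI changes, and returns the original TYPE_MODE.  */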
6358
6359 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
6360 this may not agree with the mode that the type system has chosen for the
6361 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
6362 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
6363
6364 static rtx
6365 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
6366 unsigned int regno)
6367 {
6368 rtx tmp;
6369
6370 if (orig_mode != BLKmode)
6371 tmp = gen_rtx_REG (orig_mode, regno);
6372 else
6373 {
6374 tmp = gen_rtx_REG (mode, regno);
6375 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
6376 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
6377 }
6378
6379 return tmp;
6380 }
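
/* For illustration: with a non-BLKmode ORIG_MODE such as V4SFmode the result
   is simply (reg:V4SF xmm0), whereas for a BLKmode aggregate landing in an
   SSE register the value is wrapped so the offset is explicit, roughly

       (parallel:BLK [(expr_list (reg:V4SF xmm0) (const_int 0))])

   which tells the rest of the compiler how the BLKmode value maps onto the
   register.  */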
6381
6382 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
6383 of this code is to classify each 8bytes of incoming argument by the register
6384 class and assign registers accordingly. */
6385
6386 /* Return the union class of CLASS1 and CLASS2.
6387 See the x86-64 PS ABI for details. */
6388
6389 static enum x86_64_reg_class
6390 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
6391 {
6392 /* Rule #1: If both classes are equal, this is the resulting class. */
6393 if (class1 == class2)
6394 return class1;
6395
6396 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
6397 the other class. */
6398 if (class1 == X86_64_NO_CLASS)
6399 return class2;
6400 if (class2 == X86_64_NO_CLASS)
6401 return class1;
6402
6403 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
6404 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
6405 return X86_64_MEMORY_CLASS;
6406
6407 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
6408 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
6409 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
6410 return X86_64_INTEGERSI_CLASS;
6411 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
6412 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
6413 return X86_64_INTEGER_CLASS;
6414
6415 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
6416 MEMORY is used. */
6417 if (class1 == X86_64_X87_CLASS
6418 || class1 == X86_64_X87UP_CLASS
6419 || class1 == X86_64_COMPLEX_X87_CLASS
6420 || class2 == X86_64_X87_CLASS
6421 || class2 == X86_64_X87UP_CLASS
6422 || class2 == X86_64_COMPLEX_X87_CLASS)
6423 return X86_64_MEMORY_CLASS;
6424
6425 /* Rule #6: Otherwise class SSE is used. */
6426 return X86_64_SSE_CLASS;
6427 }
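
/* A small worked example of the merge rules above, assuming the SysV x86-64
   ABI:

       struct s { int i; float f; };

   is 8 bytes, i.e. a single eightbyte.  The int classifies as
   X86_64_INTEGERSI_CLASS and the float (not 64-bit aligned) as
   X86_64_SSE_CLASS; rule #4 merges them into X86_64_INTEGER_CLASS, so the
   whole struct is passed in one integer register.  */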
6428
6429 /* Classify the argument of type TYPE and mode MODE.
6430 CLASSES will be filled by the register class used to pass each word
6431 of the operand. The number of words is returned. In case the parameter
6432 should be passed in memory, 0 is returned. As a special case for zero
6433 sized containers, classes[0] will be NO_CLASS and 1 is returned.
6434
6435 BIT_OFFSET is used internally for handling records and specifies the
6436 offset in bits modulo 512 to avoid overflow cases.
6437
6438 See the x86-64 PS ABI for details.
6439 */
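
/* For example, on the SysV x86-64 ABI

       struct s { double d; int i; };

   is 16 bytes and spans two eightbytes: classify_argument fills classes[0]
   with an SSE class for the double and classes[1] with an integer class for
   the int, and returns 2, so such an argument ends up in one SSE and one
   integer register.  */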
6440
6441 static int
6442 classify_argument (enum machine_mode mode, const_tree type,
6443 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
6444 {
6445 HOST_WIDE_INT bytes =
6446 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
6447 int words
6448 = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6449
6450 /* Variable sized entities are always passed/returned in memory. */
6451 if (bytes < 0)
6452 return 0;
6453
6454 if (mode != VOIDmode
6455 && targetm.calls.must_pass_in_stack (mode, type))
6456 return 0;
6457
6458 if (type && AGGREGATE_TYPE_P (type))
6459 {
6460 int i;
6461 tree field;
6462 enum x86_64_reg_class subclasses[MAX_CLASSES];
6463
6464 /* On x86-64 we pass structures larger than 64 bytes on the stack. */
6465 if (bytes > 64)
6466 return 0;
6467
6468 for (i = 0; i < words; i++)
6469 classes[i] = X86_64_NO_CLASS;
6470
6471 /* Zero sized arrays or structures are NO_CLASS.  We return 0 to
6472 signal the memory class, so handle this as a special case. */
6473 if (!words)
6474 {
6475 classes[0] = X86_64_NO_CLASS;
6476 return 1;
6477 }
6478
6479 /* Classify each field of record and merge classes. */
6480 switch (TREE_CODE (type))
6481 {
6482 case RECORD_TYPE:
6483 /* And now merge the fields of structure. */
6484 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6485 {
6486 if (TREE_CODE (field) == FIELD_DECL)
6487 {
6488 int num;
6489
6490 if (TREE_TYPE (field) == error_mark_node)
6491 continue;
6492
6493 /* Bitfields are always classified as integer. Handle them
6494 early, since later code would consider them to be
6495 misaligned integers. */
6496 if (DECL_BIT_FIELD (field))
6497 {
6498 for (i = (int_bit_position (field)
6499 + (bit_offset % 64)) / 8 / 8;
6500 i < ((int_bit_position (field) + (bit_offset % 64))
6501 + tree_to_shwi (DECL_SIZE (field))
6502 + 63) / 8 / 8; i++)
6503 classes[i] =
6504 merge_classes (X86_64_INTEGER_CLASS,
6505 classes[i]);
6506 }
6507 else
6508 {
6509 int pos;
6510
6511 type = TREE_TYPE (field);
6512
6513 /* Flexible array member is ignored. */
6514 if (TYPE_MODE (type) == BLKmode
6515 && TREE_CODE (type) == ARRAY_TYPE
6516 && TYPE_SIZE (type) == NULL_TREE
6517 && TYPE_DOMAIN (type) != NULL_TREE
6518 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
6519 == NULL_TREE))
6520 {
6521 static bool warned;
6522
6523 if (!warned && warn_psabi)
6524 {
6525 warned = true;
6526 inform (input_location,
6527 "the ABI of passing struct with"
6528 " a flexible array member has"
6529 " changed in GCC 4.4");
6530 }
6531 continue;
6532 }
6533 num = classify_argument (TYPE_MODE (type), type,
6534 subclasses,
6535 (int_bit_position (field)
6536 + bit_offset) % 512);
6537 if (!num)
6538 return 0;
6539 pos = (int_bit_position (field)
6540 + (bit_offset % 64)) / 8 / 8;
6541 for (i = 0; i < num && (i + pos) < words; i++)
6542 classes[i + pos] =
6543 merge_classes (subclasses[i], classes[i + pos]);
6544 }
6545 }
6546 }
6547 break;
6548
6549 case ARRAY_TYPE:
6550 /* Arrays are handled as small records. */
6551 {
6552 int num;
6553 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
6554 TREE_TYPE (type), subclasses, bit_offset);
6555 if (!num)
6556 return 0;
6557
6558 /* The partial classes are now full classes. */
6559 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
6560 subclasses[0] = X86_64_SSE_CLASS;
6561 if (subclasses[0] == X86_64_INTEGERSI_CLASS
6562 && !((bit_offset % 64) == 0 && bytes == 4))
6563 subclasses[0] = X86_64_INTEGER_CLASS;
6564
6565 for (i = 0; i < words; i++)
6566 classes[i] = subclasses[i % num];
6567
6568 break;
6569 }
6570 case UNION_TYPE:
6571 case QUAL_UNION_TYPE:
6572 /* Unions are similar to RECORD_TYPE but the offset is always 0. */
6574 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6575 {
6576 if (TREE_CODE (field) == FIELD_DECL)
6577 {
6578 int num;
6579
6580 if (TREE_TYPE (field) == error_mark_node)
6581 continue;
6582
6583 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
6584 TREE_TYPE (field), subclasses,
6585 bit_offset);
6586 if (!num)
6587 return 0;
6588 for (i = 0; i < num && i < words; i++)
6589 classes[i] = merge_classes (subclasses[i], classes[i]);
6590 }
6591 }
6592 break;
6593
6594 default:
6595 gcc_unreachable ();
6596 }
6597
6598 if (words > 2)
6599 {
6600 /* When size > 16 bytes, if the first one isn't
6601 X86_64_SSE_CLASS or any other ones aren't
6602 X86_64_SSEUP_CLASS, everything should be passed in
6603 memory. */
6604 if (classes[0] != X86_64_SSE_CLASS)
6605 return 0;
6606
6607 for (i = 1; i < words; i++)
6608 if (classes[i] != X86_64_SSEUP_CLASS)
6609 return 0;
6610 }
6611
6612 /* Final merger cleanup. */
6613 for (i = 0; i < words; i++)
6614 {
6615 /* If one class is MEMORY, everything should be passed in
6616 memory. */
6617 if (classes[i] == X86_64_MEMORY_CLASS)
6618 return 0;
6619
6620 /* X86_64_SSEUP_CLASS should always be preceded by
6621 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
6622 if (classes[i] == X86_64_SSEUP_CLASS
6623 && classes[i - 1] != X86_64_SSE_CLASS
6624 && classes[i - 1] != X86_64_SSEUP_CLASS)
6625 {
6626 /* The first one should never be X86_64_SSEUP_CLASS. */
6627 gcc_assert (i != 0);
6628 classes[i] = X86_64_SSE_CLASS;
6629 }
6630
6631 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
6632 everything should be passed in memory. */
6633 if (classes[i] == X86_64_X87UP_CLASS
6634 && (classes[i - 1] != X86_64_X87_CLASS))
6635 {
6636 static bool warned;
6637
6638 /* The first one should never be X86_64_X87UP_CLASS. */
6639 gcc_assert (i != 0);
6640 if (!warned && warn_psabi)
6641 {
6642 warned = true;
6643 inform (input_location,
6644 "the ABI of passing union with long double"
6645 " has changed in GCC 4.4");
6646 }
6647 return 0;
6648 }
6649 }
6650 return words;
6651 }
6652
6653 /* Compute the alignment needed.  We align all types to their natural
6654 boundaries, with the exception of XFmode, which is aligned to 128 bits. */
6655 if (mode != VOIDmode && mode != BLKmode)
6656 {
6657 int mode_alignment = GET_MODE_BITSIZE (mode);
6658
6659 if (mode == XFmode)
6660 mode_alignment = 128;
6661 else if (mode == XCmode)
6662 mode_alignment = 256;
6663 if (COMPLEX_MODE_P (mode))
6664 mode_alignment /= 2;
6665 /* Misaligned fields are always returned in memory. */
6666 if (bit_offset % mode_alignment)
6667 return 0;
6668 }
6669
6670 /* For V1xx modes, just use the base mode. */
6671 if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
6672 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
6673 mode = GET_MODE_INNER (mode);
6674
6675 /* Classification of atomic types. */
6676 switch (mode)
6677 {
6678 case SDmode:
6679 case DDmode:
6680 classes[0] = X86_64_SSE_CLASS;
6681 return 1;
6682 case TDmode:
6683 classes[0] = X86_64_SSE_CLASS;
6684 classes[1] = X86_64_SSEUP_CLASS;
6685 return 2;
6686 case DImode:
6687 case SImode:
6688 case HImode:
6689 case QImode:
6690 case CSImode:
6691 case CHImode:
6692 case CQImode:
6693 {
6694 int size = bit_offset + (int) GET_MODE_BITSIZE (mode);
6695
6696 /* Analyze last 128 bits only. */
6697 size = (size - 1) & 0x7f;
6698
6699 if (size < 32)
6700 {
6701 classes[0] = X86_64_INTEGERSI_CLASS;
6702 return 1;
6703 }
6704 else if (size < 64)
6705 {
6706 classes[0] = X86_64_INTEGER_CLASS;
6707 return 1;
6708 }
6709 else if (size < 64+32)
6710 {
6711 classes[0] = X86_64_INTEGER_CLASS;
6712 classes[1] = X86_64_INTEGERSI_CLASS;
6713 return 2;
6714 }
6715 else if (size < 64+64)
6716 {
6717 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
6718 return 2;
6719 }
6720 else
6721 gcc_unreachable ();
6722 }
6723 case CDImode:
6724 case TImode:
6725 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
6726 return 2;
6727 case COImode:
6728 case OImode:
6729 /* OImode shouldn't be used directly. */
6730 gcc_unreachable ();
6731 case CTImode:
6732 return 0;
6733 case SFmode:
6734 if (!(bit_offset % 64))
6735 classes[0] = X86_64_SSESF_CLASS;
6736 else
6737 classes[0] = X86_64_SSE_CLASS;
6738 return 1;
6739 case DFmode:
6740 classes[0] = X86_64_SSEDF_CLASS;
6741 return 1;
6742 case XFmode:
6743 classes[0] = X86_64_X87_CLASS;
6744 classes[1] = X86_64_X87UP_CLASS;
6745 return 2;
6746 case TFmode:
6747 classes[0] = X86_64_SSE_CLASS;
6748 classes[1] = X86_64_SSEUP_CLASS;
6749 return 2;
6750 case SCmode:
6751 classes[0] = X86_64_SSE_CLASS;
6752 if (!(bit_offset % 64))
6753 return 1;
6754 else
6755 {
6756 static bool warned;
6757
6758 if (!warned && warn_psabi)
6759 {
6760 warned = true;
6761 inform (input_location,
6762 "the ABI of passing structure with complex float"
6763 " member has changed in GCC 4.4");
6764 }
6765 classes[1] = X86_64_SSESF_CLASS;
6766 return 2;
6767 }
6768 case DCmode:
6769 classes[0] = X86_64_SSEDF_CLASS;
6770 classes[1] = X86_64_SSEDF_CLASS;
6771 return 2;
6772 case XCmode:
6773 classes[0] = X86_64_COMPLEX_X87_CLASS;
6774 return 1;
6775 case TCmode:
6776 /* This mode is larger than 16 bytes. */
6777 return 0;
6778 case V8SFmode:
6779 case V8SImode:
6780 case V32QImode:
6781 case V16HImode:
6782 case V4DFmode:
6783 case V4DImode:
6784 classes[0] = X86_64_SSE_CLASS;
6785 classes[1] = X86_64_SSEUP_CLASS;
6786 classes[2] = X86_64_SSEUP_CLASS;
6787 classes[3] = X86_64_SSEUP_CLASS;
6788 return 4;
6789 case V8DFmode:
6790 case V16SFmode:
6791 case V8DImode:
6792 case V16SImode:
6793 case V32HImode:
6794 case V64QImode:
6795 classes[0] = X86_64_SSE_CLASS;
6796 classes[1] = X86_64_SSEUP_CLASS;
6797 classes[2] = X86_64_SSEUP_CLASS;
6798 classes[3] = X86_64_SSEUP_CLASS;
6799 classes[4] = X86_64_SSEUP_CLASS;
6800 classes[5] = X86_64_SSEUP_CLASS;
6801 classes[6] = X86_64_SSEUP_CLASS;
6802 classes[7] = X86_64_SSEUP_CLASS;
6803 return 8;
6804 case V4SFmode:
6805 case V4SImode:
6806 case V16QImode:
6807 case V8HImode:
6808 case V2DFmode:
6809 case V2DImode:
6810 classes[0] = X86_64_SSE_CLASS;
6811 classes[1] = X86_64_SSEUP_CLASS;
6812 return 2;
6813 case V1TImode:
6814 case V1DImode:
6815 case V2SFmode:
6816 case V2SImode:
6817 case V4HImode:
6818 case V8QImode:
6819 classes[0] = X86_64_SSE_CLASS;
6820 return 1;
6821 case BLKmode:
6822 case VOIDmode:
6823 return 0;
6824 default:
6825 gcc_assert (VECTOR_MODE_P (mode));
6826
6827 if (bytes > 16)
6828 return 0;
6829
6830 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
6831
6832 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
6833 classes[0] = X86_64_INTEGERSI_CLASS;
6834 else
6835 classes[0] = X86_64_INTEGER_CLASS;
6836 classes[1] = X86_64_INTEGER_CLASS;
6837 return 1 + (bytes > 8);
6838 }
6839 }
6840
6841 /* Examine the argument and set the number of registers required in each
6842 class.  Return true iff the parameter should be passed in memory. */
6843
6844 static bool
6845 examine_argument (enum machine_mode mode, const_tree type, int in_return,
6846 int *int_nregs, int *sse_nregs)
6847 {
6848 enum x86_64_reg_class regclass[MAX_CLASSES];
6849 int n = classify_argument (mode, type, regclass, 0);
6850
6851 *int_nregs = 0;
6852 *sse_nregs = 0;
6853
6854 if (!n)
6855 return true;
6856 for (n--; n >= 0; n--)
6857 switch (regclass[n])
6858 {
6859 case X86_64_INTEGER_CLASS:
6860 case X86_64_INTEGERSI_CLASS:
6861 (*int_nregs)++;
6862 break;
6863 case X86_64_SSE_CLASS:
6864 case X86_64_SSESF_CLASS:
6865 case X86_64_SSEDF_CLASS:
6866 (*sse_nregs)++;
6867 break;
6868 case X86_64_NO_CLASS:
6869 case X86_64_SSEUP_CLASS:
6870 break;
6871 case X86_64_X87_CLASS:
6872 case X86_64_X87UP_CLASS:
6873 case X86_64_COMPLEX_X87_CLASS:
6874 if (!in_return)
6875 return true;
6876 break;
6877 case X86_64_MEMORY_CLASS:
6878 gcc_unreachable ();
6879 }
6880
6881 return false;
6882 }
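
/* For example, struct s { double d; long l; } classifies as one SSE and
   one integer eightbyte, so this sets *INT_NREGS = 1 and *SSE_NREGS = 1 and
   returns false.  A type whose classification contains an x87 class makes
   arguments go to memory (true is returned), but a return value may still
   legitimately use the x87 stack, hence the IN_RETURN special case.  */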
6883
6884 /* Construct a container for the argument as used by the GCC interface.  See
6885 FUNCTION_ARG for a detailed description. */
6886
6887 static rtx
6888 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
6889 const_tree type, int in_return, int nintregs, int nsseregs,
6890 const int *intreg, int sse_regno)
6891 {
6892 /* The following variables hold the static issued_error state. */
6893 static bool issued_sse_arg_error;
6894 static bool issued_sse_ret_error;
6895 static bool issued_x87_ret_error;
6896
6897 enum machine_mode tmpmode;
6898 int bytes =
6899 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
6900 enum x86_64_reg_class regclass[MAX_CLASSES];
6901 int n;
6902 int i;
6903 int nexps = 0;
6904 int needed_sseregs, needed_intregs;
6905 rtx exp[MAX_CLASSES];
6906 rtx ret;
6907
6908 n = classify_argument (mode, type, regclass, 0);
6909 if (!n)
6910 return NULL;
6911 if (examine_argument (mode, type, in_return, &needed_intregs,
6912 &needed_sseregs))
6913 return NULL;
6914 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
6915 return NULL;
6916
6917 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
6918 some less clueful developer tries to use floating-point anyway. */
6919 if (needed_sseregs && !TARGET_SSE)
6920 {
6921 if (in_return)
6922 {
6923 if (!issued_sse_ret_error)
6924 {
6925 error ("SSE register return with SSE disabled");
6926 issued_sse_ret_error = true;
6927 }
6928 }
6929 else if (!issued_sse_arg_error)
6930 {
6931 error ("SSE register argument with SSE disabled");
6932 issued_sse_arg_error = true;
6933 }
6934 return NULL;
6935 }
6936
6937 /* Likewise, error if the ABI requires us to return values in the
6938 x87 registers and the user specified -mno-80387. */
6939 if (!TARGET_FLOAT_RETURNS_IN_80387 && in_return)
6940 for (i = 0; i < n; i++)
6941 if (regclass[i] == X86_64_X87_CLASS
6942 || regclass[i] == X86_64_X87UP_CLASS
6943 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
6944 {
6945 if (!issued_x87_ret_error)
6946 {
6947 error ("x87 register return with x87 disabled");
6948 issued_x87_ret_error = true;
6949 }
6950 return NULL;
6951 }
6952
6953 /* First construct the simple cases.  Avoid SCmode, since we want to use
6954 a single register to pass this type. */
6955 if (n == 1 && mode != SCmode)
6956 switch (regclass[0])
6957 {
6958 case X86_64_INTEGER_CLASS:
6959 case X86_64_INTEGERSI_CLASS:
6960 return gen_rtx_REG (mode, intreg[0]);
6961 case X86_64_SSE_CLASS:
6962 case X86_64_SSESF_CLASS:
6963 case X86_64_SSEDF_CLASS:
6964 if (mode != BLKmode)
6965 return gen_reg_or_parallel (mode, orig_mode,
6966 SSE_REGNO (sse_regno));
6967 break;
6968 case X86_64_X87_CLASS:
6969 case X86_64_COMPLEX_X87_CLASS:
6970 return gen_rtx_REG (mode, FIRST_STACK_REG);
6971 case X86_64_NO_CLASS:
6972 /* Zero sized array, struct or class. */
6973 return NULL;
6974 default:
6975 gcc_unreachable ();
6976 }
6977 if (n == 2
6978 && regclass[0] == X86_64_SSE_CLASS
6979 && regclass[1] == X86_64_SSEUP_CLASS
6980 && mode != BLKmode)
6981 return gen_reg_or_parallel (mode, orig_mode,
6982 SSE_REGNO (sse_regno));
6983 if (n == 4
6984 && regclass[0] == X86_64_SSE_CLASS
6985 && regclass[1] == X86_64_SSEUP_CLASS
6986 && regclass[2] == X86_64_SSEUP_CLASS
6987 && regclass[3] == X86_64_SSEUP_CLASS
6988 && mode != BLKmode)
6989 return gen_reg_or_parallel (mode, orig_mode,
6990 SSE_REGNO (sse_regno));
6991 if (n == 8
6992 && regclass[0] == X86_64_SSE_CLASS
6993 && regclass[1] == X86_64_SSEUP_CLASS
6994 && regclass[2] == X86_64_SSEUP_CLASS
6995 && regclass[3] == X86_64_SSEUP_CLASS
6996 && regclass[4] == X86_64_SSEUP_CLASS
6997 && regclass[5] == X86_64_SSEUP_CLASS
6998 && regclass[6] == X86_64_SSEUP_CLASS
6999 && regclass[7] == X86_64_SSEUP_CLASS
7000 && mode != BLKmode)
7001 return gen_reg_or_parallel (mode, orig_mode,
7002 SSE_REGNO (sse_regno));
7003 if (n == 2
7004 && regclass[0] == X86_64_X87_CLASS
7005 && regclass[1] == X86_64_X87UP_CLASS)
7006 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
7007
7008 if (n == 2
7009 && regclass[0] == X86_64_INTEGER_CLASS
7010 && regclass[1] == X86_64_INTEGER_CLASS
7011 && (mode == CDImode || mode == TImode)
7012 && intreg[0] + 1 == intreg[1])
7013 return gen_rtx_REG (mode, intreg[0]);
7014
7015 /* Otherwise figure out the entries of the PARALLEL. */
7016 for (i = 0; i < n; i++)
7017 {
7018 int pos;
7019
7020 switch (regclass[i])
7021 {
7022 case X86_64_NO_CLASS:
7023 break;
7024 case X86_64_INTEGER_CLASS:
7025 case X86_64_INTEGERSI_CLASS:
7026 /* Merge TImodes on aligned occasions here too. */
7027 if (i * 8 + 8 > bytes)
7028 tmpmode
7029 = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
7030 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
7031 tmpmode = SImode;
7032 else
7033 tmpmode = DImode;
7034 /* We've requested a size (such as 24 bits) for
7035 which there is no integer mode.  Use DImode. */
7036 if (tmpmode == BLKmode)
7037 tmpmode = DImode;
7038 exp [nexps++]
7039 = gen_rtx_EXPR_LIST (VOIDmode,
7040 gen_rtx_REG (tmpmode, *intreg),
7041 GEN_INT (i*8));
7042 intreg++;
7043 break;
7044 case X86_64_SSESF_CLASS:
7045 exp [nexps++]
7046 = gen_rtx_EXPR_LIST (VOIDmode,
7047 gen_rtx_REG (SFmode,
7048 SSE_REGNO (sse_regno)),
7049 GEN_INT (i*8));
7050 sse_regno++;
7051 break;
7052 case X86_64_SSEDF_CLASS:
7053 exp [nexps++]
7054 = gen_rtx_EXPR_LIST (VOIDmode,
7055 gen_rtx_REG (DFmode,
7056 SSE_REGNO (sse_regno)),
7057 GEN_INT (i*8));
7058 sse_regno++;
7059 break;
7060 case X86_64_SSE_CLASS:
7061 pos = i;
7062 switch (n)
7063 {
7064 case 1:
7065 tmpmode = DImode;
7066 break;
7067 case 2:
7068 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
7069 {
7070 tmpmode = TImode;
7071 i++;
7072 }
7073 else
7074 tmpmode = DImode;
7075 break;
7076 case 4:
7077 gcc_assert (i == 0
7078 && regclass[1] == X86_64_SSEUP_CLASS
7079 && regclass[2] == X86_64_SSEUP_CLASS
7080 && regclass[3] == X86_64_SSEUP_CLASS);
7081 tmpmode = OImode;
7082 i += 3;
7083 break;
7084 case 8:
7085 gcc_assert (i == 0
7086 && regclass[1] == X86_64_SSEUP_CLASS
7087 && regclass[2] == X86_64_SSEUP_CLASS
7088 && regclass[3] == X86_64_SSEUP_CLASS
7089 && regclass[4] == X86_64_SSEUP_CLASS
7090 && regclass[5] == X86_64_SSEUP_CLASS
7091 && regclass[6] == X86_64_SSEUP_CLASS
7092 && regclass[7] == X86_64_SSEUP_CLASS);
7093 tmpmode = XImode;
7094 i += 7;
7095 break;
7096 default:
7097 gcc_unreachable ();
7098 }
7099 exp [nexps++]
7100 = gen_rtx_EXPR_LIST (VOIDmode,
7101 gen_rtx_REG (tmpmode,
7102 SSE_REGNO (sse_regno)),
7103 GEN_INT (pos*8));
7104 sse_regno++;
7105 break;
7106 default:
7107 gcc_unreachable ();
7108 }
7109 }
7110
7111 /* Empty aligned struct, union or class. */
7112 if (nexps == 0)
7113 return NULL;
7114
7115 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
7116 for (i = 0; i < nexps; i++)
7117 XVECEXP (ret, 0, i) = exp [i];
7118 return ret;
7119 }
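
/* A sketch of the PARALLEL built above for the first argument of type
   struct s { double d; long l; } under the SysV x86-64 ABI (classes
   SSEDF + INTEGER):

       (parallel:BLK [(expr_list (reg:DF xmm0) (const_int 0))
                      (expr_list (reg:DI di) (const_int 8))])

   i.e. the first eightbyte travels in %xmm0 and the second in %rdi.  */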
7120
7121 /* Update the data in CUM to advance over an argument of mode MODE
7122 and data type TYPE. (TYPE is null for libcalls where that information
7123 may not be available.) */
7124
7125 static void
7126 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7127 const_tree type, HOST_WIDE_INT bytes,
7128 HOST_WIDE_INT words)
7129 {
7130 switch (mode)
7131 {
7132 default:
7133 break;
7134
7135 case BLKmode:
7136 if (bytes < 0)
7137 break;
7138 /* FALLTHRU */
7139
7140 case DImode:
7141 case SImode:
7142 case HImode:
7143 case QImode:
7144 cum->words += words;
7145 cum->nregs -= words;
7146 cum->regno += words;
7147
7148 if (cum->nregs <= 0)
7149 {
7150 cum->nregs = 0;
7151 cum->regno = 0;
7152 }
7153 break;
7154
7155 case OImode:
7156 /* OImode shouldn't be used directly. */
7157 gcc_unreachable ();
7158
7159 case DFmode:
7160 if (cum->float_in_sse < 2)
7161 break;
7162 case SFmode:
7163 if (cum->float_in_sse < 1)
7164 break;
7165 /* FALLTHRU */
7166
7167 case V8SFmode:
7168 case V8SImode:
7169 case V64QImode:
7170 case V32HImode:
7171 case V16SImode:
7172 case V8DImode:
7173 case V16SFmode:
7174 case V8DFmode:
7175 case V32QImode:
7176 case V16HImode:
7177 case V4DFmode:
7178 case V4DImode:
7179 case TImode:
7180 case V16QImode:
7181 case V8HImode:
7182 case V4SImode:
7183 case V2DImode:
7184 case V4SFmode:
7185 case V2DFmode:
7186 if (!type || !AGGREGATE_TYPE_P (type))
7187 {
7188 cum->sse_words += words;
7189 cum->sse_nregs -= 1;
7190 cum->sse_regno += 1;
7191 if (cum->sse_nregs <= 0)
7192 {
7193 cum->sse_nregs = 0;
7194 cum->sse_regno = 0;
7195 }
7196 }
7197 break;
7198
7199 case V8QImode:
7200 case V4HImode:
7201 case V2SImode:
7202 case V2SFmode:
7203 case V1TImode:
7204 case V1DImode:
7205 if (!type || !AGGREGATE_TYPE_P (type))
7206 {
7207 cum->mmx_words += words;
7208 cum->mmx_nregs -= 1;
7209 cum->mmx_regno += 1;
7210 if (cum->mmx_nregs <= 0)
7211 {
7212 cum->mmx_nregs = 0;
7213 cum->mmx_regno = 0;
7214 }
7215 }
7216 break;
7217 }
7218 }
7219
7220 static void
7221 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7222 const_tree type, HOST_WIDE_INT words, bool named)
7223 {
7224 int int_nregs, sse_nregs;
7225
7226 /* Unnamed 512 and 256bit vector mode parameters are passed on stack. */
7227 if (!named && (VALID_AVX512F_REG_MODE (mode)
7228 || VALID_AVX256_REG_MODE (mode)))
7229 return;
7230
7231 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
7232 && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
7233 {
7234 cum->nregs -= int_nregs;
7235 cum->sse_nregs -= sse_nregs;
7236 cum->regno += int_nregs;
7237 cum->sse_regno += sse_nregs;
7238 }
7239 else
7240 {
7241 int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
7242 cum->words = (cum->words + align - 1) & ~(align - 1);
7243 cum->words += words;
7244 }
7245 }
7246
7247 static void
7248 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
7249 HOST_WIDE_INT words)
7250 {
7251 /* Otherwise, this should be passed indirect. */
7252 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
7253
7254 cum->words += words;
7255 if (cum->nregs > 0)
7256 {
7257 cum->nregs -= 1;
7258 cum->regno += 1;
7259 }
7260 }
7261
7262 /* Update the data in CUM to advance over an argument of mode MODE and
7263 data type TYPE. (TYPE is null for libcalls where that information
7264 may not be available.) */
7265
7266 static void
7267 ix86_function_arg_advance (cumulative_args_t cum_v, enum machine_mode mode,
7268 const_tree type, bool named)
7269 {
7270 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7271 HOST_WIDE_INT bytes, words;
7272
7273 if (mode == BLKmode)
7274 bytes = int_size_in_bytes (type);
7275 else
7276 bytes = GET_MODE_SIZE (mode);
7277 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7278
7279 if (type)
7280 mode = type_natural_mode (type, NULL, false);
7281
7282 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7283 function_arg_advance_ms_64 (cum, bytes, words);
7284 else if (TARGET_64BIT)
7285 function_arg_advance_64 (cum, mode, type, words, named);
7286 else
7287 function_arg_advance_32 (cum, mode, type, bytes, words);
7288 }
7289
7290 /* Define where to put the arguments to a function.
7291 Value is zero to push the argument on the stack,
7292 or a hard register in which to store the argument.
7293
7294 MODE is the argument's machine mode.
7295 TYPE is the data type of the argument (as a tree).
7296 This is null for libcalls where that information may
7297 not be available.
7298 CUM is a variable of type CUMULATIVE_ARGS which gives info about
7299 the preceding args and about the function being called.
7300 NAMED is nonzero if this argument is a named parameter
7301 (otherwise it is an extra parameter matching an ellipsis). */
7302
7303 static rtx
7304 function_arg_32 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
7305 enum machine_mode orig_mode, const_tree type,
7306 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
7307 {
7308 /* Avoid the AL settings for the Unix64 ABI. */
7309 if (mode == VOIDmode)
7310 return constm1_rtx;
7311
7312 switch (mode)
7313 {
7314 default:
7315 break;
7316
7317 case BLKmode:
7318 if (bytes < 0)
7319 break;
7320 /* FALLTHRU */
7321 case DImode:
7322 case SImode:
7323 case HImode:
7324 case QImode:
7325 if (words <= cum->nregs)
7326 {
7327 int regno = cum->regno;
7328
7329 /* Fastcall allocates the first two DWORD (SImode) or
7330 smaller arguments in ECX and EDX, provided the argument
7331 is not an aggregate type. */
7332 if (cum->fastcall)
7333 {
7334 if (mode == BLKmode
7335 || mode == DImode
7336 || (type && AGGREGATE_TYPE_P (type)))
7337 break;
7338
7339 /* ECX not EAX is the first allocated register. */
7340 if (regno == AX_REG)
7341 regno = CX_REG;
7342 }
7343 return gen_rtx_REG (mode, regno);
7344 }
7345 break;
7346
7347 case DFmode:
7348 if (cum->float_in_sse < 2)
7349 break;
7350 case SFmode:
7351 if (cum->float_in_sse < 1)
7352 break;
7353 /* FALLTHRU */
7354 case TImode:
7355 /* In 32bit, we pass TImode in xmm registers. */
7356 case V16QImode:
7357 case V8HImode:
7358 case V4SImode:
7359 case V2DImode:
7360 case V4SFmode:
7361 case V2DFmode:
7362 if (!type || !AGGREGATE_TYPE_P (type))
7363 {
7364 if (cum->sse_nregs)
7365 return gen_reg_or_parallel (mode, orig_mode,
7366 cum->sse_regno + FIRST_SSE_REG);
7367 }
7368 break;
7369
7370 case OImode:
7371 case XImode:
7372 /* OImode and XImode shouldn't be used directly. */
7373 gcc_unreachable ();
7374
7375 case V64QImode:
7376 case V32HImode:
7377 case V16SImode:
7378 case V8DImode:
7379 case V16SFmode:
7380 case V8DFmode:
7381 case V8SFmode:
7382 case V8SImode:
7383 case V32QImode:
7384 case V16HImode:
7385 case V4DFmode:
7386 case V4DImode:
7387 if (!type || !AGGREGATE_TYPE_P (type))
7388 {
7389 if (cum->sse_nregs)
7390 return gen_reg_or_parallel (mode, orig_mode,
7391 cum->sse_regno + FIRST_SSE_REG);
7392 }
7393 break;
7394
7395 case V8QImode:
7396 case V4HImode:
7397 case V2SImode:
7398 case V2SFmode:
7399 case V1TImode:
7400 case V1DImode:
7401 if (!type || !AGGREGATE_TYPE_P (type))
7402 {
7403 if (cum->mmx_nregs)
7404 return gen_reg_or_parallel (mode, orig_mode,
7405 cum->mmx_regno + FIRST_MMX_REG);
7406 }
7407 break;
7408 }
7409
7410 return NULL_RTX;
7411 }
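
/* A quick 32-bit illustration of the register selection above:

       __attribute__ ((fastcall)) int f (int a, int b, int c);

   sets cum->fastcall, so A is passed in ECX, B in EDX and C on the stack;
   with regparm (3) instead, A, B and C would use EAX, EDX and ECX.  */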
7412
7413 static rtx
7414 function_arg_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
7415 enum machine_mode orig_mode, const_tree type, bool named)
7416 {
7417 /* Handle a hidden AL argument containing number of registers
7418 for varargs x86-64 functions. */
7419 if (mode == VOIDmode)
7420 return GEN_INT (cum->maybe_vaarg
7421 ? (cum->sse_nregs < 0
7422 ? X86_64_SSE_REGPARM_MAX
7423 : cum->sse_regno)
7424 : -1);
7425
7426 switch (mode)
7427 {
7428 default:
7429 break;
7430
7431 case V8SFmode:
7432 case V8SImode:
7433 case V32QImode:
7434 case V16HImode:
7435 case V4DFmode:
7436 case V4DImode:
7437 case V16SFmode:
7438 case V16SImode:
7439 case V64QImode:
7440 case V32HImode:
7441 case V8DFmode:
7442 case V8DImode:
7443 /* Unnamed 256 and 512bit vector mode parameters are passed on stack. */
7444 if (!named)
7445 return NULL;
7446 break;
7447 }
7448
7449 return construct_container (mode, orig_mode, type, 0, cum->nregs,
7450 cum->sse_nregs,
7451 &x86_64_int_parameter_registers [cum->regno],
7452 cum->sse_regno);
7453 }
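
/* A note on the hidden AL value above: for a SysV x86-64 varargs call such
   as

       printf ("%f\n", 1.0);

   the caller loads the number of vector registers actually used (here 1,
   for %xmm0) into %al before the call, so the callee's prologue can skip
   saving unused SSE registers; see setup_incoming_varargs_64 below.  */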
7454
7455 static rtx
7456 function_arg_ms_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
7457 enum machine_mode orig_mode, bool named,
7458 HOST_WIDE_INT bytes)
7459 {
7460 unsigned int regno;
7461
7462 /* We need to add a clobber for MS_ABI->SYSV ABI calls in expand_call.
7463 We use the value -2 to specify that the current function call uses the MS ABI. */
7464 if (mode == VOIDmode)
7465 return GEN_INT (-2);
7466
7467 /* If we've run out of registers, it goes on the stack. */
7468 if (cum->nregs == 0)
7469 return NULL_RTX;
7470
7471 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
7472
7473 /* Only floating point modes are passed in anything but integer regs. */
7474 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
7475 {
7476 if (named)
7477 regno = cum->regno + FIRST_SSE_REG;
7478 else
7479 {
7480 rtx t1, t2;
7481
7482 /* Unnamed floating parameters are passed in both the
7483 SSE and integer registers. */
7484 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
7485 t2 = gen_rtx_REG (mode, regno);
7486 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
7487 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
7488 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
7489 }
7490 }
7491 /* Handle aggregate types passed in registers. */
7492 if (orig_mode == BLKmode)
7493 {
7494 if (bytes > 0 && bytes <= 8)
7495 mode = (bytes > 4 ? DImode : SImode);
7496 if (mode == BLKmode)
7497 mode = DImode;
7498 }
7499
7500 return gen_reg_or_parallel (mode, orig_mode, regno);
7501 }
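
/* A short MS ABI illustration of the slot-based assignment above:

       void f (int a, double b, int c, double d);

   Each parameter owns one of the four register slots, so A goes in ECX,
   B in XMM1, C in R8D and D in XMM3.  A floating-point value in the
   variadic part of a call (NAMED false) is instead passed in both the SSE
   and the integer register of its slot, as built by the PARALLEL above.  */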
7502
7503 /* Return where to put the arguments to a function.
7504 Return zero to push the argument on the stack, or a hard register in which to store the argument.
7505
7506 MODE is the argument's machine mode. TYPE is the data type of the
7507 argument. It is null for libcalls where that information may not be
7508 available. CUM gives information about the preceding args and about
7509 the function being called. NAMED is nonzero if this argument is a
7510 named parameter (otherwise it is an extra parameter matching an
7511 ellipsis). */
7512
7513 static rtx
7514 ix86_function_arg (cumulative_args_t cum_v, enum machine_mode omode,
7515 const_tree type, bool named)
7516 {
7517 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7518 enum machine_mode mode = omode;
7519 HOST_WIDE_INT bytes, words;
7520 rtx arg;
7521
7522 if (mode == BLKmode)
7523 bytes = int_size_in_bytes (type);
7524 else
7525 bytes = GET_MODE_SIZE (mode);
7526 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7527
7528 /* To simplify the code below, represent vector types with a vector mode
7529 even if MMX/SSE are not active. */
7530 if (type && TREE_CODE (type) == VECTOR_TYPE)
7531 mode = type_natural_mode (type, cum, false);
7532
7533 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7534 arg = function_arg_ms_64 (cum, mode, omode, named, bytes);
7535 else if (TARGET_64BIT)
7536 arg = function_arg_64 (cum, mode, omode, type, named);
7537 else
7538 arg = function_arg_32 (cum, mode, omode, type, bytes, words);
7539
7540 return arg;
7541 }
7542
7543 /* A C expression that indicates when an argument must be passed by
7544 reference. If nonzero for an argument, a copy of that argument is
7545 made in memory and a pointer to the argument is passed instead of
7546 the argument itself. The pointer is passed in whatever way is
7547 appropriate for passing a pointer to that type. */
7548
7549 static bool
7550 ix86_pass_by_reference (cumulative_args_t cum_v, enum machine_mode mode,
7551 const_tree type, bool)
7552 {
7553 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7554
7555 /* See Windows x64 Software Convention. */
7556 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7557 {
7558 int msize = (int) GET_MODE_SIZE (mode);
7559 if (type)
7560 {
7561 /* Arrays are passed by reference. */
7562 if (TREE_CODE (type) == ARRAY_TYPE)
7563 return true;
7564
7565 if (AGGREGATE_TYPE_P (type))
7566 {
7567 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
7568 are passed by reference. */
7569 msize = int_size_in_bytes (type);
7570 }
7571 }
7572
7573 /* __m128 is passed by reference. */
7574 switch (msize) {
7575 case 1: case 2: case 4: case 8:
7576 break;
7577 default:
7578 return true;
7579 }
7580 }
7581 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
7582 return true;
7583
7584 return false;
7585 }
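
/* Examples of the rules above under the 64-bit MS ABI:

       struct s3 { char c[3]; };      passed by reference (size not 1/2/4/8)
       struct s8 { double d; };       passed by value
       __m128 v;                      passed by reference
       int a[4];                      arrays are always passed by reference

   Under the 64-bit SysV ABI only variable-sized types are forced through
   this path.  */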
7586
7587 /* Return true when TYPE should be 128bit aligned for 32bit argument
7588 passing ABI. XXX: This function is obsolete and is only used for
7589 checking psABI compatibility with previous versions of GCC. */
7590
7591 static bool
7592 ix86_compat_aligned_value_p (const_tree type)
7593 {
7594 enum machine_mode mode = TYPE_MODE (type);
7595 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
7596 || mode == TDmode
7597 || mode == TFmode
7598 || mode == TCmode)
7599 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
7600 return true;
7601 if (TYPE_ALIGN (type) < 128)
7602 return false;
7603
7604 if (AGGREGATE_TYPE_P (type))
7605 {
7606 /* Walk the aggregates recursively. */
7607 switch (TREE_CODE (type))
7608 {
7609 case RECORD_TYPE:
7610 case UNION_TYPE:
7611 case QUAL_UNION_TYPE:
7612 {
7613 tree field;
7614
7615 /* Walk all the structure fields. */
7616 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
7617 {
7618 if (TREE_CODE (field) == FIELD_DECL
7619 && ix86_compat_aligned_value_p (TREE_TYPE (field)))
7620 return true;
7621 }
7622 break;
7623 }
7624
7625 case ARRAY_TYPE:
7626 /* Just in case some language passes arrays by value. */
7627 if (ix86_compat_aligned_value_p (TREE_TYPE (type)))
7628 return true;
7629 break;
7630
7631 default:
7632 gcc_unreachable ();
7633 }
7634 }
7635 return false;
7636 }
7637
7638 /* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
7639 XXX: This function is obsolete and is only used for checking psABI
7640 compatibility with previous versions of GCC. */
7641
7642 static unsigned int
7643 ix86_compat_function_arg_boundary (enum machine_mode mode,
7644 const_tree type, unsigned int align)
7645 {
7646 /* In 32bit, only _Decimal128 and __float128 are aligned to their
7647 natural boundaries. */
7648 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
7649 {
7650 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
7651 make an exception for SSE modes since these require 128bit
7652 alignment.
7653
7654 The handling here differs from field_alignment. ICC aligns MMX
7655 arguments to 4 byte boundaries, while structure fields are aligned
7656 to 8 byte boundaries. */
7657 if (!type)
7658 {
7659 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
7660 align = PARM_BOUNDARY;
7661 }
7662 else
7663 {
7664 if (!ix86_compat_aligned_value_p (type))
7665 align = PARM_BOUNDARY;
7666 }
7667 }
7668 if (align > BIGGEST_ALIGNMENT)
7669 align = BIGGEST_ALIGNMENT;
7670 return align;
7671 }
7672
7673 /* Return true when TYPE should be 128bit aligned for 32bit argument
7674 passing ABI. */
7675
7676 static bool
7677 ix86_contains_aligned_value_p (const_tree type)
7678 {
7679 enum machine_mode mode = TYPE_MODE (type);
7680
7681 if (mode == XFmode || mode == XCmode)
7682 return false;
7683
7684 if (TYPE_ALIGN (type) < 128)
7685 return false;
7686
7687 if (AGGREGATE_TYPE_P (type))
7688 {
7689 /* Walk the aggregates recursively. */
7690 switch (TREE_CODE (type))
7691 {
7692 case RECORD_TYPE:
7693 case UNION_TYPE:
7694 case QUAL_UNION_TYPE:
7695 {
7696 tree field;
7697
7698 /* Walk all the structure fields. */
7699 for (field = TYPE_FIELDS (type);
7700 field;
7701 field = DECL_CHAIN (field))
7702 {
7703 if (TREE_CODE (field) == FIELD_DECL
7704 && ix86_contains_aligned_value_p (TREE_TYPE (field)))
7705 return true;
7706 }
7707 break;
7708 }
7709
7710 case ARRAY_TYPE:
7711 /* Just in case some language passes arrays by value. */
7712 if (ix86_contains_aligned_value_p (TREE_TYPE (type)))
7713 return true;
7714 break;
7715
7716 default:
7717 gcc_unreachable ();
7718 }
7719 }
7720 else
7721 return TYPE_ALIGN (type) >= 128;
7722
7723 return false;
7724 }
7725
7726 /* Gives the alignment boundary, in bits, of an argument with the
7727 specified mode and type. */
7728
7729 static unsigned int
7730 ix86_function_arg_boundary (enum machine_mode mode, const_tree type)
7731 {
7732 unsigned int align;
7733 if (type)
7734 {
7735 /* Since the main variant type is used for the call, convert the
7736 given type to its main variant. */
7737 type = TYPE_MAIN_VARIANT (type);
7738 align = TYPE_ALIGN (type);
7739 }
7740 else
7741 align = GET_MODE_ALIGNMENT (mode);
7742 if (align < PARM_BOUNDARY)
7743 align = PARM_BOUNDARY;
7744 else
7745 {
7746 static bool warned;
7747 unsigned int saved_align = align;
7748
7749 if (!TARGET_64BIT)
7750 {
7751 /* i386 ABI defines XFmode arguments to be 4 byte aligned. */
7752 if (!type)
7753 {
7754 if (mode == XFmode || mode == XCmode)
7755 align = PARM_BOUNDARY;
7756 }
7757 else if (!ix86_contains_aligned_value_p (type))
7758 align = PARM_BOUNDARY;
7759
7760 if (align < 128)
7761 align = PARM_BOUNDARY;
7762 }
7763
7764 if (warn_psabi
7765 && !warned
7766 && align != ix86_compat_function_arg_boundary (mode, type,
7767 saved_align))
7768 {
7769 warned = true;
7770 inform (input_location,
7771 "The ABI for passing parameters with %d-byte"
7772 " alignment has changed in GCC 4.6",
7773 align / BITS_PER_UNIT);
7774 }
7775 }
7776
7777 return align;
7778 }
7779
7780 /* Return true if N is a possible register number of function value. */
7781
7782 static bool
7783 ix86_function_value_regno_p (const unsigned int regno)
7784 {
7785 switch (regno)
7786 {
7787 case AX_REG:
7788 return true;
7789 case DX_REG:
7790 return (!TARGET_64BIT || ix86_abi != MS_ABI);
7791 case DI_REG:
7792 case SI_REG:
7793 return TARGET_64BIT && ix86_abi != MS_ABI;
7794
7795 /* Complex values are returned in %st(0)/%st(1) pair. */
7796 case ST0_REG:
7797 case ST1_REG:
7798 /* TODO: The function should depend on current function ABI but
7799 builtins.c would need updating then. Therefore we use the
7800 default ABI. */
7801 if (TARGET_64BIT && ix86_abi == MS_ABI)
7802 return false;
7803 return TARGET_FLOAT_RETURNS_IN_80387;
7804
7805 /* Complex values are returned in %xmm0/%xmm1 pair. */
7806 case XMM0_REG:
7807 case XMM1_REG:
7808 return TARGET_SSE;
7809
7810 case MM0_REG:
7811 if (TARGET_MACHO || TARGET_64BIT)
7812 return false;
7813 return TARGET_MMX;
7814 }
7815
7816 return false;
7817 }
7818
7819 /* Define how to find the value returned by a function.
7820 VALTYPE is the data type of the value (as a tree).
7821 If the precise function being called is known, FUNC is its FUNCTION_DECL;
7822 otherwise, FUNC is 0. */
7823
7824 static rtx
7825 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
7826 const_tree fntype, const_tree fn)
7827 {
7828 unsigned int regno;
7829
7830 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
7831 we normally prevent this case when mmx is not available. However
7832 some ABIs may require the result to be returned like DImode. */
7833 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
7834 regno = FIRST_MMX_REG;
7835
7836 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
7837 we prevent this case when sse is not available. However some ABIs
7838 may require the result to be returned like integer TImode. */
7839 else if (mode == TImode
7840 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
7841 regno = FIRST_SSE_REG;
7842
7843 /* 32-byte vector modes in %ymm0. */
7844 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
7845 regno = FIRST_SSE_REG;
7846
7847 /* 64-byte vector modes in %zmm0. */
7848 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
7849 regno = FIRST_SSE_REG;
7850
7851 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
7852 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
7853 regno = FIRST_FLOAT_REG;
7854 else
7855 /* Most things go in %eax. */
7856 regno = AX_REG;
7857
7858 /* Override FP return register with %xmm0 for local functions when
7859 SSE math is enabled or for functions with sseregparm attribute. */
7860 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
7861 {
7862 int sse_level = ix86_function_sseregparm (fntype, fn, false);
7863 if ((sse_level >= 1 && mode == SFmode)
7864 || (sse_level == 2 && mode == DFmode))
7865 regno = FIRST_SSE_REG;
7866 }
7867
7868 /* OImode shouldn't be used directly. */
7869 gcc_assert (mode != OImode);
7870
7871 return gen_rtx_REG (orig_mode, regno);
7872 }
7873
7874 static rtx
7875 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
7876 const_tree valtype)
7877 {
7878 rtx ret;
7879
7880 /* Handle libcalls, which don't provide a type node. */
7881 if (valtype == NULL)
7882 {
7883 unsigned int regno;
7884
7885 switch (mode)
7886 {
7887 case SFmode:
7888 case SCmode:
7889 case DFmode:
7890 case DCmode:
7891 case TFmode:
7892 case SDmode:
7893 case DDmode:
7894 case TDmode:
7895 regno = FIRST_SSE_REG;
7896 break;
7897 case XFmode:
7898 case XCmode:
7899 regno = FIRST_FLOAT_REG;
7900 break;
7901 case TCmode:
7902 return NULL;
7903 default:
7904 regno = AX_REG;
7905 }
7906
7907 return gen_rtx_REG (mode, regno);
7908 }
7909 else if (POINTER_TYPE_P (valtype))
7910 {
7911 /* Pointers are always returned in word_mode. */
7912 mode = word_mode;
7913 }
7914
7915 ret = construct_container (mode, orig_mode, valtype, 1,
7916 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
7917 x86_64_int_return_registers, 0);
7918
7919 /* For zero sized structures, construct_container returns NULL, but we
7920 need to keep the rest of the compiler happy by returning a meaningful value. */
7921 if (!ret)
7922 ret = gen_rtx_REG (orig_mode, AX_REG);
7923
7924 return ret;
7925 }
7926
7927 static rtx
7928 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode,
7929 const_tree valtype)
7930 {
7931 unsigned int regno = AX_REG;
7932
7933 if (TARGET_SSE)
7934 {
7935 switch (GET_MODE_SIZE (mode))
7936 {
7937 case 16:
7938 if (valtype != NULL_TREE
7939 && !VECTOR_INTEGER_TYPE_P (valtype)
7941 && !INTEGRAL_TYPE_P (valtype)
7942 && !VECTOR_FLOAT_TYPE_P (valtype))
7943 break;
7944 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
7945 && !COMPLEX_MODE_P (mode))
7946 regno = FIRST_SSE_REG;
7947 break;
7948 case 8:
7949 case 4:
7950 if (mode == SFmode || mode == DFmode)
7951 regno = FIRST_SSE_REG;
7952 break;
7953 default:
7954 break;
7955 }
7956 }
7957 return gen_rtx_REG (orig_mode, regno);
7958 }
7959
7960 static rtx
7961 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
7962 enum machine_mode orig_mode, enum machine_mode mode)
7963 {
7964 const_tree fn, fntype;
7965
7966 fn = NULL_TREE;
7967 if (fntype_or_decl && DECL_P (fntype_or_decl))
7968 fn = fntype_or_decl;
7969 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
7970
7971 if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
7972 return function_value_ms_64 (orig_mode, mode, valtype);
7973 else if (TARGET_64BIT)
7974 return function_value_64 (orig_mode, mode, valtype);
7975 else
7976 return function_value_32 (orig_mode, mode, fntype, fn);
7977 }
7978
7979 static rtx
7980 ix86_function_value (const_tree valtype, const_tree fntype_or_decl, bool)
7981 {
7982 enum machine_mode mode, orig_mode;
7983
7984 orig_mode = TYPE_MODE (valtype);
7985 mode = type_natural_mode (valtype, NULL, true);
7986 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
7987 }
7988
7989 /* Pointer function arguments and return values are promoted to
7990 word_mode. */
7991
7992 static enum machine_mode
7993 ix86_promote_function_mode (const_tree type, enum machine_mode mode,
7994 int *punsignedp, const_tree fntype,
7995 int for_return)
7996 {
7997 if (type != NULL_TREE && POINTER_TYPE_P (type))
7998 {
7999 *punsignedp = POINTERS_EXTEND_UNSIGNED;
8000 return word_mode;
8001 }
8002 return default_promote_function_mode (type, mode, punsignedp, fntype,
8003 for_return);
8004 }
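
/* For instance, on x32 (where Pmode is SImode but word_mode is DImode) a
   pointer argument or return value such as

       char *f (char *p);

   is promoted to DImode and zero-extended, since POINTERS_EXTEND_UNSIGNED
   is nonzero for this target; non-pointer types fall back to the default
   promotion rules.  */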
8005
8006 /* Return true if a structure, union or array with MODE containing FIELD
8007 should be accessed using BLKmode. */
8008
8009 static bool
8010 ix86_member_type_forces_blk (const_tree field, enum machine_mode mode)
8011 {
8012 /* Union with XFmode must be in BLKmode. */
8013 return (mode == XFmode
8014 && (TREE_CODE (DECL_FIELD_CONTEXT (field)) == UNION_TYPE
8015 || TREE_CODE (DECL_FIELD_CONTEXT (field)) == QUAL_UNION_TYPE));
8016 }
8017
8018 rtx
8019 ix86_libcall_value (enum machine_mode mode)
8020 {
8021 return ix86_function_value_1 (NULL, NULL, mode, mode);
8022 }
8023
8024 /* Return true iff type is returned in memory. */
8025
8026 static bool
8027 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
8028 {
8029 #ifdef SUBTARGET_RETURN_IN_MEMORY
8030 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
8031 #else
8032 const enum machine_mode mode = type_natural_mode (type, NULL, true);
8033 HOST_WIDE_INT size;
8034
8035 if (TARGET_64BIT)
8036 {
8037 if (ix86_function_type_abi (fntype) == MS_ABI)
8038 {
8039 size = int_size_in_bytes (type);
8040
8041 /* __m128 is returned in xmm0. */
8042 if ((!type || VECTOR_INTEGER_TYPE_P (type)
8043 || INTEGRAL_TYPE_P (type)
8044 || VECTOR_FLOAT_TYPE_P (type))
8045 && (SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
8046 && !COMPLEX_MODE_P (mode)
8047 && (GET_MODE_SIZE (mode) == 16 || size == 16))
8048 return false;
8049
8050 /* Otherwise, the size must be exactly 1, 2, 4 or 8 bytes. */
8051 return size != 1 && size != 2 && size != 4 && size != 8;
8052 }
8053 else
8054 {
8055 int needed_intregs, needed_sseregs;
8056
8057 return examine_argument (mode, type, 1,
8058 &needed_intregs, &needed_sseregs);
8059 }
8060 }
8061 else
8062 {
8063 if (mode == BLKmode)
8064 return true;
8065
8066 size = int_size_in_bytes (type);
8067
8068 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
8069 return false;
8070
8071 if (VECTOR_MODE_P (mode) || mode == TImode)
8072 {
8073 /* User-created vectors small enough to fit in EAX. */
8074 if (size < 8)
8075 return false;
8076
8077 /* Unless the ABI prescribes otherwise,
8078 MMX/3dNow values are returned in MM0 if available. */
8079
8080 if (size == 8)
8081 return TARGET_VECT8_RETURNS || !TARGET_MMX;
8082
8083 /* SSE values are returned in XMM0 if available. */
8084 if (size == 16)
8085 return !TARGET_SSE;
8086
8087 /* AVX values are returned in YMM0 if available. */
8088 if (size == 32)
8089 return !TARGET_AVX;
8090
8091 /* AVX512F values are returned in ZMM0 if available. */
8092 if (size == 64)
8093 return !TARGET_AVX512F;
8094 }
8095
8096 if (mode == XFmode)
8097 return false;
8098
8099 if (size > 12)
8100 return true;
8101
8102 /* OImode shouldn't be used directly. */
8103 gcc_assert (mode != OImode);
8104
8105 return false;
8106 }
8107 #endif
8108 }
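
/* A few concrete 32-bit cases of the logic above (assuming no
   SUBTARGET_RETURN_IN_MEMORY override):

       long double f (void);       XFmode, returned in %st(0), not memory
       _Complex double g (void);   16 bytes > 12, returned in memory
       __m128 h (void);            returned in %xmm0 with SSE, else memory

   In 64-bit SysV mode the decision simply follows examine_argument: whatever
   does not fit the register classification is returned in memory.  */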
8109
8110 \f
8111 /* Create the va_list data type. */
8112
8113 /* Returns the calling-convention-specific va_list data type.
8114 The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI. */
8115
8116 static tree
8117 ix86_build_builtin_va_list_abi (enum calling_abi abi)
8118 {
8119 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
8120
8121 /* For i386 we use a plain pointer to the argument area. */
8122 if (!TARGET_64BIT || abi == MS_ABI)
8123 return build_pointer_type (char_type_node);
8124
8125 record = lang_hooks.types.make_type (RECORD_TYPE);
8126 type_decl = build_decl (BUILTINS_LOCATION,
8127 TYPE_DECL, get_identifier ("__va_list_tag"), record);
8128
8129 f_gpr = build_decl (BUILTINS_LOCATION,
8130 FIELD_DECL, get_identifier ("gp_offset"),
8131 unsigned_type_node);
8132 f_fpr = build_decl (BUILTINS_LOCATION,
8133 FIELD_DECL, get_identifier ("fp_offset"),
8134 unsigned_type_node);
8135 f_ovf = build_decl (BUILTINS_LOCATION,
8136 FIELD_DECL, get_identifier ("overflow_arg_area"),
8137 ptr_type_node);
8138 f_sav = build_decl (BUILTINS_LOCATION,
8139 FIELD_DECL, get_identifier ("reg_save_area"),
8140 ptr_type_node);
8141
8142 va_list_gpr_counter_field = f_gpr;
8143 va_list_fpr_counter_field = f_fpr;
8144
8145 DECL_FIELD_CONTEXT (f_gpr) = record;
8146 DECL_FIELD_CONTEXT (f_fpr) = record;
8147 DECL_FIELD_CONTEXT (f_ovf) = record;
8148 DECL_FIELD_CONTEXT (f_sav) = record;
8149
8150 TYPE_STUB_DECL (record) = type_decl;
8151 TYPE_NAME (record) = type_decl;
8152 TYPE_FIELDS (record) = f_gpr;
8153 DECL_CHAIN (f_gpr) = f_fpr;
8154 DECL_CHAIN (f_fpr) = f_ovf;
8155 DECL_CHAIN (f_ovf) = f_sav;
8156
8157 layout_type (record);
8158
8159 /* The correct type is an array type of one element. */
8160 return build_array_type (record, build_index_type (size_zero_node));
8161 }
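
/* The record built above is the standard SysV x86-64 va_list, roughly
   equivalent to the user-level declaration

       typedef struct __va_list_tag
       {
         unsigned int gp_offset;
         unsigned int fp_offset;
         void *overflow_arg_area;
         void *reg_save_area;
       } va_list[1];

   while 32-bit and MS ABI targets just use a plain char pointer.  */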
8162
8163 /* Setup the builtin va_list data type and for 64-bit the additional
8164 calling convention specific va_list data types. */
8165
8166 static tree
8167 ix86_build_builtin_va_list (void)
8168 {
8169 tree ret = ix86_build_builtin_va_list_abi (ix86_abi);
8170
8171 /* Initialize abi specific va_list builtin types. */
8172 if (TARGET_64BIT)
8173 {
8174 tree t;
8175 if (ix86_abi == MS_ABI)
8176 {
8177 t = ix86_build_builtin_va_list_abi (SYSV_ABI);
8178 if (TREE_CODE (t) != RECORD_TYPE)
8179 t = build_variant_type_copy (t);
8180 sysv_va_list_type_node = t;
8181 }
8182 else
8183 {
8184 t = ret;
8185 if (TREE_CODE (t) != RECORD_TYPE)
8186 t = build_variant_type_copy (t);
8187 sysv_va_list_type_node = t;
8188 }
8189 if (ix86_abi != MS_ABI)
8190 {
8191 t = ix86_build_builtin_va_list_abi (MS_ABI);
8192 if (TREE_CODE (t) != RECORD_TYPE)
8193 t = build_variant_type_copy (t);
8194 ms_va_list_type_node = t;
8195 }
8196 else
8197 {
8198 t = ret;
8199 if (TREE_CODE (t) != RECORD_TYPE)
8200 t = build_variant_type_copy (t);
8201 ms_va_list_type_node = t;
8202 }
8203 }
8204
8205 return ret;
8206 }
8207
8208 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
8209
8210 static void
8211 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
8212 {
8213 rtx save_area, mem;
8214 alias_set_type set;
8215 int i, max;
8216
8217 /* GPR size of varargs save area. */
8218 if (cfun->va_list_gpr_size)
8219 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
8220 else
8221 ix86_varargs_gpr_size = 0;
8222
8223 /* FPR size of varargs save area. We don't need it if we don't pass
8224 anything in SSE registers. */
8225 if (TARGET_SSE && cfun->va_list_fpr_size)
8226 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
8227 else
8228 ix86_varargs_fpr_size = 0;
8229
8230 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
8231 return;
8232
8233 save_area = frame_pointer_rtx;
8234 set = get_varargs_alias_set ();
8235
8236 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
8237 if (max > X86_64_REGPARM_MAX)
8238 max = X86_64_REGPARM_MAX;
8239
8240 for (i = cum->regno; i < max; i++)
8241 {
8242 mem = gen_rtx_MEM (word_mode,
8243 plus_constant (Pmode, save_area, i * UNITS_PER_WORD));
8244 MEM_NOTRAP_P (mem) = 1;
8245 set_mem_alias_set (mem, set);
8246 emit_move_insn (mem,
8247 gen_rtx_REG (word_mode,
8248 x86_64_int_parameter_registers[i]));
8249 }
8250
8251 if (ix86_varargs_fpr_size)
8252 {
8253 enum machine_mode smode;
8254 rtx label, test;
8255
8256 /* Now emit code to save SSE registers. The AX parameter contains the number
8257 of SSE parameter registers used to call this function, though all we
8258 actually check here is the zero/non-zero status. */
8259
8260 label = gen_label_rtx ();
8261 test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
8262 emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
8263 label));
8264
8265 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
8266 we used movdqa (i.e. TImode) instead? Perhaps even better would
8267 be if we could determine the real mode of the data, via a hook
8268 into pass_stdarg. Ignore all that for now. */
8269 smode = V4SFmode;
8270 if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
8271 crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);
8272
8273 max = cum->sse_regno + cfun->va_list_fpr_size / 16;
8274 if (max > X86_64_SSE_REGPARM_MAX)
8275 max = X86_64_SSE_REGPARM_MAX;
8276
8277 for (i = cum->sse_regno; i < max; ++i)
8278 {
8279 mem = plus_constant (Pmode, save_area,
8280 i * 16 + ix86_varargs_gpr_size);
8281 mem = gen_rtx_MEM (smode, mem);
8282 MEM_NOTRAP_P (mem) = 1;
8283 set_mem_alias_set (mem, set);
8284 set_mem_align (mem, GET_MODE_ALIGNMENT (smode));
8285
8286 emit_move_insn (mem, gen_rtx_REG (smode, SSE_REGNO (i)));
8287 }
8288
8289 emit_label (label);
8290 }
8291 }
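
/* Illustrative layout of the register save area written above, assuming
   the default SysV limits (X86_64_REGPARM_MAX == 6 integer registers and
   X86_64_SSE_REGPARM_MAX == 8 SSE registers); this is only a sketch,
   with offsets relative to the start of the save area:

     [  0 ..  47]  rdi, rsi, rdx, rcx, r8, r9   (8 bytes each)
     [ 48 .. 175]  xmm0 .. xmm7                 (16 bytes each)

   The gp_offset and fp_offset fields of the va_list index into this
   block.  */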
8292
8293 static void
8294 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
8295 {
8296 alias_set_type set = get_varargs_alias_set ();
8297 int i;
8298
8299 /* Reset to zero, as there might be a sysv va_arg used
8300 before. */
8301 ix86_varargs_gpr_size = 0;
8302 ix86_varargs_fpr_size = 0;
8303
8304 for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
8305 {
8306 rtx reg, mem;
8307
8308 mem = gen_rtx_MEM (Pmode,
8309 plus_constant (Pmode, virtual_incoming_args_rtx,
8310 i * UNITS_PER_WORD));
8311 MEM_NOTRAP_P (mem) = 1;
8312 set_mem_alias_set (mem, set);
8313
8314 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
8315 emit_move_insn (mem, reg);
8316 }
8317 }
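
/* A sketch of what the loop above does for the MS ABI, assuming the four
   integer argument registers rcx, rdx, r8 and r9: each one is spilled to
   its caller-allocated home slot just above the return address, i.e. at
   incoming-args + 0, 8, 16 and 24, so va_arg can later walk all the
   arguments as one contiguous byte stream.  */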
8318
8319 static void
8320 ix86_setup_incoming_varargs (cumulative_args_t cum_v, enum machine_mode mode,
8321 tree type, int *, int no_rtl)
8322 {
8323 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8324 CUMULATIVE_ARGS next_cum;
8325 tree fntype;
8326
8327 /* This argument doesn't appear to be used anymore. Which is good,
8328 because the old code here didn't suppress rtl generation. */
8329 gcc_assert (!no_rtl);
8330
8331 if (!TARGET_64BIT)
8332 return;
8333
8334 fntype = TREE_TYPE (current_function_decl);
8335
8336 /* For varargs, we do not want to skip the dummy va_dcl argument.
8337 For stdargs, we do want to skip the last named argument. */
8338 next_cum = *cum;
8339 if (stdarg_p (fntype))
8340 ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
8341 true);
8342
8343 if (cum->call_abi == MS_ABI)
8344 setup_incoming_varargs_ms_64 (&next_cum);
8345 else
8346 setup_incoming_varargs_64 (&next_cum);
8347 }
8348
8349 /* Checks if TYPE is of kind va_list char *. */
8350
8351 static bool
8352 is_va_list_char_pointer (tree type)
8353 {
8354 tree canonic;
8355
8356 /* For 32-bit it is always true. */
8357 if (!TARGET_64BIT)
8358 return true;
8359 canonic = ix86_canonical_va_list_type (type);
8360 return (canonic == ms_va_list_type_node
8361 || (ix86_abi == MS_ABI && canonic == va_list_type_node));
8362 }
8363
8364 /* Implement va_start. */
8365
8366 static void
8367 ix86_va_start (tree valist, rtx nextarg)
8368 {
8369 HOST_WIDE_INT words, n_gpr, n_fpr;
8370 tree f_gpr, f_fpr, f_ovf, f_sav;
8371 tree gpr, fpr, ovf, sav, t;
8372 tree type;
8373 rtx ovf_rtx;
8374
8375 if (flag_split_stack
8376 && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8377 {
8378 unsigned int scratch_regno;
8379
8380 /* When we are splitting the stack, we can't refer to the stack
8381 arguments using internal_arg_pointer, because they may be on
8382 the old stack. The split stack prologue will arrange to
8383 leave a pointer to the old stack arguments in a scratch
8384 register, which we here copy to a pseudo-register. The split
8385 stack prologue can't set the pseudo-register directly because
8386 it (the prologue) runs before any registers have been saved. */
8387
8388 scratch_regno = split_stack_prologue_scratch_regno ();
8389 if (scratch_regno != INVALID_REGNUM)
8390 {
8391 rtx reg, seq;
8392
8393 reg = gen_reg_rtx (Pmode);
8394 cfun->machine->split_stack_varargs_pointer = reg;
8395
8396 start_sequence ();
8397 emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
8398 seq = get_insns ();
8399 end_sequence ();
8400
8401 push_topmost_sequence ();
8402 emit_insn_after (seq, entry_of_function ());
8403 pop_topmost_sequence ();
8404 }
8405 }
8406
8407 /* Only the 64-bit target needs something special. */
8408 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
8409 {
8410 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8411 std_expand_builtin_va_start (valist, nextarg);
8412 else
8413 {
8414 rtx va_r, next;
8415
8416 va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
8417 next = expand_binop (ptr_mode, add_optab,
8418 cfun->machine->split_stack_varargs_pointer,
8419 crtl->args.arg_offset_rtx,
8420 NULL_RTX, 0, OPTAB_LIB_WIDEN);
8421 convert_move (va_r, next, 0);
8422 }
8423 return;
8424 }
8425
8426 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
8427 f_fpr = DECL_CHAIN (f_gpr);
8428 f_ovf = DECL_CHAIN (f_fpr);
8429 f_sav = DECL_CHAIN (f_ovf);
8430
8431 valist = build_simple_mem_ref (valist);
8432 TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
8433 /* The following should be folded into the MEM_REF offset. */
8434 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
8435 f_gpr, NULL_TREE);
8436 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
8437 f_fpr, NULL_TREE);
8438 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
8439 f_ovf, NULL_TREE);
8440 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
8441 f_sav, NULL_TREE);
8442
8443 /* Count number of gp and fp argument registers used. */
8444 words = crtl->args.info.words;
8445 n_gpr = crtl->args.info.regno;
8446 n_fpr = crtl->args.info.sse_regno;
8447
8448 if (cfun->va_list_gpr_size)
8449 {
8450 type = TREE_TYPE (gpr);
8451 t = build2 (MODIFY_EXPR, type,
8452 gpr, build_int_cst (type, n_gpr * 8));
8453 TREE_SIDE_EFFECTS (t) = 1;
8454 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8455 }
8456
8457 if (TARGET_SSE && cfun->va_list_fpr_size)
8458 {
8459 type = TREE_TYPE (fpr);
8460 t = build2 (MODIFY_EXPR, type, fpr,
8461 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
8462 TREE_SIDE_EFFECTS (t) = 1;
8463 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8464 }
8465
8466 /* Find the overflow area. */
8467 type = TREE_TYPE (ovf);
8468 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8469 ovf_rtx = crtl->args.internal_arg_pointer;
8470 else
8471 ovf_rtx = cfun->machine->split_stack_varargs_pointer;
8472 t = make_tree (type, ovf_rtx);
8473 if (words != 0)
8474 t = fold_build_pointer_plus_hwi (t, words * UNITS_PER_WORD);
8475 t = build2 (MODIFY_EXPR, type, ovf, t);
8476 TREE_SIDE_EFFECTS (t) = 1;
8477 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8478
8479 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
8480 {
8481 /* Find the register save area.
8482 The function prologue saves it right above the stack frame. */
8483 type = TREE_TYPE (sav);
8484 t = make_tree (type, frame_pointer_rtx);
8485 if (!ix86_varargs_gpr_size)
8486 t = fold_build_pointer_plus_hwi (t, -8 * X86_64_REGPARM_MAX);
8487 t = build2 (MODIFY_EXPR, type, sav, t);
8488 TREE_SIDE_EFFECTS (t) = 1;
8489 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8490 }
8491 }
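
/* Worked example of the values set above, as a sketch: for a SysV
   function such as "int f (int a, ...)" one integer register is consumed
   by the named argument, so at va_start time gp_offset becomes 1 * 8 = 8
   and, with the default X86_64_REGPARM_MAX of 6, fp_offset becomes
   6 * 8 + 0 * 16 = 48, i.e. both offsets point just past the named
   arguments within the register save area laid out by
   setup_incoming_varargs_64.  */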
8492
8493 /* Implement va_arg. */
8494
8495 static tree
8496 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
8497 gimple_seq *post_p)
8498 {
8499 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
8500 tree f_gpr, f_fpr, f_ovf, f_sav;
8501 tree gpr, fpr, ovf, sav, t;
8502 int size, rsize;
8503 tree lab_false, lab_over = NULL_TREE;
8504 tree addr, t2;
8505 rtx container;
8506 int indirect_p = 0;
8507 tree ptrtype;
8508 enum machine_mode nat_mode;
8509 unsigned int arg_boundary;
8510
8511 /* Only the 64-bit target needs something special. */
8512 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
8513 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
8514
8515 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
8516 f_fpr = DECL_CHAIN (f_gpr);
8517 f_ovf = DECL_CHAIN (f_fpr);
8518 f_sav = DECL_CHAIN (f_ovf);
8519
8520 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
8521 build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE);
8522 valist = build_va_arg_indirect_ref (valist);
8523 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
8524 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
8525 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
8526
8527 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
8528 if (indirect_p)
8529 type = build_pointer_type (type);
8530 size = int_size_in_bytes (type);
8531 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
8532
8533 nat_mode = type_natural_mode (type, NULL, false);
8534 switch (nat_mode)
8535 {
8536 case V8SFmode:
8537 case V8SImode:
8538 case V32QImode:
8539 case V16HImode:
8540 case V4DFmode:
8541 case V4DImode:
8542 case V16SFmode:
8543 case V16SImode:
8544 case V64QImode:
8545 case V32HImode:
8546 case V8DFmode:
8547 case V8DImode:
8548 /* Unnamed 256- and 512-bit vector mode parameters are passed on the stack. */
8549 if (!TARGET_64BIT_MS_ABI)
8550 {
8551 container = NULL;
8552 break;
8553 }
8554
8555 default:
8556 container = construct_container (nat_mode, TYPE_MODE (type),
8557 type, 0, X86_64_REGPARM_MAX,
8558 X86_64_SSE_REGPARM_MAX, intreg,
8559 0);
8560 break;
8561 }
8562
8563 /* Pull the value out of the saved registers. */
8564
8565 addr = create_tmp_var (ptr_type_node, "addr");
8566
8567 if (container)
8568 {
8569 int needed_intregs, needed_sseregs;
8570 bool need_temp;
8571 tree int_addr, sse_addr;
8572
8573 lab_false = create_artificial_label (UNKNOWN_LOCATION);
8574 lab_over = create_artificial_label (UNKNOWN_LOCATION);
8575
8576 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
8577
8578 need_temp = (!REG_P (container)
8579 && ((needed_intregs && TYPE_ALIGN (type) > 64)
8580 || TYPE_ALIGN (type) > 128));
8581
8582 /* In case we are passing a structure, verify that it is a consecutive block
8583 in the register save area. If not, we need to do moves. */
8584 if (!need_temp && !REG_P (container))
8585 {
8586 /* Verify that all registers are strictly consecutive */
8587 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
8588 {
8589 int i;
8590
8591 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
8592 {
8593 rtx slot = XVECEXP (container, 0, i);
8594 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
8595 || INTVAL (XEXP (slot, 1)) != i * 16)
8596 need_temp = 1;
8597 }
8598 }
8599 else
8600 {
8601 int i;
8602
8603 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
8604 {
8605 rtx slot = XVECEXP (container, 0, i);
8606 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
8607 || INTVAL (XEXP (slot, 1)) != i * 8)
8608 need_temp = 1;
8609 }
8610 }
8611 }
8612 if (!need_temp)
8613 {
8614 int_addr = addr;
8615 sse_addr = addr;
8616 }
8617 else
8618 {
8619 int_addr = create_tmp_var (ptr_type_node, "int_addr");
8620 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
8621 }
8622
8623 /* First ensure that we fit completely in registers. */
8624 if (needed_intregs)
8625 {
8626 t = build_int_cst (TREE_TYPE (gpr),
8627 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
8628 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
8629 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
8630 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
8631 gimplify_and_add (t, pre_p);
8632 }
8633 if (needed_sseregs)
8634 {
8635 t = build_int_cst (TREE_TYPE (fpr),
8636 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
8637 + X86_64_REGPARM_MAX * 8);
8638 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
8639 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
8640 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
8641 gimplify_and_add (t, pre_p);
8642 }
8643
8644 /* Compute index to start of area used for integer regs. */
8645 if (needed_intregs)
8646 {
8647 /* int_addr = gpr + sav; */
8648 t = fold_build_pointer_plus (sav, gpr);
8649 gimplify_assign (int_addr, t, pre_p);
8650 }
8651 if (needed_sseregs)
8652 {
8653 /* sse_addr = fpr + sav; */
8654 t = fold_build_pointer_plus (sav, fpr);
8655 gimplify_assign (sse_addr, t, pre_p);
8656 }
8657 if (need_temp)
8658 {
8659 int i, prev_size = 0;
8660 tree temp = create_tmp_var (type, "va_arg_tmp");
8661
8662 /* addr = &temp; */
8663 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
8664 gimplify_assign (addr, t, pre_p);
8665
8666 for (i = 0; i < XVECLEN (container, 0); i++)
8667 {
8668 rtx slot = XVECEXP (container, 0, i);
8669 rtx reg = XEXP (slot, 0);
8670 enum machine_mode mode = GET_MODE (reg);
8671 tree piece_type;
8672 tree addr_type;
8673 tree daddr_type;
8674 tree src_addr, src;
8675 int src_offset;
8676 tree dest_addr, dest;
8677 int cur_size = GET_MODE_SIZE (mode);
8678
8679 gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
8680 prev_size = INTVAL (XEXP (slot, 1));
8681 if (prev_size + cur_size > size)
8682 {
8683 cur_size = size - prev_size;
8684 mode = mode_for_size (cur_size * BITS_PER_UNIT, MODE_INT, 1);
8685 if (mode == BLKmode)
8686 mode = QImode;
8687 }
8688 piece_type = lang_hooks.types.type_for_mode (mode, 1);
8689 if (mode == GET_MODE (reg))
8690 addr_type = build_pointer_type (piece_type);
8691 else
8692 addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
8693 true);
8694 daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
8695 true);
8696
8697 if (SSE_REGNO_P (REGNO (reg)))
8698 {
8699 src_addr = sse_addr;
8700 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
8701 }
8702 else
8703 {
8704 src_addr = int_addr;
8705 src_offset = REGNO (reg) * 8;
8706 }
8707 src_addr = fold_convert (addr_type, src_addr);
8708 src_addr = fold_build_pointer_plus_hwi (src_addr, src_offset);
8709
8710 dest_addr = fold_convert (daddr_type, addr);
8711 dest_addr = fold_build_pointer_plus_hwi (dest_addr, prev_size);
8712 if (cur_size == GET_MODE_SIZE (mode))
8713 {
8714 src = build_va_arg_indirect_ref (src_addr);
8715 dest = build_va_arg_indirect_ref (dest_addr);
8716
8717 gimplify_assign (dest, src, pre_p);
8718 }
8719 else
8720 {
8721 tree copy
8722 = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
8723 3, dest_addr, src_addr,
8724 size_int (cur_size));
8725 gimplify_and_add (copy, pre_p);
8726 }
8727 prev_size += cur_size;
8728 }
8729 }
8730
8731 if (needed_intregs)
8732 {
8733 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
8734 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
8735 gimplify_assign (gpr, t, pre_p);
8736 }
8737
8738 if (needed_sseregs)
8739 {
8740 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
8741 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
8742 gimplify_assign (fpr, t, pre_p);
8743 }
8744
8745 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
8746
8747 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
8748 }
8749
8750 /* ... otherwise out of the overflow area. */
8751
8752 /* When we align a parameter on the stack for the caller, if the parameter
8753 alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be
8754 aligned at MAX_SUPPORTED_STACK_ALIGNMENT. Match the callee
8755 here with the caller. */
8756 arg_boundary = ix86_function_arg_boundary (VOIDmode, type);
8757 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
8758 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
8759
8760 /* Care for on-stack alignment if needed. */
8761 if (arg_boundary <= 64 || size == 0)
8762 t = ovf;
8763 else
8764 {
8765 HOST_WIDE_INT align = arg_boundary / 8;
8766 t = fold_build_pointer_plus_hwi (ovf, align - 1);
8767 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
8768 build_int_cst (TREE_TYPE (t), -align));
8769 }
8770
8771 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
8772 gimplify_assign (addr, t, pre_p);
8773
8774 t = fold_build_pointer_plus_hwi (t, rsize * UNITS_PER_WORD);
8775 gimplify_assign (unshare_expr (ovf), t, pre_p);
8776
8777 if (container)
8778 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
8779
8780 ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
8781 addr = fold_convert (ptrtype, addr);
8782
8783 if (indirect_p)
8784 addr = build_va_arg_indirect_ref (addr);
8785 return build_va_arg_indirect_ref (addr);
8786 }
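
/* As a rough sketch, for a plain int argument the gimple built above
   behaves like the following pseudo C (an illustration only, using the
   save-area layout from setup_incoming_varargs_64):

     if (ap->gp_offset >= 6 * 8)
       goto stack;
     addr = ap->reg_save_area + ap->gp_offset;
     ap->gp_offset += 8;
     goto done;
   stack:
     addr = ap->overflow_arg_area;
     ap->overflow_arg_area = addr + 8;
   done:
     result = *(int *) addr;

   Aggregates that span several registers or need extra alignment go
   through the temporary-copy path above instead.  */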
8787 \f
8788 /* Return true if OPNUM's MEM should be matched
8789 in movabs* patterns. */
8790
8791 bool
8792 ix86_check_movabs (rtx insn, int opnum)
8793 {
8794 rtx set, mem;
8795
8796 set = PATTERN (insn);
8797 if (GET_CODE (set) == PARALLEL)
8798 set = XVECEXP (set, 0, 0);
8799 gcc_assert (GET_CODE (set) == SET);
8800 mem = XEXP (set, opnum);
8801 while (GET_CODE (mem) == SUBREG)
8802 mem = SUBREG_REG (mem);
8803 gcc_assert (MEM_P (mem));
8804 return volatile_ok || !MEM_VOLATILE_P (mem);
8805 }
8806 \f
8807 /* Initialize the table of extra 80387 mathematical constants. */
8808
8809 static void
8810 init_ext_80387_constants (void)
8811 {
8812 static const char * cst[5] =
8813 {
8814 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
8815 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
8816 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
8817 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
8818 "3.1415926535897932385128089594061862044", /* 4: fldpi */
8819 };
8820 int i;
8821
8822 for (i = 0; i < 5; i++)
8823 {
8824 real_from_string (&ext_80387_constants_table[i], cst[i]);
8825 /* Ensure each constant is rounded to XFmode precision. */
8826 real_convert (&ext_80387_constants_table[i],
8827 XFmode, &ext_80387_constants_table[i]);
8828 }
8829
8830 ext_80387_constants_init = 1;
8831 }
8832
8833 /* Return non-zero if the constant is something that
8834 can be loaded with a special instruction. */
8835
8836 int
8837 standard_80387_constant_p (rtx x)
8838 {
8839 enum machine_mode mode = GET_MODE (x);
8840
8841 REAL_VALUE_TYPE r;
8842
8843 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
8844 return -1;
8845
8846 if (x == CONST0_RTX (mode))
8847 return 1;
8848 if (x == CONST1_RTX (mode))
8849 return 2;
8850
8851 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8852
8853 /* For XFmode constants, try to find a special 80387 instruction when
8854 optimizing for size or on those CPUs that benefit from them. */
8855 if (mode == XFmode
8856 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
8857 {
8858 int i;
8859
8860 if (! ext_80387_constants_init)
8861 init_ext_80387_constants ();
8862
8863 for (i = 0; i < 5; i++)
8864 if (real_identical (&r, &ext_80387_constants_table[i]))
8865 return i + 3;
8866 }
8867
8868 /* A load of the constant -0.0 or -1.0 will be split into an
8869 fldz;fchs or fld1;fchs sequence. */
8870 if (real_isnegzero (&r))
8871 return 8;
8872 if (real_identical (&r, &dconstm1))
8873 return 9;
8874
8875 return 0;
8876 }
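
/* For reference, the return values of standard_80387_constant_p map to
   these constants (values 3..7 index the ext_80387_constants_table
   entries above): 1 = 0.0 (fldz), 2 = 1.0 (fld1), 3 = log10(2) (fldlg2),
   4 = ln(2) (fldln2), 5 = log2(e) (fldl2e), 6 = log2(10) (fldl2t),
   7 = pi (fldpi), 8 = -0.0 and 9 = -1.0 (loaded and then negated).  */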
8877
8878 /* Return the opcode of the special instruction to be used to load
8879 the constant X. */
8880
8881 const char *
8882 standard_80387_constant_opcode (rtx x)
8883 {
8884 switch (standard_80387_constant_p (x))
8885 {
8886 case 1:
8887 return "fldz";
8888 case 2:
8889 return "fld1";
8890 case 3:
8891 return "fldlg2";
8892 case 4:
8893 return "fldln2";
8894 case 5:
8895 return "fldl2e";
8896 case 6:
8897 return "fldl2t";
8898 case 7:
8899 return "fldpi";
8900 case 8:
8901 case 9:
8902 return "#";
8903 default:
8904 gcc_unreachable ();
8905 }
8906 }
8907
8908 /* Return the CONST_DOUBLE representing the 80387 constant that is
8909 loaded by the specified special instruction. The argument IDX
8910 matches the return value from standard_80387_constant_p. */
8911
8912 rtx
8913 standard_80387_constant_rtx (int idx)
8914 {
8915 int i;
8916
8917 if (! ext_80387_constants_init)
8918 init_ext_80387_constants ();
8919
8920 switch (idx)
8921 {
8922 case 3:
8923 case 4:
8924 case 5:
8925 case 6:
8926 case 7:
8927 i = idx - 3;
8928 break;
8929
8930 default:
8931 gcc_unreachable ();
8932 }
8933
8934 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
8935 XFmode);
8936 }
8937
8938 /* Return 1 if X is all 0s and 2 if X is all 1s
8939 in a supported SSE/AVX vector mode. */
8940
8941 int
8942 standard_sse_constant_p (rtx x)
8943 {
8944 enum machine_mode mode = GET_MODE (x);
8945
8946 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
8947 return 1;
8948 if (vector_all_ones_operand (x, mode))
8949 switch (mode)
8950 {
8951 case V16QImode:
8952 case V8HImode:
8953 case V4SImode:
8954 case V2DImode:
8955 if (TARGET_SSE2)
8956 return 2;
8957 case V32QImode:
8958 case V16HImode:
8959 case V8SImode:
8960 case V4DImode:
8961 if (TARGET_AVX2)
8962 return 2;
8963 case V64QImode:
8964 case V32HImode:
8965 case V16SImode:
8966 case V8DImode:
8967 if (TARGET_AVX512F)
8968 return 2;
8969 default:
8970 break;
8971 }
8972
8973 return 0;
8974 }
8975
8976 /* Return the opcode of the special instruction to be used to load
8977 the constant X. */
8978
8979 const char *
8980 standard_sse_constant_opcode (rtx insn, rtx x)
8981 {
8982 switch (standard_sse_constant_p (x))
8983 {
8984 case 1:
8985 switch (get_attr_mode (insn))
8986 {
8987 case MODE_XI:
8988 case MODE_V16SF:
8989 return "vpxord\t%g0, %g0, %g0";
8990 case MODE_V8DF:
8991 return "vpxorq\t%g0, %g0, %g0";
8992 case MODE_TI:
8993 return "%vpxor\t%0, %d0";
8994 case MODE_V2DF:
8995 return "%vxorpd\t%0, %d0";
8996 case MODE_V4SF:
8997 return "%vxorps\t%0, %d0";
8998
8999 case MODE_OI:
9000 return "vpxor\t%x0, %x0, %x0";
9001 case MODE_V4DF:
9002 return "vxorpd\t%x0, %x0, %x0";
9003 case MODE_V8SF:
9004 return "vxorps\t%x0, %x0, %x0";
9005
9006 default:
9007 break;
9008 }
9009
9010 case 2:
9011 if (get_attr_mode (insn) == MODE_XI
9012 || get_attr_mode (insn) == MODE_V8DF
9013 || get_attr_mode (insn) == MODE_V16SF)
9014 return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
9015 if (TARGET_AVX)
9016 return "vpcmpeqd\t%0, %0, %0";
9017 else
9018 return "pcmpeqd\t%0, %0";
9019
9020 default:
9021 break;
9022 }
9023 gcc_unreachable ();
9024 }
9025
9026 /* Return true if OP contains a symbol reference. */
9027
9028 bool
9029 symbolic_reference_mentioned_p (rtx op)
9030 {
9031 const char *fmt;
9032 int i;
9033
9034 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
9035 return true;
9036
9037 fmt = GET_RTX_FORMAT (GET_CODE (op));
9038 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
9039 {
9040 if (fmt[i] == 'E')
9041 {
9042 int j;
9043
9044 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
9045 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
9046 return true;
9047 }
9048
9049 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
9050 return true;
9051 }
9052
9053 return false;
9054 }
9055
9056 /* Return true if it is appropriate to emit `ret' instructions in the
9057 body of a function. Do this only if the epilogue is simple, needing a
9058 couple of insns. Prior to reloading, we can't tell how many registers
9059 must be saved, so return false then. Return false if there is no frame
9060 marker to de-allocate. */
9061
9062 bool
9063 ix86_can_use_return_insn_p (void)
9064 {
9065 struct ix86_frame frame;
9066
9067 if (! reload_completed || frame_pointer_needed)
9068 return 0;
9069
9070 /* Don't allow more than 32k pop, since that's all we can do
9071 with one instruction. */
9072 if (crtl->args.pops_args && crtl->args.size >= 32768)
9073 return 0;
9074
9075 ix86_compute_frame_layout (&frame);
9076 return (frame.stack_pointer_offset == UNITS_PER_WORD
9077 && (frame.nregs + frame.nsseregs) == 0);
9078 }
9079 \f
9080 /* Value should be nonzero if functions must have frame pointers.
9081 Zero means the frame pointer need not be set up (and parms may
9082 be accessed via the stack pointer) in functions that seem suitable. */
9083
9084 static bool
9085 ix86_frame_pointer_required (void)
9086 {
9087 /* If we accessed previous frames, then the generated code expects
9088 to be able to access the saved ebp value in our frame. */
9089 if (cfun->machine->accesses_prev_frame)
9090 return true;
9091
9092 /* Several x86 OSes need a frame pointer for other reasons,
9093 usually pertaining to setjmp. */
9094 if (SUBTARGET_FRAME_POINTER_REQUIRED)
9095 return true;
9096
9097 /* For older 32-bit runtimes, setjmp requires a valid frame pointer. */
9098 if (TARGET_32BIT_MS_ABI && cfun->calls_setjmp)
9099 return true;
9100
9101 /* For Win64 SEH, very large frames need a frame pointer, as the maximum
9102 stack allocation is 4GB. */
9103 if (TARGET_64BIT_MS_ABI && get_frame_size () > SEH_MAX_FRAME_SIZE)
9104 return true;
9105
9106 /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
9107 turns off the frame pointer by default. Turn it back on now if
9108 we've not got a leaf function. */
9109 if (TARGET_OMIT_LEAF_FRAME_POINTER
9110 && (!crtl->is_leaf
9111 || ix86_current_function_calls_tls_descriptor))
9112 return true;
9113
9114 if (crtl->profile && !flag_fentry)
9115 return true;
9116
9117 return false;
9118 }
9119
9120 /* Record that the current function accesses previous call frames. */
9121
9122 void
9123 ix86_setup_frame_addresses (void)
9124 {
9125 cfun->machine->accesses_prev_frame = 1;
9126 }
9127 \f
9128 #ifndef USE_HIDDEN_LINKONCE
9129 # if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
9130 # define USE_HIDDEN_LINKONCE 1
9131 # else
9132 # define USE_HIDDEN_LINKONCE 0
9133 # endif
9134 #endif
9135
9136 static int pic_labels_used;
9137
9138 /* Fills in the label name that should be used for a pc thunk for
9139 the given register. */
9140
9141 static void
9142 get_pc_thunk_name (char name[32], unsigned int regno)
9143 {
9144 gcc_assert (!TARGET_64BIT);
9145
9146 if (USE_HIDDEN_LINKONCE)
9147 sprintf (name, "__x86.get_pc_thunk.%s", reg_names[regno]);
9148 else
9149 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
9150 }
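
/* For example, with USE_HIDDEN_LINKONCE and regno == BX_REG this produces
   the thunk name "__x86.get_pc_thunk.bx" (assuming the usual short
   register names in reg_names); otherwise an internal label with prefix
   "LPR" and the register number is generated instead.  */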
9151
9152
9153 /* This function generates the pc thunks used for -fpic; each thunk loads
9154 its register with the return address of the caller and then returns. */
9155
9156 static void
9157 ix86_code_end (void)
9158 {
9159 rtx xops[2];
9160 int regno;
9161
9162 for (regno = AX_REG; regno <= SP_REG; regno++)
9163 {
9164 char name[32];
9165 tree decl;
9166
9167 if (!(pic_labels_used & (1 << regno)))
9168 continue;
9169
9170 get_pc_thunk_name (name, regno);
9171
9172 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
9173 get_identifier (name),
9174 build_function_type_list (void_type_node, NULL_TREE));
9175 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
9176 NULL_TREE, void_type_node);
9177 TREE_PUBLIC (decl) = 1;
9178 TREE_STATIC (decl) = 1;
9179 DECL_IGNORED_P (decl) = 1;
9180
9181 #if TARGET_MACHO
9182 if (TARGET_MACHO)
9183 {
9184 switch_to_section (darwin_sections[text_coal_section]);
9185 fputs ("\t.weak_definition\t", asm_out_file);
9186 assemble_name (asm_out_file, name);
9187 fputs ("\n\t.private_extern\t", asm_out_file);
9188 assemble_name (asm_out_file, name);
9189 putc ('\n', asm_out_file);
9190 ASM_OUTPUT_LABEL (asm_out_file, name);
9191 DECL_WEAK (decl) = 1;
9192 }
9193 else
9194 #endif
9195 if (USE_HIDDEN_LINKONCE)
9196 {
9197 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
9198
9199 targetm.asm_out.unique_section (decl, 0);
9200 switch_to_section (get_named_section (decl, NULL, 0));
9201
9202 targetm.asm_out.globalize_label (asm_out_file, name);
9203 fputs ("\t.hidden\t", asm_out_file);
9204 assemble_name (asm_out_file, name);
9205 putc ('\n', asm_out_file);
9206 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
9207 }
9208 else
9209 {
9210 switch_to_section (text_section);
9211 ASM_OUTPUT_LABEL (asm_out_file, name);
9212 }
9213
9214 DECL_INITIAL (decl) = make_node (BLOCK);
9215 current_function_decl = decl;
9216 init_function_start (decl);
9217 first_function_block_is_cold = false;
9218 /* Make sure unwind info is emitted for the thunk if needed. */
9219 final_start_function (emit_barrier (), asm_out_file, 1);
9220
9221 /* Pad stack IP move with 4 instructions (two NOPs count
9222 as one instruction). */
9223 if (TARGET_PAD_SHORT_FUNCTION)
9224 {
9225 int i = 8;
9226
9227 while (i--)
9228 fputs ("\tnop\n", asm_out_file);
9229 }
9230
9231 xops[0] = gen_rtx_REG (Pmode, regno);
9232 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
9233 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
9234 fputs ("\tret\n", asm_out_file);
9235 final_end_function ();
9236 init_insn_lengths ();
9237 free_after_compilation (cfun);
9238 set_cfun (NULL);
9239 current_function_decl = NULL;
9240 }
9241
9242 if (flag_split_stack)
9243 file_end_indicate_split_stack ();
9244 }
9245
9246 /* Emit code for the SET_GOT patterns. */
9247
9248 const char *
9249 output_set_got (rtx dest, rtx label)
9250 {
9251 rtx xops[3];
9252
9253 xops[0] = dest;
9254
9255 if (TARGET_VXWORKS_RTP && flag_pic)
9256 {
9257 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
9258 xops[2] = gen_rtx_MEM (Pmode,
9259 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
9260 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
9261
9262 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
9263 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
9264 an unadorned address. */
9265 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
9266 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
9267 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
9268 return "";
9269 }
9270
9271 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
9272
9273 if (!flag_pic)
9274 {
9275 if (TARGET_MACHO)
9276 /* We don't need a pic base, we're not producing pic. */
9277 gcc_unreachable ();
9278
9279 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
9280 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
9281 targetm.asm_out.internal_label (asm_out_file, "L",
9282 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
9283 }
9284 else
9285 {
9286 char name[32];
9287 get_pc_thunk_name (name, REGNO (dest));
9288 pic_labels_used |= 1 << REGNO (dest);
9289
9290 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
9291 xops[2] = gen_rtx_MEM (QImode, xops[2]);
9292 output_asm_insn ("call\t%X2", xops);
9293
9294 #if TARGET_MACHO
9295 /* Output the Mach-O "canonical" pic base label name ("Lxx$pb") here.
9296 This is what will be referenced by the Mach-O PIC subsystem. */
9297 if (machopic_should_output_picbase_label () || !label)
9298 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
9299
9300 /* When we are restoring the pic base at the site of a nonlocal label,
9301 and we decided to emit the pic base above, we will still output a
9302 local label used for calculating the correction offset (even though
9303 the offset will be 0 in that case). */
9304 if (label)
9305 targetm.asm_out.internal_label (asm_out_file, "L",
9306 CODE_LABEL_NUMBER (label));
9307 #endif
9308 }
9309
9310 if (!TARGET_MACHO)
9311 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
9312
9313 return "";
9314 }
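
/* As an illustration, for the common 32-bit PIC case with %ebx as DEST
   the sequence emitted above amounts to roughly (AT&T syntax shown):

	call	__x86.get_pc_thunk.bx
	addl	$_GLOBAL_OFFSET_TABLE_, %ebx

   The thunk loads %ebx with the current pc and the add rebases it to the
   GOT; this is a sketch, the exact text depends on the assembler
   dialect and target.  */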
9315
9316 /* Generate a "push" pattern for input ARG. */
9317
9318 static rtx
9319 gen_push (rtx arg)
9320 {
9321 struct machine_function *m = cfun->machine;
9322
9323 if (m->fs.cfa_reg == stack_pointer_rtx)
9324 m->fs.cfa_offset += UNITS_PER_WORD;
9325 m->fs.sp_offset += UNITS_PER_WORD;
9326
9327 if (REG_P (arg) && GET_MODE (arg) != word_mode)
9328 arg = gen_rtx_REG (word_mode, REGNO (arg));
9329
9330 return gen_rtx_SET (VOIDmode,
9331 gen_rtx_MEM (word_mode,
9332 gen_rtx_PRE_DEC (Pmode,
9333 stack_pointer_rtx)),
9334 arg);
9335 }
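
/* The push pattern built above is, schematically (word_mode == SImode
   shown; the DImode case is analogous):

     (set (mem:SI (pre_dec:SI (reg:SI sp))) (reg:SI arg))

   and the bookkeeping in cfun->machine->fs tracks the resulting change
   in the stack and CFA offsets.  */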
9336
9337 /* Generate a "pop" pattern for input ARG. */
9338
9339 static rtx
9340 gen_pop (rtx arg)
9341 {
9342 if (REG_P (arg) && GET_MODE (arg) != word_mode)
9343 arg = gen_rtx_REG (word_mode, REGNO (arg));
9344
9345 return gen_rtx_SET (VOIDmode,
9346 arg,
9347 gen_rtx_MEM (word_mode,
9348 gen_rtx_POST_INC (Pmode,
9349 stack_pointer_rtx)));
9350 }
9351
9352 /* Return >= 0 if there is an unused call-clobbered register available
9353 for the entire function. */
9354
9355 static unsigned int
9356 ix86_select_alt_pic_regnum (void)
9357 {
9358 if (crtl->is_leaf
9359 && !crtl->profile
9360 && !ix86_current_function_calls_tls_descriptor)
9361 {
9362 int i, drap;
9363 /* Can't use the same register for both PIC and DRAP. */
9364 if (crtl->drap_reg)
9365 drap = REGNO (crtl->drap_reg);
9366 else
9367 drap = -1;
9368 for (i = 2; i >= 0; --i)
9369 if (i != drap && !df_regs_ever_live_p (i))
9370 return i;
9371 }
9372
9373 return INVALID_REGNUM;
9374 }
9375
9376 /* Return TRUE if we need to save REGNO. */
9377
9378 static bool
9379 ix86_save_reg (unsigned int regno, bool maybe_eh_return)
9380 {
9381 if (pic_offset_table_rtx
9382 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
9383 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
9384 || crtl->profile
9385 || crtl->calls_eh_return
9386 || crtl->uses_const_pool
9387 || cfun->has_nonlocal_label))
9388 return ix86_select_alt_pic_regnum () == INVALID_REGNUM;
9389
9390 if (crtl->calls_eh_return && maybe_eh_return)
9391 {
9392 unsigned i;
9393 for (i = 0; ; i++)
9394 {
9395 unsigned test = EH_RETURN_DATA_REGNO (i);
9396 if (test == INVALID_REGNUM)
9397 break;
9398 if (test == regno)
9399 return true;
9400 }
9401 }
9402
9403 if (crtl->drap_reg
9404 && regno == REGNO (crtl->drap_reg)
9405 && !cfun->machine->no_drap_save_restore)
9406 return true;
9407
9408 return (df_regs_ever_live_p (regno)
9409 && !call_used_regs[regno]
9410 && !fixed_regs[regno]
9411 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
9412 }
9413
9414 /* Return the number of saved general purpose registers. */
9415
9416 static int
9417 ix86_nsaved_regs (void)
9418 {
9419 int nregs = 0;
9420 int regno;
9421
9422 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9423 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9424 nregs ++;
9425 return nregs;
9426 }
9427
9428 /* Return the number of saved SSE registers. */
9429
9430 static int
9431 ix86_nsaved_sseregs (void)
9432 {
9433 int nregs = 0;
9434 int regno;
9435
9436 if (!TARGET_64BIT_MS_ABI)
9437 return 0;
9438 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9439 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9440 nregs ++;
9441 return nregs;
9442 }
9443
9444 /* Given FROM and TO register numbers, say whether this elimination is
9445 allowed. If stack alignment is needed, we can only replace argument
9446 pointer with hard frame pointer, or replace frame pointer with stack
9447 pointer. Otherwise, frame pointer elimination is automatically
9448 handled and all other eliminations are valid. */
9449
9450 static bool
9451 ix86_can_eliminate (const int from, const int to)
9452 {
9453 if (stack_realign_fp)
9454 return ((from == ARG_POINTER_REGNUM
9455 && to == HARD_FRAME_POINTER_REGNUM)
9456 || (from == FRAME_POINTER_REGNUM
9457 && to == STACK_POINTER_REGNUM));
9458 else
9459 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
9460 }
9461
9462 /* Return the offset between two registers, one to be eliminated, and the other
9463 its replacement, at the start of a routine. */
9464
9465 HOST_WIDE_INT
9466 ix86_initial_elimination_offset (int from, int to)
9467 {
9468 struct ix86_frame frame;
9469 ix86_compute_frame_layout (&frame);
9470
9471 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
9472 return frame.hard_frame_pointer_offset;
9473 else if (from == FRAME_POINTER_REGNUM
9474 && to == HARD_FRAME_POINTER_REGNUM)
9475 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
9476 else
9477 {
9478 gcc_assert (to == STACK_POINTER_REGNUM);
9479
9480 if (from == ARG_POINTER_REGNUM)
9481 return frame.stack_pointer_offset;
9482
9483 gcc_assert (from == FRAME_POINTER_REGNUM);
9484 return frame.stack_pointer_offset - frame.frame_pointer_offset;
9485 }
9486 }
9487
9488 /* In a dynamically-aligned function, we can't know the offset from
9489 stack pointer to frame pointer, so we must ensure that setjmp
9490 eliminates fp against the hard fp (%ebp) rather than trying to
9491 index from %esp up to the top of the frame across a gap that is
9492 of unknown (at compile-time) size. */
9493 static rtx
9494 ix86_builtin_setjmp_frame_value (void)
9495 {
9496 return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
9497 }
9498
9499 /* When using -fsplit-stack, the allocation routines set a field in
9500 the TCB to the bottom of the stack plus this much space, measured
9501 in bytes. */
9502
9503 #define SPLIT_STACK_AVAILABLE 256
9504
9505 /* Fill structure ix86_frame about frame of currently computed function. */
9506
9507 static void
9508 ix86_compute_frame_layout (struct ix86_frame *frame)
9509 {
9510 unsigned HOST_WIDE_INT stack_alignment_needed;
9511 HOST_WIDE_INT offset;
9512 unsigned HOST_WIDE_INT preferred_alignment;
9513 HOST_WIDE_INT size = get_frame_size ();
9514 HOST_WIDE_INT to_allocate;
9515
9516 frame->nregs = ix86_nsaved_regs ();
9517 frame->nsseregs = ix86_nsaved_sseregs ();
9518
9519 /* The 64-bit MS ABI seems to require stack alignment to always be 16, except
9520 in function prologues and leaf functions. */
9521 if ((TARGET_64BIT_MS_ABI && crtl->preferred_stack_boundary < 128)
9522 && (!crtl->is_leaf || cfun->calls_alloca != 0
9523 || ix86_current_function_calls_tls_descriptor))
9524 {
9525 crtl->preferred_stack_boundary = 128;
9526 crtl->stack_alignment_needed = 128;
9527 }
9528 /* preferred_stack_boundary is never updated for calls
9529 expanded from a tls descriptor. Update it here. We don't update it in
9530 the expand stage because, according to the comments before
9531 ix86_current_function_calls_tls_descriptor, tls calls may be optimized
9532 away. */
9533 else if (ix86_current_function_calls_tls_descriptor
9534 && crtl->preferred_stack_boundary < PREFERRED_STACK_BOUNDARY)
9535 {
9536 crtl->preferred_stack_boundary = PREFERRED_STACK_BOUNDARY;
9537 if (crtl->stack_alignment_needed < PREFERRED_STACK_BOUNDARY)
9538 crtl->stack_alignment_needed = PREFERRED_STACK_BOUNDARY;
9539 }
9540
9541 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
9542 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
9543
9544 gcc_assert (!size || stack_alignment_needed);
9545 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
9546 gcc_assert (preferred_alignment <= stack_alignment_needed);
9547
9548 /* For SEH we have to limit the amount of code movement into the prologue.
9549 At present we do this via a BLOCKAGE, at which point there's very little
9550 scheduling that can be done, which means that there's very little point
9551 in doing anything except PUSHs. */
9552 if (TARGET_SEH)
9553 cfun->machine->use_fast_prologue_epilogue = false;
9554
9555 /* During reload iteration the number of registers saved can change.
9556 Recompute the value as needed. Do not recompute when the number of registers
9557 didn't change, as reload does multiple calls to the function and does not
9558 expect the decision to change within a single iteration. */
9559 else if (!optimize_bb_for_size_p (ENTRY_BLOCK_PTR_FOR_FN (cfun))
9560 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
9561 {
9562 int count = frame->nregs;
9563 struct cgraph_node *node = cgraph_node::get (current_function_decl);
9564
9565 cfun->machine->use_fast_prologue_epilogue_nregs = count;
9566
9567 /* The fast prologue uses move instead of push to save registers. This
9568 is significantly longer, but also executes faster as modern hardware
9569 can execute the moves in parallel, but can't do that for push/pop.
9570
9571 Be careful about choosing what prologue to emit: when the function takes
9572 many instructions to execute we may use the slow version, as well as when
9573 the function is known to be outside a hot spot (this is known only with
9574 feedback). Weight the size of the function by the number of registers
9575 to save, as it is cheap to use one or two push instructions but very
9576 slow to use many of them. */
9577 if (count)
9578 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
9579 if (node->frequency < NODE_FREQUENCY_NORMAL
9580 || (flag_branch_probabilities
9581 && node->frequency < NODE_FREQUENCY_HOT))
9582 cfun->machine->use_fast_prologue_epilogue = false;
9583 else
9584 cfun->machine->use_fast_prologue_epilogue
9585 = !expensive_function_p (count);
9586 }
9587
9588 frame->save_regs_using_mov
9589 = (TARGET_PROLOGUE_USING_MOVE && cfun->machine->use_fast_prologue_epilogue
9590 /* If static stack checking is enabled and done with probes,
9591 the registers need to be saved before allocating the frame. */
9592 && flag_stack_check != STATIC_BUILTIN_STACK_CHECK);
9593
9594 /* Skip return address. */
9595 offset = UNITS_PER_WORD;
9596
9597 /* Skip pushed static chain. */
9598 if (ix86_static_chain_on_stack)
9599 offset += UNITS_PER_WORD;
9600
9601 /* Skip saved base pointer. */
9602 if (frame_pointer_needed)
9603 offset += UNITS_PER_WORD;
9604 frame->hfp_save_offset = offset;
9605
9606 /* The traditional frame pointer location is at the top of the frame. */
9607 frame->hard_frame_pointer_offset = offset;
9608
9609 /* Register save area */
9610 offset += frame->nregs * UNITS_PER_WORD;
9611 frame->reg_save_offset = offset;
9612
9613 /* On SEH target, registers are pushed just before the frame pointer
9614 location. */
9615 if (TARGET_SEH)
9616 frame->hard_frame_pointer_offset = offset;
9617
9618 /* Align and set SSE register save area. */
9619 if (frame->nsseregs)
9620 {
9621 /* The only ABI that has saved SSE registers (Win64) also has a
9622 16-byte aligned default stack, and thus we don't need to be
9623 within the re-aligned local stack frame to save them. */
9624 gcc_assert (INCOMING_STACK_BOUNDARY >= 128);
9625 offset = (offset + 16 - 1) & -16;
9626 offset += frame->nsseregs * 16;
9627 }
9628 frame->sse_reg_save_offset = offset;
9629
9630 /* The re-aligned stack starts here. Values before this point are not
9631 directly comparable with values below this point. In order to make
9632 sure that no value happens to be the same before and after, force
9633 the alignment computation below to add a non-zero value. */
9634 if (stack_realign_fp)
9635 offset = (offset + stack_alignment_needed) & -stack_alignment_needed;
9636
9637 /* Va-arg area */
9638 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
9639 offset += frame->va_arg_size;
9640
9641 /* Align start of frame for local function. */
9642 if (stack_realign_fp
9643 || offset != frame->sse_reg_save_offset
9644 || size != 0
9645 || !crtl->is_leaf
9646 || cfun->calls_alloca
9647 || ix86_current_function_calls_tls_descriptor)
9648 offset = (offset + stack_alignment_needed - 1) & -stack_alignment_needed;
9649
9650 /* Frame pointer points here. */
9651 frame->frame_pointer_offset = offset;
9652
9653 offset += size;
9654
9655 /* Add the outgoing arguments area. It can be skipped if we eliminated
9656 all the function calls as dead code.
9657 Skipping is however impossible when the function calls alloca: the alloca
9658 expander assumes that the last crtl->outgoing_args_size bytes
9659 of the stack frame are unused. */
9660 if (ACCUMULATE_OUTGOING_ARGS
9661 && (!crtl->is_leaf || cfun->calls_alloca
9662 || ix86_current_function_calls_tls_descriptor))
9663 {
9664 offset += crtl->outgoing_args_size;
9665 frame->outgoing_arguments_size = crtl->outgoing_args_size;
9666 }
9667 else
9668 frame->outgoing_arguments_size = 0;
9669
9670 /* Align stack boundary. Only needed if we're calling another function
9671 or using alloca. */
9672 if (!crtl->is_leaf || cfun->calls_alloca
9673 || ix86_current_function_calls_tls_descriptor)
9674 offset = (offset + preferred_alignment - 1) & -preferred_alignment;
9675
9676 /* We've reached end of stack frame. */
9677 frame->stack_pointer_offset = offset;
9678
9679 /* Size prologue needs to allocate. */
9680 to_allocate = offset - frame->sse_reg_save_offset;
9681
9682 if ((!to_allocate && frame->nregs <= 1)
9683 || (TARGET_64BIT && to_allocate >= (HOST_WIDE_INT) 0x80000000))
9684 frame->save_regs_using_mov = false;
9685
9686 if (ix86_using_red_zone ()
9687 && crtl->sp_is_unchanging
9688 && crtl->is_leaf
9689 && !ix86_current_function_calls_tls_descriptor)
9690 {
9691 frame->red_zone_size = to_allocate;
9692 if (frame->save_regs_using_mov)
9693 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
9694 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
9695 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
9696 }
9697 else
9698 frame->red_zone_size = 0;
9699 frame->stack_pointer_offset -= frame->red_zone_size;
9700
9701 /* The SEH frame pointer location is near the bottom of the frame.
9702 This is enforced by the fact that the difference between the
9703 stack pointer and the frame pointer is limited to 240 bytes in
9704 the unwind data structure. */
9705 if (TARGET_SEH)
9706 {
9707 HOST_WIDE_INT diff;
9708
9709 /* If we can leave the frame pointer where it is, do so. Also, returns
9710 the establisher frame for __builtin_frame_address (0). */
9711 diff = frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
9712 if (diff <= SEH_MAX_FRAME_SIZE
9713 && (diff > 240 || (diff & 15) != 0)
9714 && !crtl->accesses_prior_frames)
9715 {
9716 /* Ideally we'd determine what portion of the local stack frame
9717 (within the constraint of the lowest 240) is most heavily used.
9718 But without that complication, simply bias the frame pointer
9719 by 128 bytes so as to maximize the amount of the local stack
9720 frame that is addressable with 8-bit offsets. */
9721 frame->hard_frame_pointer_offset = frame->stack_pointer_offset - 128;
9722 }
9723 }
9724 }
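
/* A rough picture of the frame layout computed above, from higher to
   lower addresses; each offset on the right names the low edge of the
   region it is written next to, and not every region is present in every
   function:

       return address
       pushed static chain (optional)
       saved frame pointer (optional)     <- hard_frame_pointer_offset
       saved general registers            <- reg_save_offset
       saved SSE registers (Win64 only)   <- sse_reg_save_offset
       va_arg register save area          <- frame_pointer_offset
       local variables
       outgoing argument area             <- stack_pointer_offset

   This is only a sketch; the SEH and red-zone handling above adjust some
   of these offsets.  */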
9725
9726 /* This is semi-inlined memory_address_length, but simplified
9727 since we know that we're always dealing with reg+offset, and
9728 to avoid having to create and discard all that rtl. */
9729
9730 static inline int
9731 choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
9732 {
9733 int len = 4;
9734
9735 if (offset == 0)
9736 {
9737 /* EBP and R13 cannot be encoded without an offset. */
9738 len = (regno == BP_REG || regno == R13_REG);
9739 }
9740 else if (IN_RANGE (offset, -128, 127))
9741 len = 1;
9742
9743 /* ESP and R12 must be encoded with a SIB byte. */
9744 if (regno == SP_REG || regno == R12_REG)
9745 len++;
9746
9747 return len;
9748 }
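
/* Example encodings for the length computed above, as a sketch: (%ebp)
   needs a zero disp8 so it counts as 1 byte, -4(%esp) needs both a disp8
   and a SIB byte for 2, and 1024(%ebx) needs a 4-byte displacement.  */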
9749
9750 /* Return an RTX that points to CFA_OFFSET within the stack frame.
9751 The valid base registers are taken from CFUN->MACHINE->FS. */
9752
9753 static rtx
9754 choose_baseaddr (HOST_WIDE_INT cfa_offset)
9755 {
9756 const struct machine_function *m = cfun->machine;
9757 rtx base_reg = NULL;
9758 HOST_WIDE_INT base_offset = 0;
9759
9760 if (m->use_fast_prologue_epilogue)
9761 {
9762 /* Choose the base register most likely to allow the most scheduling
9763 opportunities. Generally FP is valid throughout the function,
9764 while DRAP must be reloaded within the epilogue. But choose either
9765 over the SP, due to its increased encoding size. */
9766
9767 if (m->fs.fp_valid)
9768 {
9769 base_reg = hard_frame_pointer_rtx;
9770 base_offset = m->fs.fp_offset - cfa_offset;
9771 }
9772 else if (m->fs.drap_valid)
9773 {
9774 base_reg = crtl->drap_reg;
9775 base_offset = 0 - cfa_offset;
9776 }
9777 else if (m->fs.sp_valid)
9778 {
9779 base_reg = stack_pointer_rtx;
9780 base_offset = m->fs.sp_offset - cfa_offset;
9781 }
9782 }
9783 else
9784 {
9785 HOST_WIDE_INT toffset;
9786 int len = 16, tlen;
9787
9788 /* Choose the base register with the smallest address encoding.
9789 With a tie, choose FP > DRAP > SP. */
9790 if (m->fs.sp_valid)
9791 {
9792 base_reg = stack_pointer_rtx;
9793 base_offset = m->fs.sp_offset - cfa_offset;
9794 len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
9795 }
9796 if (m->fs.drap_valid)
9797 {
9798 toffset = 0 - cfa_offset;
9799 tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
9800 if (tlen <= len)
9801 {
9802 base_reg = crtl->drap_reg;
9803 base_offset = toffset;
9804 len = tlen;
9805 }
9806 }
9807 if (m->fs.fp_valid)
9808 {
9809 toffset = m->fs.fp_offset - cfa_offset;
9810 tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
9811 if (tlen <= len)
9812 {
9813 base_reg = hard_frame_pointer_rtx;
9814 base_offset = toffset;
9815 len = tlen;
9816 }
9817 }
9818 }
9819 gcc_assert (base_reg != NULL);
9820
9821 return plus_constant (Pmode, base_reg, base_offset);
9822 }
9823
9824 /* Emit code to save registers in the prologue. */
9825
9826 static void
9827 ix86_emit_save_regs (void)
9828 {
9829 unsigned int regno;
9830 rtx insn;
9831
9832 for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
9833 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9834 {
9835 insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno)));
9836 RTX_FRAME_RELATED_P (insn) = 1;
9837 }
9838 }
9839
9840 /* Emit a single register save at CFA - CFA_OFFSET. */
9841
9842 static void
9843 ix86_emit_save_reg_using_mov (enum machine_mode mode, unsigned int regno,
9844 HOST_WIDE_INT cfa_offset)
9845 {
9846 struct machine_function *m = cfun->machine;
9847 rtx reg = gen_rtx_REG (mode, regno);
9848 rtx mem, addr, base, insn;
9849
9850 addr = choose_baseaddr (cfa_offset);
9851 mem = gen_frame_mem (mode, addr);
9852
9853 /* For SSE saves, we need to indicate the 128-bit alignment. */
9854 set_mem_align (mem, GET_MODE_ALIGNMENT (mode));
9855
9856 insn = emit_move_insn (mem, reg);
9857 RTX_FRAME_RELATED_P (insn) = 1;
9858
9859 base = addr;
9860 if (GET_CODE (base) == PLUS)
9861 base = XEXP (base, 0);
9862 gcc_checking_assert (REG_P (base));
9863
9864 /* When saving registers into a re-aligned local stack frame, avoid
9865 any tricky guessing by dwarf2out. */
9866 if (m->fs.realigned)
9867 {
9868 gcc_checking_assert (stack_realign_drap);
9869
9870 if (regno == REGNO (crtl->drap_reg))
9871 {
9872 /* A bit of a hack. We force the DRAP register to be saved in
9873 the re-aligned stack frame, which provides us with a copy
9874 of the CFA that will last past the prologue. Install it. */
9875 gcc_checking_assert (cfun->machine->fs.fp_valid);
9876 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
9877 cfun->machine->fs.fp_offset - cfa_offset);
9878 mem = gen_rtx_MEM (mode, addr);
9879 add_reg_note (insn, REG_CFA_DEF_CFA, mem);
9880 }
9881 else
9882 {
9883 /* The frame pointer is a stable reference within the
9884 aligned frame. Use it. */
9885 gcc_checking_assert (cfun->machine->fs.fp_valid);
9886 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
9887 cfun->machine->fs.fp_offset - cfa_offset);
9888 mem = gen_rtx_MEM (mode, addr);
9889 add_reg_note (insn, REG_CFA_EXPRESSION,
9890 gen_rtx_SET (VOIDmode, mem, reg));
9891 }
9892 }
9893
9894 /* The memory may not be relative to the current CFA register,
9895 which means that we may need to generate a new pattern for
9896 use by the unwind info. */
9897 else if (base != m->fs.cfa_reg)
9898 {
9899 addr = plus_constant (Pmode, m->fs.cfa_reg,
9900 m->fs.cfa_offset - cfa_offset);
9901 mem = gen_rtx_MEM (mode, addr);
9902 add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (VOIDmode, mem, reg));
9903 }
9904 }
9905
9906 /* Emit code to save registers using MOV insns.
9907 First register is stored at CFA - CFA_OFFSET. */
9908 static void
9909 ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
9910 {
9911 unsigned int regno;
9912
9913 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9914 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9915 {
9916 ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset);
9917 cfa_offset -= UNITS_PER_WORD;
9918 }
9919 }
9920
9921 /* Emit code to save SSE registers using MOV insns.
9922 First register is stored at CFA - CFA_OFFSET. */
9923 static void
9924 ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
9925 {
9926 unsigned int regno;
9927
9928 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9929 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9930 {
9931 ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
9932 cfa_offset -= 16;
9933 }
9934 }
9935
9936 static GTY(()) rtx queued_cfa_restores;
9937
9938 /* Add a REG_CFA_RESTORE REG note to INSN or queue it until the next stack
9939 manipulation insn. The value is on the stack at CFA - CFA_OFFSET.
9940 Don't add the note if the previously saved value will be left untouched
9941 within stack red-zone till return, as unwinders can find the same value
9942 in the register and on the stack. */
9943
9944 static void
9945 ix86_add_cfa_restore_note (rtx insn, rtx reg, HOST_WIDE_INT cfa_offset)
9946 {
9947 if (!crtl->shrink_wrapped
9948 && cfa_offset <= cfun->machine->fs.red_zone_offset)
9949 return;
9950
9951 if (insn)
9952 {
9953 add_reg_note (insn, REG_CFA_RESTORE, reg);
9954 RTX_FRAME_RELATED_P (insn) = 1;
9955 }
9956 else
9957 queued_cfa_restores
9958 = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
9959 }
9960
9961 /* Add queued REG_CFA_RESTORE notes if any to INSN. */
9962
9963 static void
9964 ix86_add_queued_cfa_restore_notes (rtx insn)
9965 {
9966 rtx last;
9967 if (!queued_cfa_restores)
9968 return;
9969 for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
9970 ;
9971 XEXP (last, 1) = REG_NOTES (insn);
9972 REG_NOTES (insn) = queued_cfa_restores;
9973 queued_cfa_restores = NULL_RTX;
9974 RTX_FRAME_RELATED_P (insn) = 1;
9975 }
9976
9977 /* Expand prologue or epilogue stack adjustment.
9978 The pattern exists to put a dependency on all ebp-based memory accesses.
9979 STYLE should be negative if instructions should be marked as frame related,
9980 zero if the %r11 register is live and cannot be freely used, and positive
9981 otherwise. */
9982
9983 static void
9984 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
9985 int style, bool set_cfa)
9986 {
9987 struct machine_function *m = cfun->machine;
9988 rtx insn;
9989 bool add_frame_related_expr = false;
9990
9991 if (Pmode == SImode)
9992 insn = gen_pro_epilogue_adjust_stack_si_add (dest, src, offset);
9993 else if (x86_64_immediate_operand (offset, DImode))
9994 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, offset);
9995 else
9996 {
9997 rtx tmp;
9998 /* r11 is used by indirect sibcall return as well, set before the
9999 epilogue and used after the epilogue. */
10000 if (style)
10001 tmp = gen_rtx_REG (DImode, R11_REG);
10002 else
10003 {
10004 gcc_assert (src != hard_frame_pointer_rtx
10005 && dest != hard_frame_pointer_rtx);
10006 tmp = hard_frame_pointer_rtx;
10007 }
10008 insn = emit_insn (gen_rtx_SET (DImode, tmp, offset));
10009 if (style < 0)
10010 add_frame_related_expr = true;
10011
10012 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, tmp);
10013 }
10014
10015 insn = emit_insn (insn);
10016 if (style >= 0)
10017 ix86_add_queued_cfa_restore_notes (insn);
10018
10019 if (set_cfa)
10020 {
10021 rtx r;
10022
10023 gcc_assert (m->fs.cfa_reg == src);
10024 m->fs.cfa_offset += INTVAL (offset);
10025 m->fs.cfa_reg = dest;
10026
10027 r = gen_rtx_PLUS (Pmode, src, offset);
10028 r = gen_rtx_SET (VOIDmode, dest, r);
10029 add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
10030 RTX_FRAME_RELATED_P (insn) = 1;
10031 }
10032 else if (style < 0)
10033 {
10034 RTX_FRAME_RELATED_P (insn) = 1;
10035 if (add_frame_related_expr)
10036 {
10037 rtx r = gen_rtx_PLUS (Pmode, src, offset);
10038 r = gen_rtx_SET (VOIDmode, dest, r);
10039 add_reg_note (insn, REG_FRAME_RELATED_EXPR, r);
10040 }
10041 }
10042
10043 if (dest == stack_pointer_rtx)
10044 {
10045 HOST_WIDE_INT ooffset = m->fs.sp_offset;
10046 bool valid = m->fs.sp_valid;
10047
10048 if (src == hard_frame_pointer_rtx)
10049 {
10050 valid = m->fs.fp_valid;
10051 ooffset = m->fs.fp_offset;
10052 }
10053 else if (src == crtl->drap_reg)
10054 {
10055 valid = m->fs.drap_valid;
10056 ooffset = 0;
10057 }
10058 else
10059 {
10060 /* Else there are two possibilities: SP itself, which we set
10061 up as the default above, or EH_RETURN_STACKADJ_RTX, which is
10062 taken care of by hand along the eh_return path. */
10063 gcc_checking_assert (src == stack_pointer_rtx
10064 || offset == const0_rtx);
10065 }
10066
10067 m->fs.sp_offset = ooffset - INTVAL (offset);
10068 m->fs.sp_valid = valid;
10069 }
10070 }
10071
10072 /* Find an available register to be used as the dynamic realign argument
10073 pointer register. Such a register will be written in the prologue and
10074 used at the beginning of the body, so it must not be
10075 1. a parameter passing register.
10076 2. the GOT pointer.
10077 We reuse the static-chain register if it is available. Otherwise, we
10078 use DI for i386 and R13 for x86-64. We chose R13 since it has a
10079 shorter encoding.
10080
10081 Return: the regno of chosen register. */
10082
10083 static unsigned int
10084 find_drap_reg (void)
10085 {
10086 tree decl = cfun->decl;
10087
10088 if (TARGET_64BIT)
10089 {
10090 /* Use R13 for a nested function or a function that needs a static chain.
10091 Since a function with a tail call may use any caller-saved
10092 register in the epilogue, DRAP must not use a caller-saved
10093 register in that case. */
10094 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
10095 return R13_REG;
10096
10097 return R10_REG;
10098 }
10099 else
10100 {
10101 /* Use DI for a nested function or a function that needs a static chain.
10102 Since a function with a tail call may use any caller-saved
10103 register in the epilogue, DRAP must not use a caller-saved
10104 register in that case. */
10105 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
10106 return DI_REG;
10107
10108 /* Reuse static chain register if it isn't used for parameter
10109 passing. */
10110 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2)
10111 {
10112 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (decl));
10113 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) == 0)
10114 return CX_REG;
10115 }
10116 return DI_REG;
10117 }
10118 }
10119
10120 /* Return minimum incoming stack alignment. */
10121
10122 static unsigned int
10123 ix86_minimum_incoming_stack_boundary (bool sibcall)
10124 {
10125 unsigned int incoming_stack_boundary;
10126
10127 /* Prefer the one specified at command line. */
10128 if (ix86_user_incoming_stack_boundary)
10129 incoming_stack_boundary = ix86_user_incoming_stack_boundary;
10130 /* In 32-bit mode, use MIN_STACK_BOUNDARY for the incoming stack boundary
10131 if -mstackrealign is used, this isn't a sibcall check, and the
10132 estimated stack alignment is 128 bits. */
10133 else if (!sibcall
10134 && !TARGET_64BIT
10135 && ix86_force_align_arg_pointer
10136 && crtl->stack_alignment_estimated == 128)
10137 incoming_stack_boundary = MIN_STACK_BOUNDARY;
10138 else
10139 incoming_stack_boundary = ix86_default_incoming_stack_boundary;
10140
10141 /* Incoming stack alignment can be changed on individual functions
10142 via force_align_arg_pointer attribute. We use the smallest
10143 incoming stack boundary. */
10144 if (incoming_stack_boundary > MIN_STACK_BOUNDARY
10145 && lookup_attribute (ix86_force_align_arg_pointer_string,
10146 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
10147 incoming_stack_boundary = MIN_STACK_BOUNDARY;
10148
10149 /* The incoming stack frame has to be aligned at least at
10150 parm_stack_boundary. */
10151 if (incoming_stack_boundary < crtl->parm_stack_boundary)
10152 incoming_stack_boundary = crtl->parm_stack_boundary;
10153
10154 /* The stack at the entry of main is aligned by the runtime. We use the
10155 smallest incoming stack boundary. */
10156 if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
10157 && DECL_NAME (current_function_decl)
10158 && MAIN_NAME_P (DECL_NAME (current_function_decl))
10159 && DECL_FILE_SCOPE_P (current_function_decl))
10160 incoming_stack_boundary = MAIN_STACK_BOUNDARY;
10161
10162 return incoming_stack_boundary;
10163 }
10164
10165 /* Update incoming stack boundary and estimated stack alignment. */
10166
10167 static void
10168 ix86_update_stack_boundary (void)
10169 {
10170 ix86_incoming_stack_boundary
10171 = ix86_minimum_incoming_stack_boundary (false);
10172
10173 /* x86_64 varargs need 16-byte stack alignment for the register save
10174 area. */
10175 if (TARGET_64BIT
10176 && cfun->stdarg
10177 && crtl->stack_alignment_estimated < 128)
10178 crtl->stack_alignment_estimated = 128;
10179 }
10180
10181 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
10182 needed or an rtx for DRAP otherwise. */
10183
10184 static rtx
10185 ix86_get_drap_rtx (void)
10186 {
10187 if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
10188 crtl->need_drap = true;
10189
10190 if (stack_realign_drap)
10191 {
10192 /* Assign DRAP to vDRAP and return vDRAP. */
10193 unsigned int regno = find_drap_reg ();
10194 rtx drap_vreg;
10195 rtx arg_ptr;
10196 rtx seq, insn;
10197
10198 arg_ptr = gen_rtx_REG (Pmode, regno);
10199 crtl->drap_reg = arg_ptr;
10200
10201 start_sequence ();
10202 drap_vreg = copy_to_reg (arg_ptr);
10203 seq = get_insns ();
10204 end_sequence ();
10205
10206 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
10207 if (!optimize)
10208 {
10209 add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
10210 RTX_FRAME_RELATED_P (insn) = 1;
10211 }
10212 return drap_vreg;
10213 }
10214 else
10215 return NULL;
10216 }
10217
10218 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
10219
10220 static rtx
10221 ix86_internal_arg_pointer (void)
10222 {
10223 return virtual_incoming_args_rtx;
10224 }
10225
10226 struct scratch_reg {
10227 rtx reg;
10228 bool saved;
10229 };
10230
10231 /* Return a short-lived scratch register for use on function entry.
10232 In 32-bit mode, it is valid only after the registers are saved
10233 in the prologue. This register must be released by means of
10234 release_scratch_register_on_entry once it is dead. */
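/* Typical usage, as in the stack probing routines below (sketch only):

       struct scratch_reg sr;
       get_scratch_register_on_entry (&sr);
       ... emit insns that use sr.reg ...
       release_scratch_register_on_entry (&sr);  */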
10235
10236 static void
10237 get_scratch_register_on_entry (struct scratch_reg *sr)
10238 {
10239 int regno;
10240
10241 sr->saved = false;
10242
10243 if (TARGET_64BIT)
10244 {
10245 /* We always use R11 in 64-bit mode. */
10246 regno = R11_REG;
10247 }
10248 else
10249 {
10250 tree decl = current_function_decl, fntype = TREE_TYPE (decl);
10251 bool fastcall_p
10252 = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
10253 bool thiscall_p
10254 = lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
10255 bool static_chain_p = DECL_STATIC_CHAIN (decl);
10256 int regparm = ix86_function_regparm (fntype, decl);
10257 int drap_regno
10258 = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;
10259
10260 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
10261 for the static chain register. */
10262 if ((regparm < 1 || (fastcall_p && !static_chain_p))
10263 && drap_regno != AX_REG)
10264 regno = AX_REG;
10265 /* 'thiscall' sets regparm to 1, uses ecx for arguments and edx
10266 for the static chain register. */
10267 else if (thiscall_p && !static_chain_p && drap_regno != AX_REG)
10268 regno = AX_REG;
10269 else if (regparm < 2 && !thiscall_p && drap_regno != DX_REG)
10270 regno = DX_REG;
10271 /* ecx is the static chain register. */
10272 else if (regparm < 3 && !fastcall_p && !thiscall_p
10273 && !static_chain_p
10274 && drap_regno != CX_REG)
10275 regno = CX_REG;
10276 else if (ix86_save_reg (BX_REG, true))
10277 regno = BX_REG;
10278 /* esi is the static chain register. */
10279 else if (!(regparm == 3 && static_chain_p)
10280 && ix86_save_reg (SI_REG, true))
10281 regno = SI_REG;
10282 else if (ix86_save_reg (DI_REG, true))
10283 regno = DI_REG;
10284 else
10285 {
10286 regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
10287 sr->saved = true;
10288 }
10289 }
10290
10291 sr->reg = gen_rtx_REG (Pmode, regno);
10292 if (sr->saved)
10293 {
10294 rtx insn = emit_insn (gen_push (sr->reg));
10295 RTX_FRAME_RELATED_P (insn) = 1;
10296 }
10297 }
10298
10299 /* Release a scratch register obtained from the preceding function. */
10300
10301 static void
10302 release_scratch_register_on_entry (struct scratch_reg *sr)
10303 {
10304 if (sr->saved)
10305 {
10306 struct machine_function *m = cfun->machine;
10307 rtx x, insn = emit_insn (gen_pop (sr->reg));
10308
10309 /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */
10310 RTX_FRAME_RELATED_P (insn) = 1;
10311 x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD));
10312 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
10313 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
10314 m->fs.sp_offset -= UNITS_PER_WORD;
10315 }
10316 }
10317
10318 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
10319
10320 /* Emit code to adjust the stack pointer by SIZE bytes while probing it. */
10321
10322 static void
10323 ix86_adjust_stack_and_probe (const HOST_WIDE_INT size)
10324 {
10325 /* We skip the probe for the first interval + a small dope of 4 words and
10326 probe that many bytes past the specified size to maintain a protection
10327 area at the bottom of the stack. */
10328 const int dope = 4 * UNITS_PER_WORD;
10329 rtx size_rtx = GEN_INT (size), last;
10330
10331 /* See if we have a constant small number of probes to generate. If so,
10332 that's the easy case. The run-time loop is made up of 11 insns in the
10333 generic case while the compile-time loop is made up of 3+2*(n-1) insns
10334 for n # of intervals. */
10335 if (size <= 5 * PROBE_INTERVAL)
10336 {
10337 HOST_WIDE_INT i, adjust;
10338 bool first_probe = true;
10339
10340 /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
10341 values of N from 1 until it exceeds SIZE. If only one probe is
10342 needed, this will not generate any code. Then adjust and probe
10343 to PROBE_INTERVAL + SIZE. */
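/* A worked example, assuming PROBE_INTERVAL == 4096 and the 64-bit dope
   of 4 * 8 == 32 bytes: for SIZE == 10000 the code below emits

       sub $8224, %rsp ; probe      (2 * PROBE_INTERVAL + dope)
       sub $4096, %rsp ; probe
       sub $1808, %rsp ; probe      (SIZE + PROBE_INTERVAL - 12288)
       add $4128, %rsp              (PROBE_INTERVAL + dope)

   for a net decrement of exactly SIZE bytes. Illustrative values only;
   PROBE_INTERVAL depends on STACK_CHECK_PROBE_INTERVAL_EXP. */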
10344 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
10345 {
10346 if (first_probe)
10347 {
10348 adjust = 2 * PROBE_INTERVAL + dope;
10349 first_probe = false;
10350 }
10351 else
10352 adjust = PROBE_INTERVAL;
10353
10354 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10355 plus_constant (Pmode, stack_pointer_rtx,
10356 -adjust)));
10357 emit_stack_probe (stack_pointer_rtx);
10358 }
10359
10360 if (first_probe)
10361 adjust = size + PROBE_INTERVAL + dope;
10362 else
10363 adjust = size + PROBE_INTERVAL - i;
10364
10365 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10366 plus_constant (Pmode, stack_pointer_rtx,
10367 -adjust)));
10368 emit_stack_probe (stack_pointer_rtx);
10369
10370 /* Adjust back to account for the additional first interval. */
10371 last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10372 plus_constant (Pmode, stack_pointer_rtx,
10373 PROBE_INTERVAL + dope)));
10374 }
10375
10376 /* Otherwise, do the same as above, but in a loop. Note that we must be
10377 extra careful with variables wrapping around because we might be at
10378 the very top (or the very bottom) of the address space and we have
10379 to be able to handle this case properly; in particular, we use an
10380 equality test for the loop condition. */
10381 else
10382 {
10383 HOST_WIDE_INT rounded_size;
10384 struct scratch_reg sr;
10385
10386 get_scratch_register_on_entry (&sr);
10387
10388
10389 /* Step 1: round SIZE to the previous multiple of the interval. */
10390
10391 rounded_size = size & -PROBE_INTERVAL;
10392
10393
10394 /* Step 2: compute initial and final value of the loop counter. */
10395
10396 /* SP = SP_0 + PROBE_INTERVAL. */
10397 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10398 plus_constant (Pmode, stack_pointer_rtx,
10399 - (PROBE_INTERVAL + dope))));
10400
10401 /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */
10402 emit_move_insn (sr.reg, GEN_INT (-rounded_size));
10403 emit_insn (gen_rtx_SET (VOIDmode, sr.reg,
10404 gen_rtx_PLUS (Pmode, sr.reg,
10405 stack_pointer_rtx)));
10406
10407
10408 /* Step 3: the loop
10409
10410 while (SP != LAST_ADDR)
10411 {
10412 SP = SP + PROBE_INTERVAL
10413 probe at SP
10414 }
10415
10416 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
10417 values of N from 1 until it is equal to ROUNDED_SIZE. */
10418
10419 emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, size_rtx));
10420
10421
10422 /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
10423 assert at compile-time that SIZE is equal to ROUNDED_SIZE. */
10424
10425 if (size != rounded_size)
10426 {
10427 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10428 plus_constant (Pmode, stack_pointer_rtx,
10429 rounded_size - size)));
10430 emit_stack_probe (stack_pointer_rtx);
10431 }
10432
10433 /* Adjust back to account for the additional first interval. */
10434 last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10435 plus_constant (Pmode, stack_pointer_rtx,
10436 PROBE_INTERVAL + dope)));
10437
10438 release_scratch_register_on_entry (&sr);
10439 }
10440
10441 gcc_assert (cfun->machine->fs.cfa_reg != stack_pointer_rtx);
10442
10443 /* Even if the stack pointer isn't the CFA register, we need to correctly
10444 describe the adjustments made to it, in particular differentiate the
10445 frame-related ones from the frame-unrelated ones. */
10446 if (size > 0)
10447 {
10448 rtx expr = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (2));
10449 XVECEXP (expr, 0, 0)
10450 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10451 plus_constant (Pmode, stack_pointer_rtx, -size));
10452 XVECEXP (expr, 0, 1)
10453 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10454 plus_constant (Pmode, stack_pointer_rtx,
10455 PROBE_INTERVAL + dope + size));
10456 add_reg_note (last, REG_FRAME_RELATED_EXPR, expr);
10457 RTX_FRAME_RELATED_P (last) = 1;
10458
10459 cfun->machine->fs.sp_offset += size;
10460 }
10461
10462 /* Make sure nothing is scheduled before we are done. */
10463 emit_insn (gen_blockage ());
10464 }
10465
10466 /* Adjust the stack pointer up to REG while probing it. */
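/* Roughly, the loop printed below looks like this in AT&T syntax (a
   sketch, assuming a 64-bit target, %r11 as the probe register and a
   4096-byte probe interval):

       .LPSRL0: cmpq %r11, %rsp
                je   .LPSRE0
                subq $4096, %rsp
                orq  $0, (%rsp)
                jmp  .LPSRL0
       .LPSRE0:  */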
10467
10468 const char *
10469 output_adjust_stack_and_probe (rtx reg)
10470 {
10471 static int labelno = 0;
10472 char loop_lab[32], end_lab[32];
10473 rtx xops[2];
10474
10475 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
10476 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
10477
10478 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
10479
10480 /* Jump to END_LAB if SP == LAST_ADDR. */
10481 xops[0] = stack_pointer_rtx;
10482 xops[1] = reg;
10483 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
10484 fputs ("\tje\t", asm_out_file);
10485 assemble_name_raw (asm_out_file, end_lab);
10486 fputc ('\n', asm_out_file);
10487
10488 /* SP = SP + PROBE_INTERVAL. */
10489 xops[1] = GEN_INT (PROBE_INTERVAL);
10490 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
10491
10492 /* Probe at SP. */
10493 xops[1] = const0_rtx;
10494 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);
10495
10496 fprintf (asm_out_file, "\tjmp\t");
10497 assemble_name_raw (asm_out_file, loop_lab);
10498 fputc ('\n', asm_out_file);
10499
10500 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
10501
10502 return "";
10503 }
10504
10505 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
10506 inclusive. These are offsets from the current stack pointer. */
10507
10508 static void
10509 ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
10510 {
10511 /* See if we have a constant small number of probes to generate. If so,
10512 that's the easy case. The run-time loop is made up of 7 insns in the
10513 generic case while the compile-time loop is made up of n insns for n #
10514 of intervals. */
10515 if (size <= 7 * PROBE_INTERVAL)
10516 {
10517 HOST_WIDE_INT i;
10518
10519 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
10520 it exceeds SIZE. If only one probe is needed, this will not
10521 generate any code. Then probe at FIRST + SIZE. */
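/* For instance, assuming PROBE_INTERVAL == 4096, FIRST == 4096 and
   SIZE == 10000, this emits probes at sp - 8192, sp - 12288 and
   sp - 14096 (illustrative values only). */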
10522 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
10523 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
10524 -(first + i)));
10525
10526 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
10527 -(first + size)));
10528 }
10529
10530 /* Otherwise, do the same as above, but in a loop. Note that we must be
10531 extra careful with variables wrapping around because we might be at
10532 the very top (or the very bottom) of the address space and we have
10533 to be able to handle this case properly; in particular, we use an
10534 equality test for the loop condition. */
10535 else
10536 {
10537 HOST_WIDE_INT rounded_size, last;
10538 struct scratch_reg sr;
10539
10540 get_scratch_register_on_entry (&sr);
10541
10542
10543 /* Step 1: round SIZE to the previous multiple of the interval. */
10544
10545 rounded_size = size & -PROBE_INTERVAL;
10546
10547
10548 /* Step 2: compute initial and final value of the loop counter. */
10549
10550 /* TEST_OFFSET = FIRST. */
10551 emit_move_insn (sr.reg, GEN_INT (-first));
10552
10553 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
10554 last = first + rounded_size;
10555
10556
10557 /* Step 3: the loop
10558
10559 while (TEST_ADDR != LAST_ADDR)
10560 {
10561 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
10562 probe at TEST_ADDR
10563 }
10564
10565 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
10566 until it is equal to ROUNDED_SIZE. */
10567
10568 emit_insn (ix86_gen_probe_stack_range (sr.reg, sr.reg, GEN_INT (-last)));
10569
10570
10571 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
10572 that SIZE is equal to ROUNDED_SIZE. */
10573
10574 if (size != rounded_size)
10575 emit_stack_probe (plus_constant (Pmode,
10576 gen_rtx_PLUS (Pmode,
10577 stack_pointer_rtx,
10578 sr.reg),
10579 rounded_size - size));
10580
10581 release_scratch_register_on_entry (&sr);
10582 }
10583
10584 /* Make sure nothing is scheduled before we are done. */
10585 emit_insn (gen_blockage ());
10586 }
10587
10588 /* Probe a range of stack addresses from REG to END, inclusive. These are
10589 offsets from the current stack pointer. */
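/* The loop printed below probes through the scratch register, which holds
   a negated offset from the stack pointer. A sketch for a 64-bit target
   with a 4096-byte probe interval, %r11 as REG and LAST as the final
   offset:

       .LPSRL1: cmpq $-LAST, %r11
                je   .LPSRE1
                subq $4096, %r11
                orq  $0, (%rsp,%r11)
                jmp  .LPSRL1
       .LPSRE1:  */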
10590
10591 const char *
10592 output_probe_stack_range (rtx reg, rtx end)
10593 {
10594 static int labelno = 0;
10595 char loop_lab[32], end_lab[32];
10596 rtx xops[3];
10597
10598 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
10599 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
10600
10601 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
10602
10603 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
10604 xops[0] = reg;
10605 xops[1] = end;
10606 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
10607 fputs ("\tje\t", asm_out_file);
10608 assemble_name_raw (asm_out_file, end_lab);
10609 fputc ('\n', asm_out_file);
10610
10611 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
10612 xops[1] = GEN_INT (PROBE_INTERVAL);
10613 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
10614
10615 /* Probe at TEST_ADDR. */
10616 xops[0] = stack_pointer_rtx;
10617 xops[1] = reg;
10618 xops[2] = const0_rtx;
10619 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);
10620
10621 fprintf (asm_out_file, "\tjmp\t");
10622 assemble_name_raw (asm_out_file, loop_lab);
10623 fputc ('\n', asm_out_file);
10624
10625 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
10626
10627 return "";
10628 }
10629
10630 /* Finalize stack_realign_needed flag, which will guide prologue/epilogue
10631 to be generated in correct form. */
10632 static void
10633 ix86_finalize_stack_realign_flags (void)
10634 {
10635 /* Check if stack realignment is really needed after reload, and
10636 store the result in cfun. */
10637 unsigned int incoming_stack_boundary
10638 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
10639 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
10640 unsigned int stack_realign = (incoming_stack_boundary
10641 < (crtl->is_leaf
10642 ? crtl->max_used_stack_slot_alignment
10643 : crtl->stack_alignment_needed));
10644
10645 if (crtl->stack_realign_finalized)
10646 {
10647 /* After stack_realign_needed is finalized, we can no longer
10648 change it. */
10649 gcc_assert (crtl->stack_realign_needed == stack_realign);
10650 return;
10651 }
10652
10653 /* If the only reason for frame_pointer_needed is that we conservatively
10654 assumed stack realignment might be needed, but in the end nothing that
10655 needed the stack alignment had been spilled, clear frame_pointer_needed
10656 and say we don't need stack realignment. */
10657 if (stack_realign
10658 && frame_pointer_needed
10659 && crtl->is_leaf
10660 && flag_omit_frame_pointer
10661 && crtl->sp_is_unchanging
10662 && !ix86_current_function_calls_tls_descriptor
10663 && !crtl->accesses_prior_frames
10664 && !cfun->calls_alloca
10665 && !crtl->calls_eh_return
10666 && !(flag_stack_check && STACK_CHECK_MOVING_SP)
10667 && !ix86_frame_pointer_required ()
10668 && get_frame_size () == 0
10669 && ix86_nsaved_sseregs () == 0
10670 && ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0)
10671 {
10672 HARD_REG_SET set_up_by_prologue, prologue_used;
10673 basic_block bb;
10674
10675 CLEAR_HARD_REG_SET (prologue_used);
10676 CLEAR_HARD_REG_SET (set_up_by_prologue);
10677 add_to_hard_reg_set (&set_up_by_prologue, Pmode, STACK_POINTER_REGNUM);
10678 add_to_hard_reg_set (&set_up_by_prologue, Pmode, ARG_POINTER_REGNUM);
10679 add_to_hard_reg_set (&set_up_by_prologue, Pmode,
10680 HARD_FRAME_POINTER_REGNUM);
10681 FOR_EACH_BB_FN (bb, cfun)
10682 {
10683 rtx insn;
10684 FOR_BB_INSNS (bb, insn)
10685 if (NONDEBUG_INSN_P (insn)
10686 && requires_stack_frame_p (insn, prologue_used,
10687 set_up_by_prologue))
10688 {
10689 crtl->stack_realign_needed = stack_realign;
10690 crtl->stack_realign_finalized = true;
10691 return;
10692 }
10693 }
10694
10695 /* If drap has been set, but it actually isn't live at the start
10696 of the function, there is no reason to set it up. */
10697 if (crtl->drap_reg)
10698 {
10699 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
10700 if (! REGNO_REG_SET_P (DF_LR_IN (bb), REGNO (crtl->drap_reg)))
10701 {
10702 crtl->drap_reg = NULL_RTX;
10703 crtl->need_drap = false;
10704 }
10705 }
10706 else
10707 cfun->machine->no_drap_save_restore = true;
10708
10709 frame_pointer_needed = false;
10710 stack_realign = false;
10711 crtl->max_used_stack_slot_alignment = incoming_stack_boundary;
10712 crtl->stack_alignment_needed = incoming_stack_boundary;
10713 crtl->stack_alignment_estimated = incoming_stack_boundary;
10714 if (crtl->preferred_stack_boundary > incoming_stack_boundary)
10715 crtl->preferred_stack_boundary = incoming_stack_boundary;
10716 df_finish_pass (true);
10717 df_scan_alloc (NULL);
10718 df_scan_blocks ();
10719 df_compute_regs_ever_live (true);
10720 df_analyze ();
10721 }
10722
10723 crtl->stack_realign_needed = stack_realign;
10724 crtl->stack_realign_finalized = true;
10725 }
10726
10727 /* Expand the prologue into a bunch of separate insns. */
10728
10729 void
10730 ix86_expand_prologue (void)
10731 {
10732 struct machine_function *m = cfun->machine;
10733 rtx insn, t;
10734 bool pic_reg_used;
10735 struct ix86_frame frame;
10736 HOST_WIDE_INT allocate;
10737 bool int_registers_saved;
10738 bool sse_registers_saved;
10739
10740 ix86_finalize_stack_realign_flags ();
10741
10742 /* DRAP should not coexist with stack_realign_fp */
10743 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
10744
10745 memset (&m->fs, 0, sizeof (m->fs));
10746
10747 /* Initialize CFA state for before the prologue. */
10748 m->fs.cfa_reg = stack_pointer_rtx;
10749 m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;
10750
10751 /* Track SP offset to the CFA. We continue tracking this after we've
10752 swapped the CFA register away from SP. In the case of re-alignment
10753 this is fudged; we're interested in offsets within the local frame. */
10754 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
10755 m->fs.sp_valid = true;
10756
10757 ix86_compute_frame_layout (&frame);
10758
10759 if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
10760 {
10761 /* We should have already generated an error for any use of
10762 ms_hook on a nested function. */
10763 gcc_checking_assert (!ix86_static_chain_on_stack);
10764
10765 /* Check if profiling is active and we shall use the profiling-before-
10766 prologue variant. If so, issue a sorry. */
10767 if (crtl->profile && flag_fentry != 0)
10768 sorry ("ms_hook_prologue attribute isn%'t compatible "
10769 "with -mfentry for 32-bit");
10770
10771 /* In ix86_asm_output_function_label we emitted:
10772 8b ff movl.s %edi,%edi
10773 55 push %ebp
10774 8b ec movl.s %esp,%ebp
10775
10776 This matches the hookable function prologue in Win32 API
10777 functions in Microsoft Windows XP Service Pack 2 and newer.
10778 Wine uses this to enable Windows apps to hook the Win32 API
10779 functions provided by Wine.
10780
10781 What that means is that we've already set up the frame pointer. */
10782
10783 if (frame_pointer_needed
10784 && !(crtl->drap_reg && crtl->stack_realign_needed))
10785 {
10786 rtx push, mov;
10787
10788 /* We've decided to use the frame pointer already set up.
10789 Describe this to the unwinder by pretending that both
10790 push and mov insns happen right here.
10791
10792 Putting the unwind info here at the end of the ms_hook
10793 is done so that we can make absolutely certain we get
10794 the required byte sequence at the start of the function,
10795 rather than relying on an assembler that can produce
10796 the exact encoding required.
10797
10798 However it does mean (in the unpatched case) that we have
10799 a 1 insn window where the asynchronous unwind info is
10800 incorrect. However, if we placed the unwind info at
10801 its correct location we would have incorrect unwind info
10802 in the patched case. Which is probably all moot since
10803 I don't expect Wine generates dwarf2 unwind info for the
10804 system libraries that use this feature. */
10805
10806 insn = emit_insn (gen_blockage ());
10807
10808 push = gen_push (hard_frame_pointer_rtx);
10809 mov = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
10810 stack_pointer_rtx);
10811 RTX_FRAME_RELATED_P (push) = 1;
10812 RTX_FRAME_RELATED_P (mov) = 1;
10813
10814 RTX_FRAME_RELATED_P (insn) = 1;
10815 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
10816 gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));
10817
10818 /* Note that gen_push incremented m->fs.cfa_offset, even
10819 though we didn't emit the push insn here. */
10820 m->fs.cfa_reg = hard_frame_pointer_rtx;
10821 m->fs.fp_offset = m->fs.cfa_offset;
10822 m->fs.fp_valid = true;
10823 }
10824 else
10825 {
10826 /* The frame pointer is not needed so pop %ebp again.
10827 This leaves us with a pristine state. */
10828 emit_insn (gen_pop (hard_frame_pointer_rtx));
10829 }
10830 }
10831
10832 /* The first insn of a function that accepts its static chain on the
10833 stack is to push the register that would be filled in by a direct
10834 call. This insn will be skipped by the trampoline. */
10835 else if (ix86_static_chain_on_stack)
10836 {
10837 insn = emit_insn (gen_push (ix86_static_chain (cfun->decl, false)));
10838 emit_insn (gen_blockage ());
10839
10840 /* We don't want to interpret this push insn as a register save,
10841 only as a stack adjustment. The real copy of the register as
10842 a save will be done later, if needed. */
10843 t = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD);
10844 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
10845 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
10846 RTX_FRAME_RELATED_P (insn) = 1;
10847 }
10848
10849 /* Emit prologue code to adjust stack alignment and set up DRAP, in case
10850 DRAP is needed and stack realignment is really needed after reload. */
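/* A minimal sketch of the sequence emitted here for a 64-bit function
   realigned to 32 bytes with R10 as the DRAP register (illustrative
   register and constants only):

       lea  8(%rsp), %r10     # grab the incoming argument pointer
       and  $-32, %rsp        # align the stack
       push -8(%r10)          # replicate the return address
 */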
10851 if (stack_realign_drap)
10852 {
10853 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
10854
10855 /* Only need to push the parameter pointer reg if it is callee saved. */
10856 if (!call_used_regs[REGNO (crtl->drap_reg)])
10857 {
10858 /* Push arg pointer reg */
10859 insn = emit_insn (gen_push (crtl->drap_reg));
10860 RTX_FRAME_RELATED_P (insn) = 1;
10861 }
10862
10863 /* Grab the argument pointer. */
10864 t = plus_constant (Pmode, stack_pointer_rtx, m->fs.sp_offset);
10865 insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
10866 RTX_FRAME_RELATED_P (insn) = 1;
10867 m->fs.cfa_reg = crtl->drap_reg;
10868 m->fs.cfa_offset = 0;
10869
10870 /* Align the stack. */
10871 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
10872 stack_pointer_rtx,
10873 GEN_INT (-align_bytes)));
10874 RTX_FRAME_RELATED_P (insn) = 1;
10875
10876 /* Replicate the return address on the stack so that the return
10877 address can be reached via the (argp - 1) slot. This is needed
10878 to implement the macro RETURN_ADDR_RTX and the intrinsic function
10879 expand_builtin_return_addr, etc. */
10880 t = plus_constant (Pmode, crtl->drap_reg, -UNITS_PER_WORD);
10881 t = gen_frame_mem (word_mode, t);
10882 insn = emit_insn (gen_push (t));
10883 RTX_FRAME_RELATED_P (insn) = 1;
10884
10885 /* For the purposes of frame and register save area addressing,
10886 we've started over with a new frame. */
10887 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
10888 m->fs.realigned = true;
10889 }
10890
10891 int_registers_saved = (frame.nregs == 0);
10892 sse_registers_saved = (frame.nsseregs == 0);
10893
10894 if (frame_pointer_needed && !m->fs.fp_valid)
10895 {
10896 /* Note: AT&T enter does NOT have reversed args. Enter is probably
10897 slower on all targets. Also sdb doesn't like it. */
10898 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
10899 RTX_FRAME_RELATED_P (insn) = 1;
10900
10901 /* Push registers now, before setting the frame pointer
10902 on SEH target. */
10903 if (!int_registers_saved
10904 && TARGET_SEH
10905 && !frame.save_regs_using_mov)
10906 {
10907 ix86_emit_save_regs ();
10908 int_registers_saved = true;
10909 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
10910 }
10911
10912 if (m->fs.sp_offset == frame.hard_frame_pointer_offset)
10913 {
10914 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
10915 RTX_FRAME_RELATED_P (insn) = 1;
10916
10917 if (m->fs.cfa_reg == stack_pointer_rtx)
10918 m->fs.cfa_reg = hard_frame_pointer_rtx;
10919 m->fs.fp_offset = m->fs.sp_offset;
10920 m->fs.fp_valid = true;
10921 }
10922 }
10923
10924 if (!int_registers_saved)
10925 {
10926 /* If saving registers via PUSH, do so now. */
10927 if (!frame.save_regs_using_mov)
10928 {
10929 ix86_emit_save_regs ();
10930 int_registers_saved = true;
10931 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
10932 }
10933
10934 /* When using the red zone we may start register saving before allocating
10935 the stack frame, saving one cycle of the prologue. However, avoid
10936 doing this if we have to probe the stack; at least on x86_64 the
10937 stack probe can turn into a call that clobbers a red zone location. */
10938 else if (ix86_using_red_zone ()
10939 && (! TARGET_STACK_PROBE
10940 || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
10941 {
10942 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
10943 int_registers_saved = true;
10944 }
10945 }
10946
10947 if (stack_realign_fp)
10948 {
10949 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
10950 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
10951
10952 /* The computation of the size of the re-aligned stack frame means
10953 that we must allocate the size of the register save area before
10954 performing the actual alignment. Otherwise we cannot guarantee
10955 that there's enough storage above the realignment point. */
10956 if (m->fs.sp_offset != frame.sse_reg_save_offset)
10957 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10958 GEN_INT (m->fs.sp_offset
10959 - frame.sse_reg_save_offset),
10960 -1, false);
10961
10962 /* Align the stack. */
10963 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
10964 stack_pointer_rtx,
10965 GEN_INT (-align_bytes)));
10966
10967 /* For the purposes of register save area addressing, the stack
10968 pointer is no longer valid. As for the value of sp_offset,
10969 see ix86_compute_frame_layout, which we need to match in order
10970 to pass verification of stack_pointer_offset at the end. */
10971 m->fs.sp_offset = (m->fs.sp_offset + align_bytes) & -align_bytes;
10972 m->fs.sp_valid = false;
10973 }
10974
10975 allocate = frame.stack_pointer_offset - m->fs.sp_offset;
10976
10977 if (flag_stack_usage_info)
10978 {
10979 /* We start to count from ARG_POINTER. */
10980 HOST_WIDE_INT stack_size = frame.stack_pointer_offset;
10981
10982 /* If it was realigned, take into account the fake frame. */
10983 if (stack_realign_drap)
10984 {
10985 if (ix86_static_chain_on_stack)
10986 stack_size += UNITS_PER_WORD;
10987
10988 if (!call_used_regs[REGNO (crtl->drap_reg)])
10989 stack_size += UNITS_PER_WORD;
10990
10991 /* This over-estimates by 1 minimal-stack-alignment-unit but
10992 mitigates that by counting in the new return address slot. */
10993 current_function_dynamic_stack_size
10994 += crtl->stack_alignment_needed / BITS_PER_UNIT;
10995 }
10996
10997 current_function_static_stack_size = stack_size;
10998 }
10999
11000 /* On SEH target with very large frame size, allocate an area to save
11001 SSE registers (as the very large allocation won't be described). */
11002 if (TARGET_SEH
11003 && frame.stack_pointer_offset > SEH_MAX_FRAME_SIZE
11004 && !sse_registers_saved)
11005 {
11006 HOST_WIDE_INT sse_size =
11007 frame.sse_reg_save_offset - frame.reg_save_offset;
11008
11009 gcc_assert (int_registers_saved);
11010
11011 /* No need to do stack checking as the area will be immediately
11012 written. */
11013 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11014 GEN_INT (-sse_size), -1,
11015 m->fs.cfa_reg == stack_pointer_rtx);
11016 allocate -= sse_size;
11017 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
11018 sse_registers_saved = true;
11019 }
11020
11021 /* The stack has already been decremented by the instruction calling us
11022 so probe if the size is non-negative to preserve the protection area. */
11023 if (allocate >= 0 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
11024 {
11025 /* We expect the registers to be saved when probes are used. */
11026 gcc_assert (int_registers_saved);
11027
11028 if (STACK_CHECK_MOVING_SP)
11029 {
11030 if (!(crtl->is_leaf && !cfun->calls_alloca
11031 && allocate <= PROBE_INTERVAL))
11032 {
11033 ix86_adjust_stack_and_probe (allocate);
11034 allocate = 0;
11035 }
11036 }
11037 else
11038 {
11039 HOST_WIDE_INT size = allocate;
11040
11041 if (TARGET_64BIT && size >= (HOST_WIDE_INT) 0x80000000)
11042 size = 0x80000000 - STACK_CHECK_PROTECT - 1;
11043
11044 if (TARGET_STACK_PROBE)
11045 {
11046 if (crtl->is_leaf && !cfun->calls_alloca)
11047 {
11048 if (size > PROBE_INTERVAL)
11049 ix86_emit_probe_stack_range (0, size);
11050 }
11051 else
11052 ix86_emit_probe_stack_range (0, size + STACK_CHECK_PROTECT);
11053 }
11054 else
11055 {
11056 if (crtl->is_leaf && !cfun->calls_alloca)
11057 {
11058 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
11059 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT,
11060 size - STACK_CHECK_PROTECT);
11061 }
11062 else
11063 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
11064 }
11065 }
11066 }
11067
11068 if (allocate == 0)
11069 ;
11070 else if (!ix86_target_stack_probe ()
11071 || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
11072 {
11073 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11074 GEN_INT (-allocate), -1,
11075 m->fs.cfa_reg == stack_pointer_rtx);
11076 }
11077 else
11078 {
11079 rtx eax = gen_rtx_REG (Pmode, AX_REG);
11080 rtx r10 = NULL;
11081 rtx (*adjust_stack_insn)(rtx, rtx, rtx);
11082 const bool sp_is_cfa_reg = (m->fs.cfa_reg == stack_pointer_rtx);
11083 bool eax_live = ix86_eax_live_at_start_p ();
11084 bool r10_live = false;
11085
11086 if (TARGET_64BIT)
11087 r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);
11088
11089 if (eax_live)
11090 {
11091 insn = emit_insn (gen_push (eax));
11092 allocate -= UNITS_PER_WORD;
11093 /* Note that SEH directives need to continue tracking the stack
11094 pointer even after the frame pointer has been set up. */
11095 if (sp_is_cfa_reg || TARGET_SEH)
11096 {
11097 if (sp_is_cfa_reg)
11098 m->fs.cfa_offset += UNITS_PER_WORD;
11099 RTX_FRAME_RELATED_P (insn) = 1;
11100 }
11101 }
11102
11103 if (r10_live)
11104 {
11105 r10 = gen_rtx_REG (Pmode, R10_REG);
11106 insn = emit_insn (gen_push (r10));
11107 allocate -= UNITS_PER_WORD;
11108 if (sp_is_cfa_reg || TARGET_SEH)
11109 {
11110 if (sp_is_cfa_reg)
11111 m->fs.cfa_offset += UNITS_PER_WORD;
11112 RTX_FRAME_RELATED_P (insn) = 1;
11113 }
11114 }
11115
11116 emit_move_insn (eax, GEN_INT (allocate));
11117 emit_insn (ix86_gen_allocate_stack_worker (eax, eax));
11118
11119 /* Use the fact that AX still contains ALLOCATE. */
11120 adjust_stack_insn = (Pmode == DImode
11121 ? gen_pro_epilogue_adjust_stack_di_sub
11122 : gen_pro_epilogue_adjust_stack_si_sub);
11123
11124 insn = emit_insn (adjust_stack_insn (stack_pointer_rtx,
11125 stack_pointer_rtx, eax));
11126
11127 if (sp_is_cfa_reg || TARGET_SEH)
11128 {
11129 if (sp_is_cfa_reg)
11130 m->fs.cfa_offset += allocate;
11131 RTX_FRAME_RELATED_P (insn) = 1;
11132 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11133 gen_rtx_SET (VOIDmode, stack_pointer_rtx,
11134 plus_constant (Pmode, stack_pointer_rtx,
11135 -allocate)));
11136 }
11137 m->fs.sp_offset += allocate;
11138
11139 /* Use stack_pointer_rtx for relative addressing so that code
11140 works for realigned stack, too. */
11141 if (r10_live && eax_live)
11142 {
11143 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
11144 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
11145 gen_frame_mem (word_mode, t));
11146 t = plus_constant (Pmode, t, UNITS_PER_WORD);
11147 emit_move_insn (gen_rtx_REG (word_mode, AX_REG),
11148 gen_frame_mem (word_mode, t));
11149 }
11150 else if (eax_live || r10_live)
11151 {
11152 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
11153 emit_move_insn (gen_rtx_REG (word_mode,
11154 (eax_live ? AX_REG : R10_REG)),
11155 gen_frame_mem (word_mode, t));
11156 }
11157 }
11158 gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);
11159
11160 /* If we haven't already set up the frame pointer, do so now. */
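/* I.e. emit something along the lines of "lea OFFSET(%rsp), %rbp", where
   OFFSET is frame.stack_pointer_offset - frame.hard_frame_pointer_offset
   (a sketch only; ix86_gen_add3 may just as well emit an add). */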
11161 if (frame_pointer_needed && !m->fs.fp_valid)
11162 {
11163 insn = ix86_gen_add3 (hard_frame_pointer_rtx, stack_pointer_rtx,
11164 GEN_INT (frame.stack_pointer_offset
11165 - frame.hard_frame_pointer_offset));
11166 insn = emit_insn (insn);
11167 RTX_FRAME_RELATED_P (insn) = 1;
11168 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
11169
11170 if (m->fs.cfa_reg == stack_pointer_rtx)
11171 m->fs.cfa_reg = hard_frame_pointer_rtx;
11172 m->fs.fp_offset = frame.hard_frame_pointer_offset;
11173 m->fs.fp_valid = true;
11174 }
11175
11176 if (!int_registers_saved)
11177 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
11178 if (!sse_registers_saved)
11179 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
11180
11181 pic_reg_used = false;
11182 /* We don't use the PIC register for the PE-COFF target. */
11183 if (pic_offset_table_rtx
11184 && !TARGET_PECOFF
11185 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
11186 || crtl->profile))
11187 {
11188 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
11189
11190 if (alt_pic_reg_used != INVALID_REGNUM)
11191 SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);
11192
11193 pic_reg_used = true;
11194 }
11195
11196 if (pic_reg_used)
11197 {
11198 if (TARGET_64BIT)
11199 {
11200 if (ix86_cmodel == CM_LARGE_PIC)
11201 {
11202 rtx label, tmp_reg;
11203
11204 gcc_assert (Pmode == DImode);
11205 label = gen_label_rtx ();
11206 emit_label (label);
11207 LABEL_PRESERVE_P (label) = 1;
11208 tmp_reg = gen_rtx_REG (Pmode, R11_REG);
11209 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
11210 insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx,
11211 label));
11212 insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
11213 insn = emit_insn (ix86_gen_add3 (pic_offset_table_rtx,
11214 pic_offset_table_rtx, tmp_reg));
11215 }
11216 else
11217 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
11218 }
11219 else
11220 {
11221 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
11222 RTX_FRAME_RELATED_P (insn) = 1;
11223 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
11224 }
11225 }
11226
11227 /* In the pic_reg_used case, make sure that the got load isn't deleted
11228 when mcount needs it. Blockage to avoid call movement across mcount
11229 call is emitted in generic code after the NOTE_INSN_PROLOGUE_END
11230 note. */
11231 if (crtl->profile && !flag_fentry && pic_reg_used)
11232 emit_insn (gen_prologue_use (pic_offset_table_rtx));
11233
11234 if (crtl->drap_reg && !crtl->stack_realign_needed)
11235 {
11236 /* vDRAP is set up, but after reload it turns out stack realignment
11237 isn't necessary; here we emit prologue code to set up DRAP
11238 without the stack realignment adjustment. */
11239 t = choose_baseaddr (0);
11240 emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
11241 }
11242
11243 /* Prevent instructions from being scheduled into the register save push
11244 sequence when access to the red-zone area is done through the frame pointer.
11245 The offset between the frame pointer and the stack pointer is calculated
11246 relative to the value of the stack pointer at the end of the function
11247 prologue, and moving instructions that access the red-zone area via the
11248 frame pointer inside the push sequence violates this assumption. */
11249 if (frame_pointer_needed && frame.red_zone_size)
11250 emit_insn (gen_memory_blockage ());
11251
11252 /* Emit cld instruction if stringops are used in the function. */
11253 if (TARGET_CLD && ix86_current_function_needs_cld)
11254 emit_insn (gen_cld ());
11255
11256 /* SEH requires that the prologue end within 256 bytes of the start of
11257 the function. Prevent instruction schedules that would extend that.
11258 Further, prevent alloca modifications to the stack pointer from being
11259 combined with prologue modifications. */
11260 if (TARGET_SEH)
11261 emit_insn (gen_prologue_use (stack_pointer_rtx));
11262 }
11263
11264 /* Emit code to restore REG using a POP insn. */
11265
11266 static void
11267 ix86_emit_restore_reg_using_pop (rtx reg)
11268 {
11269 struct machine_function *m = cfun->machine;
11270 rtx insn = emit_insn (gen_pop (reg));
11271
11272 ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
11273 m->fs.sp_offset -= UNITS_PER_WORD;
11274
11275 if (m->fs.cfa_reg == crtl->drap_reg
11276 && REGNO (reg) == REGNO (crtl->drap_reg))
11277 {
11278 /* Previously we'd represented the CFA as an expression
11279 like *(%ebp - 8). We've just popped that value from
11280 the stack, which means we need to reset the CFA to
11281 the drap register. This will remain until we restore
11282 the stack pointer. */
11283 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
11284 RTX_FRAME_RELATED_P (insn) = 1;
11285
11286 /* This means that the DRAP register is valid for addressing too. */
11287 m->fs.drap_valid = true;
11288 return;
11289 }
11290
11291 if (m->fs.cfa_reg == stack_pointer_rtx)
11292 {
11293 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
11294 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
11295 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
11296 RTX_FRAME_RELATED_P (insn) = 1;
11297
11298 m->fs.cfa_offset -= UNITS_PER_WORD;
11299 }
11300
11301 /* When the frame pointer is the CFA, and we pop it, we are
11302 swapping back to the stack pointer as the CFA. This happens
11303 for stack frames that don't allocate other data, so we assume
11304 the stack pointer is now pointing at the return address, i.e.
11305 the function entry state, which makes the offset one word. */
11306 if (reg == hard_frame_pointer_rtx)
11307 {
11308 m->fs.fp_valid = false;
11309 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
11310 {
11311 m->fs.cfa_reg = stack_pointer_rtx;
11312 m->fs.cfa_offset -= UNITS_PER_WORD;
11313
11314 add_reg_note (insn, REG_CFA_DEF_CFA,
11315 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11316 GEN_INT (m->fs.cfa_offset)));
11317 RTX_FRAME_RELATED_P (insn) = 1;
11318 }
11319 }
11320 }
11321
11322 /* Emit code to restore saved registers using POP insns. */
11323
11324 static void
11325 ix86_emit_restore_regs_using_pop (void)
11326 {
11327 unsigned int regno;
11328
11329 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11330 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
11331 ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno));
11332 }
11333
11334 /* Emit code and notes for the LEAVE instruction. */
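/* leave behaves like "mov %ebp, %esp; pop %ebp", so afterwards the stack
   pointer is valid again and sits one word above the slot that held the
   saved frame pointer, which is what the state updates below record. */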
11335
11336 static void
11337 ix86_emit_leave (void)
11338 {
11339 struct machine_function *m = cfun->machine;
11340 rtx insn = emit_insn (ix86_gen_leave ());
11341
11342 ix86_add_queued_cfa_restore_notes (insn);
11343
11344 gcc_assert (m->fs.fp_valid);
11345 m->fs.sp_valid = true;
11346 m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
11347 m->fs.fp_valid = false;
11348
11349 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
11350 {
11351 m->fs.cfa_reg = stack_pointer_rtx;
11352 m->fs.cfa_offset = m->fs.sp_offset;
11353
11354 add_reg_note (insn, REG_CFA_DEF_CFA,
11355 plus_constant (Pmode, stack_pointer_rtx,
11356 m->fs.sp_offset));
11357 RTX_FRAME_RELATED_P (insn) = 1;
11358 }
11359 ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
11360 m->fs.fp_offset);
11361 }
11362
11363 /* Emit code to restore saved registers using MOV insns.
11364 The first register is restored from CFA - CFA_OFFSET. */
11365 static void
11366 ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
11367 bool maybe_eh_return)
11368 {
11369 struct machine_function *m = cfun->machine;
11370 unsigned int regno;
11371
11372 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11373 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
11374 {
11375 rtx reg = gen_rtx_REG (word_mode, regno);
11376 rtx insn, mem;
11377
11378 mem = choose_baseaddr (cfa_offset);
11379 mem = gen_frame_mem (word_mode, mem);
11380 insn = emit_move_insn (reg, mem);
11381
11382 if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
11383 {
11384 /* Previously we'd represented the CFA as an expression
11385 like *(%ebp - 8). We've just popped that value from
11386 the stack, which means we need to reset the CFA to
11387 the drap register. This will remain until we restore
11388 the stack pointer. */
11389 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
11390 RTX_FRAME_RELATED_P (insn) = 1;
11391
11392 /* This means that the DRAP register is valid for addressing. */
11393 m->fs.drap_valid = true;
11394 }
11395 else
11396 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
11397
11398 cfa_offset -= UNITS_PER_WORD;
11399 }
11400 }
11401
11402 /* Emit code to restore saved SSE registers using MOV insns.
11403 The first register is restored from CFA - CFA_OFFSET. */
11404 static void
11405 ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
11406 bool maybe_eh_return)
11407 {
11408 unsigned int regno;
11409
11410 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11411 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
11412 {
11413 rtx reg = gen_rtx_REG (V4SFmode, regno);
11414 rtx mem;
11415
11416 mem = choose_baseaddr (cfa_offset);
11417 mem = gen_rtx_MEM (V4SFmode, mem);
11418 set_mem_align (mem, 128);
11419 emit_move_insn (reg, mem);
11420
11421 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
11422
11423 cfa_offset -= 16;
11424 }
11425 }
11426
11427 /* Restore function stack, frame, and registers. */
11428
11429 void
11430 ix86_expand_epilogue (int style)
11431 {
11432 struct machine_function *m = cfun->machine;
11433 struct machine_frame_state frame_state_save = m->fs;
11434 struct ix86_frame frame;
11435 bool restore_regs_via_mov;
11436 bool using_drap;
11437
11438 ix86_finalize_stack_realign_flags ();
11439 ix86_compute_frame_layout (&frame);
11440
11441 m->fs.sp_valid = (!frame_pointer_needed
11442 || (crtl->sp_is_unchanging
11443 && !stack_realign_fp));
11444 gcc_assert (!m->fs.sp_valid
11445 || m->fs.sp_offset == frame.stack_pointer_offset);
11446
11447 /* The FP must be valid if the frame pointer is present. */
11448 gcc_assert (frame_pointer_needed == m->fs.fp_valid);
11449 gcc_assert (!m->fs.fp_valid
11450 || m->fs.fp_offset == frame.hard_frame_pointer_offset);
11451
11452 /* We must have *some* valid pointer to the stack frame. */
11453 gcc_assert (m->fs.sp_valid || m->fs.fp_valid);
11454
11455 /* The DRAP is never valid at this point. */
11456 gcc_assert (!m->fs.drap_valid);
11457
11458 /* See the comment about red zone and frame
11459 pointer usage in ix86_expand_prologue. */
11460 if (frame_pointer_needed && frame.red_zone_size)
11461 emit_insn (gen_memory_blockage ());
11462
11463 using_drap = crtl->drap_reg && crtl->stack_realign_needed;
11464 gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);
11465
11466 /* Determine the CFA offset of the end of the red-zone. */
11467 m->fs.red_zone_offset = 0;
11468 if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
11469 {
11470 /* The red-zone begins below the return address. */
11471 m->fs.red_zone_offset = RED_ZONE_SIZE + UNITS_PER_WORD;
11472
11473 /* When the register save area is in the aligned portion of
11474 the stack, determine the maximum runtime displacement that
11475 matches up with the aligned frame. */
11476 if (stack_realign_drap)
11477 m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
11478 + UNITS_PER_WORD);
11479 }
11480
11481 /* Special care must be taken for the normal return case of a function
11482 using eh_return: the eax and edx registers are marked as saved, but
11483 not restored along this path. Adjust the save location to match. */
11484 if (crtl->calls_eh_return && style != 2)
11485 frame.reg_save_offset -= 2 * UNITS_PER_WORD;
11486
11487 /* EH_RETURN requires the use of moves to function properly. */
11488 if (crtl->calls_eh_return)
11489 restore_regs_via_mov = true;
11490 /* SEH requires the use of pops to identify the epilogue. */
11491 else if (TARGET_SEH)
11492 restore_regs_via_mov = false;
11493 /* If we're only restoring one register and sp is not valid, then
11494 use a move instruction to restore the register, since it's
11495 less work than reloading sp and popping the register. */
11496 else if (!m->fs.sp_valid && frame.nregs <= 1)
11497 restore_regs_via_mov = true;
11498 else if (TARGET_EPILOGUE_USING_MOVE
11499 && cfun->machine->use_fast_prologue_epilogue
11500 && (frame.nregs > 1
11501 || m->fs.sp_offset != frame.reg_save_offset))
11502 restore_regs_via_mov = true;
11503 else if (frame_pointer_needed
11504 && !frame.nregs
11505 && m->fs.sp_offset != frame.reg_save_offset)
11506 restore_regs_via_mov = true;
11507 else if (frame_pointer_needed
11508 && TARGET_USE_LEAVE
11509 && cfun->machine->use_fast_prologue_epilogue
11510 && frame.nregs == 1)
11511 restore_regs_via_mov = true;
11512 else
11513 restore_regs_via_mov = false;
11514
11515 if (restore_regs_via_mov || frame.nsseregs)
11516 {
11517 /* Ensure that the entire register save area is addressable via
11518 the stack pointer, if we will restore via sp. */
11519 if (TARGET_64BIT
11520 && m->fs.sp_offset > 0x7fffffff
11521 && !(m->fs.fp_valid || m->fs.drap_valid)
11522 && (frame.nsseregs + frame.nregs) != 0)
11523 {
11524 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11525 GEN_INT (m->fs.sp_offset
11526 - frame.sse_reg_save_offset),
11527 style,
11528 m->fs.cfa_reg == stack_pointer_rtx);
11529 }
11530 }
11531
11532 /* If there are any SSE registers to restore, then we have to do it
11533 via moves, since there's obviously no pop for SSE regs. */
11534 if (frame.nsseregs)
11535 ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
11536 style == 2);
11537
11538 if (restore_regs_via_mov)
11539 {
11540 rtx t;
11541
11542 if (frame.nregs)
11543 ix86_emit_restore_regs_using_mov (frame.reg_save_offset, style == 2);
11544
11545 /* eh_return epilogues need %ecx added to the stack pointer. */
11546 if (style == 2)
11547 {
11548 rtx insn, sa = EH_RETURN_STACKADJ_RTX;
11549
11550 /* Stack align doesn't work with eh_return. */
11551 gcc_assert (!stack_realign_drap);
11552 /* Neither do regparm nested functions. */
11553 gcc_assert (!ix86_static_chain_on_stack);
11554
11555 if (frame_pointer_needed)
11556 {
11557 t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
11558 t = plus_constant (Pmode, t, m->fs.fp_offset - UNITS_PER_WORD);
11559 emit_insn (gen_rtx_SET (VOIDmode, sa, t));
11560
11561 t = gen_frame_mem (Pmode, hard_frame_pointer_rtx);
11562 insn = emit_move_insn (hard_frame_pointer_rtx, t);
11563
11564 /* Note that we use SA as a temporary CFA, as the return
11565 address is at the proper place relative to it. We
11566 pretend this happens at the FP restore insn because
11567 prior to this insn the FP would be stored at the wrong
11568 offset relative to SA, and after this insn we have no
11569 other reasonable register to use for the CFA. We don't
11570 bother resetting the CFA to the SP for the duration of
11571 the return insn. */
11572 add_reg_note (insn, REG_CFA_DEF_CFA,
11573 plus_constant (Pmode, sa, UNITS_PER_WORD));
11574 ix86_add_queued_cfa_restore_notes (insn);
11575 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
11576 RTX_FRAME_RELATED_P (insn) = 1;
11577
11578 m->fs.cfa_reg = sa;
11579 m->fs.cfa_offset = UNITS_PER_WORD;
11580 m->fs.fp_valid = false;
11581
11582 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
11583 const0_rtx, style, false);
11584 }
11585 else
11586 {
11587 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
11588 t = plus_constant (Pmode, t, m->fs.sp_offset - UNITS_PER_WORD);
11589 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, t));
11590 ix86_add_queued_cfa_restore_notes (insn);
11591
11592 gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
11593 if (m->fs.cfa_offset != UNITS_PER_WORD)
11594 {
11595 m->fs.cfa_offset = UNITS_PER_WORD;
11596 add_reg_note (insn, REG_CFA_DEF_CFA,
11597 plus_constant (Pmode, stack_pointer_rtx,
11598 UNITS_PER_WORD));
11599 RTX_FRAME_RELATED_P (insn) = 1;
11600 }
11601 }
11602 m->fs.sp_offset = UNITS_PER_WORD;
11603 m->fs.sp_valid = true;
11604 }
11605 }
11606 else
11607 {
11608 /* SEH requires that the function end with (1) a stack adjustment
11609 if necessary, (2) a sequence of pops, and (3) a return or
11610 jump instruction. Prevent insns from the function body from
11611 being scheduled into this sequence. */
11612 if (TARGET_SEH)
11613 {
11614 /* Prevent a catch region from being adjacent to the standard
11615 epilogue sequence. Unfortunately, neither crtl->uses_eh_lsda nor
11616 several other flags that would be interesting to test are
11617 set up yet. */
11618 if (flag_non_call_exceptions)
11619 emit_insn (gen_nops (const1_rtx));
11620 else
11621 emit_insn (gen_blockage ());
11622 }
11623
11624 /* The first step is to deallocate the stack frame so that we can
11625 pop the registers. Also do it on the SEH target for a very large
11626 frame, as the emitted instructions aren't allowed by the ABI in
11627 epilogues. */
11628 if (!m->fs.sp_valid
11629 || (TARGET_SEH
11630 && (m->fs.sp_offset - frame.reg_save_offset
11631 >= SEH_MAX_FRAME_SIZE)))
11632 {
11633 pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
11634 GEN_INT (m->fs.fp_offset
11635 - frame.reg_save_offset),
11636 style, false);
11637 }
11638 else if (m->fs.sp_offset != frame.reg_save_offset)
11639 {
11640 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11641 GEN_INT (m->fs.sp_offset
11642 - frame.reg_save_offset),
11643 style,
11644 m->fs.cfa_reg == stack_pointer_rtx);
11645 }
11646
11647 ix86_emit_restore_regs_using_pop ();
11648 }
11649
11650 /* If we used a frame pointer and haven't already got rid of it,
11651 then do so now. */
11652 if (m->fs.fp_valid)
11653 {
11654 /* If the stack pointer is valid and pointing at the frame
11655 pointer store address, then we only need a pop. */
11656 if (m->fs.sp_valid && m->fs.sp_offset == frame.hfp_save_offset)
11657 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
11658 /* Leave results in shorter dependency chains on CPUs that are
11659 able to grok it fast. */
11660 else if (TARGET_USE_LEAVE
11661 || optimize_bb_for_size_p (EXIT_BLOCK_PTR_FOR_FN (cfun))
11662 || !cfun->machine->use_fast_prologue_epilogue)
11663 ix86_emit_leave ();
11664 else
11665 {
11666 pro_epilogue_adjust_stack (stack_pointer_rtx,
11667 hard_frame_pointer_rtx,
11668 const0_rtx, style, !using_drap);
11669 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
11670 }
11671 }
11672
11673 if (using_drap)
11674 {
11675 int param_ptr_offset = UNITS_PER_WORD;
11676 rtx insn;
11677
11678 gcc_assert (stack_realign_drap);
11679
11680 if (ix86_static_chain_on_stack)
11681 param_ptr_offset += UNITS_PER_WORD;
11682 if (!call_used_regs[REGNO (crtl->drap_reg)])
11683 param_ptr_offset += UNITS_PER_WORD;
11684
11685 insn = emit_insn (gen_rtx_SET
11686 (VOIDmode, stack_pointer_rtx,
11687 gen_rtx_PLUS (Pmode,
11688 crtl->drap_reg,
11689 GEN_INT (-param_ptr_offset))));
11690 m->fs.cfa_reg = stack_pointer_rtx;
11691 m->fs.cfa_offset = param_ptr_offset;
11692 m->fs.sp_offset = param_ptr_offset;
11693 m->fs.realigned = false;
11694
11695 add_reg_note (insn, REG_CFA_DEF_CFA,
11696 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11697 GEN_INT (param_ptr_offset)));
11698 RTX_FRAME_RELATED_P (insn) = 1;
11699
11700 if (!call_used_regs[REGNO (crtl->drap_reg)])
11701 ix86_emit_restore_reg_using_pop (crtl->drap_reg);
11702 }
11703
11704 /* At this point the stack pointer must be valid, and we must have
11705 restored all of the registers. We may not have deallocated the
11706 entire stack frame. We've delayed this until now because it may
11707 be possible to merge the local stack deallocation with the
11708 deallocation forced by ix86_static_chain_on_stack. */
11709 gcc_assert (m->fs.sp_valid);
11710 gcc_assert (!m->fs.fp_valid);
11711 gcc_assert (!m->fs.realigned);
11712 if (m->fs.sp_offset != UNITS_PER_WORD)
11713 {
11714 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11715 GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
11716 style, true);
11717 }
11718 else
11719 ix86_add_queued_cfa_restore_notes (get_last_insn ());
11720
11721 /* Sibcall epilogues don't want a return instruction. */
11722 if (style == 0)
11723 {
11724 m->fs = frame_state_save;
11725 return;
11726 }
11727
11728 if (crtl->args.pops_args && crtl->args.size)
11729 {
11730 rtx popc = GEN_INT (crtl->args.pops_args);
11731
11732 /* i386 can only pop 64K bytes. If asked to pop more, pop the return
11733 address, do an explicit add, and jump indirectly to the caller. */
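/* Illustrative sketch (32-bit only) of the sequence emitted below:
     popl %ecx            # return address
     addl $<popc>, %esp   # deallocate the >64K argument area
     jmp *%ecx            # return to the caller
   (exact operands depend on crtl->args.pops_args). */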
11734
11735 if (crtl->args.pops_args >= 65536)
11736 {
11737 rtx ecx = gen_rtx_REG (SImode, CX_REG);
11738 rtx insn;
11739
11740 /* There is no "pascal" calling convention in any 64bit ABI. */
11741 gcc_assert (!TARGET_64BIT);
11742
11743 insn = emit_insn (gen_pop (ecx));
11744 m->fs.cfa_offset -= UNITS_PER_WORD;
11745 m->fs.sp_offset -= UNITS_PER_WORD;
11746
11747 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
11748 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
11749 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
11750 add_reg_note (insn, REG_CFA_REGISTER,
11751 gen_rtx_SET (VOIDmode, ecx, pc_rtx));
11752 RTX_FRAME_RELATED_P (insn) = 1;
11753
11754 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11755 popc, -1, true);
11756 emit_jump_insn (gen_simple_return_indirect_internal (ecx));
11757 }
11758 else
11759 emit_jump_insn (gen_simple_return_pop_internal (popc));
11760 }
11761 else
11762 emit_jump_insn (gen_simple_return_internal ());
11763
11764 /* Restore the state back to the state from the prologue,
11765 so that it's correct for the next epilogue. */
11766 m->fs = frame_state_save;
11767 }
11768
11769 /* Reset from the function's potential modifications. */
11770
11771 static void
11772 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED, HOST_WIDE_INT)
11773 {
11774 if (pic_offset_table_rtx)
11775 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
11776 #if TARGET_MACHO
11777 /* Mach-O doesn't support labels at the end of objects, so if
11778 it looks like we might want one, insert a NOP. */
11779 {
11780 rtx insn = get_last_insn ();
11781 rtx deleted_debug_label = NULL_RTX;
11782 while (insn
11783 && NOTE_P (insn)
11784 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
11785 {
11786 /* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL
11787 notes; instead set their CODE_LABEL_NUMBER to -1,
11788 otherwise there would be code generation differences
11789 between -g and -g0. */
11790 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
11791 deleted_debug_label = insn;
11792 insn = PREV_INSN (insn);
11793 }
11794 if (insn
11795 && (LABEL_P (insn)
11796 || (NOTE_P (insn)
11797 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
11798 fputs ("\tnop\n", file);
11799 else if (deleted_debug_label)
11800 for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
11801 if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
11802 CODE_LABEL_NUMBER (insn) = -1;
11803 }
11804 #endif
11805
11806 }
11807
11808 /* Return a scratch register to use in the split stack prologue. The
11809 split stack prologue is used for -fsplit-stack. It consists of the first
11810 instructions in the function, emitted even before the regular prologue.
11811 The scratch register can be any caller-saved register which is not
11812 used for parameters or for the static chain. */
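/* Roughly, per the logic below: 64-bit code always uses %r11; a plain
   32-bit function gets %ecx, or %edx when a static chain is live;
   fastcall functions get %eax (fastcall plus a nested function is
   rejected with a sorry). */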
11813
11814 static unsigned int
11815 split_stack_prologue_scratch_regno (void)
11816 {
11817 if (TARGET_64BIT)
11818 return R11_REG;
11819 else
11820 {
11821 bool is_fastcall, is_thiscall;
11822 int regparm;
11823
11824 is_fastcall = (lookup_attribute ("fastcall",
11825 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
11826 != NULL);
11827 is_thiscall = (lookup_attribute ("thiscall",
11828 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
11829 != NULL);
11830 regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);
11831
11832 if (is_fastcall)
11833 {
11834 if (DECL_STATIC_CHAIN (cfun->decl))
11835 {
11836 sorry ("-fsplit-stack does not support fastcall with "
11837 "nested function");
11838 return INVALID_REGNUM;
11839 }
11840 return AX_REG;
11841 }
11842 else if (is_thiscall)
11843 {
11844 if (!DECL_STATIC_CHAIN (cfun->decl))
11845 return DX_REG;
11846 return AX_REG;
11847 }
11848 else if (regparm < 3)
11849 {
11850 if (!DECL_STATIC_CHAIN (cfun->decl))
11851 return CX_REG;
11852 else
11853 {
11854 if (regparm >= 2)
11855 {
11856 sorry ("-fsplit-stack does not support 2 register "
11857 "parameters for a nested function");
11858 return INVALID_REGNUM;
11859 }
11860 return DX_REG;
11861 }
11862 }
11863 else
11864 {
11865 /* FIXME: We could make this work by pushing a register
11866 around the addition and comparison. */
11867 sorry ("-fsplit-stack does not support 3 register parameters");
11868 return INVALID_REGNUM;
11869 }
11870 }
11871 }
11872
11873 /* A SYMBOL_REF for the function which allocates new stack space for
11874 -fsplit-stack. */
11875
11876 static GTY(()) rtx split_stack_fn;
11877
11878 /* A SYMBOL_REF for the more stack function when using the large
11879 model. */
11880
11881 static GTY(()) rtx split_stack_fn_large;
11882
11883 /* Handle -fsplit-stack. These are the first instructions in the
11884 function, even before the regular prologue. */
11885
11886 void
11887 ix86_expand_split_stack_prologue (void)
11888 {
11889 struct ix86_frame frame;
11890 HOST_WIDE_INT allocate;
11891 unsigned HOST_WIDE_INT args_size;
11892 rtx label, limit, current, jump_insn, allocate_rtx, call_insn, call_fusage;
11893 rtx scratch_reg = NULL_RTX;
11894 rtx varargs_label = NULL_RTX;
11895 rtx fn;
11896
11897 gcc_assert (flag_split_stack && reload_completed);
11898
11899 ix86_finalize_stack_realign_flags ();
11900 ix86_compute_frame_layout (&frame);
11901 allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;
11902
11903 /* This is the label we will branch to if we have enough stack
11904 space. We expect the basic block reordering pass to reverse this
11905 branch if optimizing, so that we branch in the unlikely case. */
11906 label = gen_label_rtx ();
11907
11908 /* We need to compare the stack pointer minus the frame size with
11909 the stack boundary in the TCB. The stack boundary always gives
11910 us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
11911 can compare directly. Otherwise we need to do an addition. */
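/* Rough sketch of the resulting check on a typical x86-64 GNU/Linux
   target (the guard slot lives in the TCB; its offset is omitted here):
     cmpq %fs:<guard>, %rsp
     jae .Lhave_enough_stack
     ... set up arguments and call __morestack ...
   .Lhave_enough_stack:  */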
11912
11913 limit = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
11914 UNSPEC_STACK_CHECK);
11915 limit = gen_rtx_CONST (Pmode, limit);
11916 limit = gen_rtx_MEM (Pmode, limit);
11917 if (allocate < SPLIT_STACK_AVAILABLE)
11918 current = stack_pointer_rtx;
11919 else
11920 {
11921 unsigned int scratch_regno;
11922 rtx offset;
11923
11924 /* We need a scratch register to hold the stack pointer minus
11925 the required frame size. Since this is the very start of the
11926 function, the scratch register can be any caller-saved
11927 register which is not used for parameters. */
11928 offset = GEN_INT (- allocate);
11929 scratch_regno = split_stack_prologue_scratch_regno ();
11930 if (scratch_regno == INVALID_REGNUM)
11931 return;
11932 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
11933 if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
11934 {
11935 /* We don't use ix86_gen_add3 in this case because it will
11936 want to split to lea, but when not optimizing the insn
11937 will not be split after this point. */
11938 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
11939 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11940 offset)));
11941 }
11942 else
11943 {
11944 emit_move_insn (scratch_reg, offset);
11945 emit_insn (ix86_gen_add3 (scratch_reg, scratch_reg,
11946 stack_pointer_rtx));
11947 }
11948 current = scratch_reg;
11949 }
11950
11951 ix86_expand_branch (GEU, current, limit, label);
11952 jump_insn = get_last_insn ();
11953 JUMP_LABEL (jump_insn) = label;
11954
11955 /* Mark the jump as very likely to be taken. */
11956 add_int_reg_note (jump_insn, REG_BR_PROB,
11957 REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100);
11958
11959 if (split_stack_fn == NULL_RTX)
11960 split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
11961 fn = split_stack_fn;
11962
11963 /* Get more stack space. We pass in the desired stack space and the
11964 size of the arguments to copy to the new stack. In 32-bit mode
11965 we push the parameters; __morestack will return on a new stack
11966 anyhow. In 64-bit mode we pass the parameters in r10 and
11967 r11. */
11968 allocate_rtx = GEN_INT (allocate);
11969 args_size = crtl->args.size >= 0 ? crtl->args.size : 0;
11970 call_fusage = NULL_RTX;
11971 if (TARGET_64BIT)
11972 {
11973 rtx reg10, reg11;
11974
11975 reg10 = gen_rtx_REG (Pmode, R10_REG);
11976 reg11 = gen_rtx_REG (Pmode, R11_REG);
11977
11978 /* If this function uses a static chain, it will be in %r10.
11979 Preserve it across the call to __morestack. */
11980 if (DECL_STATIC_CHAIN (cfun->decl))
11981 {
11982 rtx rax;
11983
11984 rax = gen_rtx_REG (word_mode, AX_REG);
11985 emit_move_insn (rax, gen_rtx_REG (word_mode, R10_REG));
11986 use_reg (&call_fusage, rax);
11987 }
11988
11989 if ((ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
11990 && !TARGET_PECOFF)
11991 {
11992 HOST_WIDE_INT argval;
11993
11994 gcc_assert (Pmode == DImode);
11995 /* When using the large model we need to load the address
11996 into a register, and we've run out of registers. So we
11997 switch to a different calling convention, and we call a
11998 different function: __morestack_large_model. We pass the
11999 argument size in the upper 32 bits of r10 and pass the
12000 frame size in the lower 32 bits. */
12001 gcc_assert ((allocate & (HOST_WIDE_INT) 0xffffffff) == allocate);
12002 gcc_assert ((args_size & 0xffffffff) == args_size);
12003
12004 if (split_stack_fn_large == NULL_RTX)
12005 split_stack_fn_large =
12006 gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model");
12007
12008 if (ix86_cmodel == CM_LARGE_PIC)
12009 {
12010 rtx label, x;
12011
12012 label = gen_label_rtx ();
12013 emit_label (label);
12014 LABEL_PRESERVE_P (label) = 1;
12015 emit_insn (gen_set_rip_rex64 (reg10, label));
12016 emit_insn (gen_set_got_offset_rex64 (reg11, label));
12017 emit_insn (ix86_gen_add3 (reg10, reg10, reg11));
12018 x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn_large),
12019 UNSPEC_GOT);
12020 x = gen_rtx_CONST (Pmode, x);
12021 emit_move_insn (reg11, x);
12022 x = gen_rtx_PLUS (Pmode, reg10, reg11);
12023 x = gen_const_mem (Pmode, x);
12024 emit_move_insn (reg11, x);
12025 }
12026 else
12027 emit_move_insn (reg11, split_stack_fn_large);
12028
12029 fn = reg11;
12030
12031 argval = ((args_size << 16) << 16) + allocate;
12032 emit_move_insn (reg10, GEN_INT (argval));
12033 }
12034 else
12035 {
12036 emit_move_insn (reg10, allocate_rtx);
12037 emit_move_insn (reg11, GEN_INT (args_size));
12038 use_reg (&call_fusage, reg11);
12039 }
12040
12041 use_reg (&call_fusage, reg10);
12042 }
12043 else
12044 {
12045 emit_insn (gen_push (GEN_INT (args_size)));
12046 emit_insn (gen_push (allocate_rtx));
12047 }
12048 call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn),
12049 GEN_INT (UNITS_PER_WORD), constm1_rtx,
12050 NULL_RTX, false);
12051 add_function_usage_to (call_insn, call_fusage);
12052
12053 /* In order to make call/return prediction work right, we now need
12054 to execute a return instruction. See
12055 libgcc/config/i386/morestack.S for the details on how this works.
12056
12057 For flow purposes gcc must not see this as a return
12058 instruction--we need control flow to continue at the subsequent
12059 label. Therefore, we use an unspec. */
12060 gcc_assert (crtl->args.pops_args < 65536);
12061 emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));
12062
12063 /* If we are in 64-bit mode and this function uses a static chain,
12064 we saved %r10 in %rax before calling __morestack. */
12065 if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
12066 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
12067 gen_rtx_REG (word_mode, AX_REG));
12068
12069 /* If this function calls va_start, we need to store a pointer to
12070 the arguments on the old stack, because they may not have been
12071 all copied to the new stack. At this point the old stack can be
12072 found at the frame pointer value used by __morestack, because
12073 __morestack has set that up before calling back to us. Here we
12074 store that pointer in a scratch register, and in
12075 ix86_expand_prologue we store the scratch register in a stack
12076 slot. */
12077 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12078 {
12079 unsigned int scratch_regno;
12080 rtx frame_reg;
12081 int words;
12082
12083 scratch_regno = split_stack_prologue_scratch_regno ();
12084 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
12085 frame_reg = gen_rtx_REG (Pmode, BP_REG);
12086
12087 /* 64-bit:
12088 fp -> old fp value
12089 return address within this function
12090 return address of caller of this function
12091 stack arguments
12092 So we add three words to get to the stack arguments.
12093
12094 32-bit:
12095 fp -> old fp value
12096 return address within this function
12097 first argument to __morestack
12098 second argument to __morestack
12099 return address of caller of this function
12100 stack arguments
12101 So we add five words to get to the stack arguments.
12102 */
12103 words = TARGET_64BIT ? 3 : 5;
12104 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
12105 gen_rtx_PLUS (Pmode, frame_reg,
12106 GEN_INT (words * UNITS_PER_WORD))));
12107
12108 varargs_label = gen_label_rtx ();
12109 emit_jump_insn (gen_jump (varargs_label));
12110 JUMP_LABEL (get_last_insn ()) = varargs_label;
12111
12112 emit_barrier ();
12113 }
12114
12115 emit_label (label);
12116 LABEL_NUSES (label) = 1;
12117
12118 /* If this function calls va_start, we now have to set the scratch
12119 register for the case where we do not call __morestack. In this
12120 case we need to set it based on the stack pointer. */
12121 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12122 {
12123 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
12124 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
12125 GEN_INT (UNITS_PER_WORD))));
12126
12127 emit_label (varargs_label);
12128 LABEL_NUSES (varargs_label) = 1;
12129 }
12130 }
12131
12132 /* We may have to tell the dataflow pass that the split stack prologue
12133 is initializing a scratch register. */
12134
12135 static void
12136 ix86_live_on_entry (bitmap regs)
12137 {
12138 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12139 {
12140 gcc_assert (flag_split_stack);
12141 bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
12142 }
12143 }
12144 \f
12145 /* Extract the parts of an RTL expression that is a valid memory address
12146 for an instruction. Return 0 if the structure of the address is
12147 grossly off. Return -1 if the address contains ASHIFT, so it is not
12148 strictly valid, but is still used for computing the length of an lea instruction. */
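/* For example (illustrative), the canonical address
     (plus:DI (plus:DI (mult:DI (reg:DI si) (const_int 4))
                       (reg:DI bx))
              (const_int 12))
   i.e. 12(%rbx,%rsi,4), decomposes into base = bx, index = si,
   scale = 4 and disp = (const_int 12), with seg = SEG_DEFAULT. */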
12149
12150 int
12151 ix86_decompose_address (rtx addr, struct ix86_address *out)
12152 {
12153 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
12154 rtx base_reg, index_reg;
12155 HOST_WIDE_INT scale = 1;
12156 rtx scale_rtx = NULL_RTX;
12157 rtx tmp;
12158 int retval = 1;
12159 enum ix86_address_seg seg = SEG_DEFAULT;
12160
12161 /* Allow zero-extended SImode addresses;
12162 they will be emitted with the addr32 prefix. */
12163 if (TARGET_64BIT && GET_MODE (addr) == DImode)
12164 {
12165 if (GET_CODE (addr) == ZERO_EXTEND
12166 && GET_MODE (XEXP (addr, 0)) == SImode)
12167 {
12168 addr = XEXP (addr, 0);
12169 if (CONST_INT_P (addr))
12170 return 0;
12171 }
12172 else if (GET_CODE (addr) == AND
12173 && const_32bit_mask (XEXP (addr, 1), DImode))
12174 {
12175 addr = simplify_gen_subreg (SImode, XEXP (addr, 0), DImode, 0);
12176 if (addr == NULL_RTX)
12177 return 0;
12178
12179 if (CONST_INT_P (addr))
12180 return 0;
12181 }
12182 }
12183
12184 /* Allow SImode subregs of DImode addresses;
12185 they will be emitted with the addr32 prefix. */
12186 if (TARGET_64BIT && GET_MODE (addr) == SImode)
12187 {
12188 if (GET_CODE (addr) == SUBREG
12189 && GET_MODE (SUBREG_REG (addr)) == DImode)
12190 {
12191 addr = SUBREG_REG (addr);
12192 if (CONST_INT_P (addr))
12193 return 0;
12194 }
12195 }
12196
12197 if (REG_P (addr))
12198 base = addr;
12199 else if (GET_CODE (addr) == SUBREG)
12200 {
12201 if (REG_P (SUBREG_REG (addr)))
12202 base = addr;
12203 else
12204 return 0;
12205 }
12206 else if (GET_CODE (addr) == PLUS)
12207 {
12208 rtx addends[4], op;
12209 int n = 0, i;
12210
12211 op = addr;
12212 do
12213 {
12214 if (n >= 4)
12215 return 0;
12216 addends[n++] = XEXP (op, 1);
12217 op = XEXP (op, 0);
12218 }
12219 while (GET_CODE (op) == PLUS);
12220 if (n >= 4)
12221 return 0;
12222 addends[n] = op;
12223
12224 for (i = n; i >= 0; --i)
12225 {
12226 op = addends[i];
12227 switch (GET_CODE (op))
12228 {
12229 case MULT:
12230 if (index)
12231 return 0;
12232 index = XEXP (op, 0);
12233 scale_rtx = XEXP (op, 1);
12234 break;
12235
12236 case ASHIFT:
12237 if (index)
12238 return 0;
12239 index = XEXP (op, 0);
12240 tmp = XEXP (op, 1);
12241 if (!CONST_INT_P (tmp))
12242 return 0;
12243 scale = INTVAL (tmp);
12244 if ((unsigned HOST_WIDE_INT) scale > 3)
12245 return 0;
12246 scale = 1 << scale;
12247 break;
12248
12249 case ZERO_EXTEND:
12250 op = XEXP (op, 0);
12251 if (GET_CODE (op) != UNSPEC)
12252 return 0;
12253 /* FALLTHRU */
12254
12255 case UNSPEC:
12256 if (XINT (op, 1) == UNSPEC_TP
12257 && TARGET_TLS_DIRECT_SEG_REFS
12258 && seg == SEG_DEFAULT)
12259 seg = DEFAULT_TLS_SEG_REG;
12260 else
12261 return 0;
12262 break;
12263
12264 case SUBREG:
12265 if (!REG_P (SUBREG_REG (op)))
12266 return 0;
12267 /* FALLTHRU */
12268
12269 case REG:
12270 if (!base)
12271 base = op;
12272 else if (!index)
12273 index = op;
12274 else
12275 return 0;
12276 break;
12277
12278 case CONST:
12279 case CONST_INT:
12280 case SYMBOL_REF:
12281 case LABEL_REF:
12282 if (disp)
12283 return 0;
12284 disp = op;
12285 break;
12286
12287 default:
12288 return 0;
12289 }
12290 }
12291 }
12292 else if (GET_CODE (addr) == MULT)
12293 {
12294 index = XEXP (addr, 0); /* index*scale */
12295 scale_rtx = XEXP (addr, 1);
12296 }
12297 else if (GET_CODE (addr) == ASHIFT)
12298 {
12299 /* We're called for lea too, which implements ashift on occasion. */
12300 index = XEXP (addr, 0);
12301 tmp = XEXP (addr, 1);
12302 if (!CONST_INT_P (tmp))
12303 return 0;
12304 scale = INTVAL (tmp);
12305 if ((unsigned HOST_WIDE_INT) scale > 3)
12306 return 0;
12307 scale = 1 << scale;
12308 retval = -1;
12309 }
12310 else
12311 disp = addr; /* displacement */
12312
12313 if (index)
12314 {
12315 if (REG_P (index))
12316 ;
12317 else if (GET_CODE (index) == SUBREG
12318 && REG_P (SUBREG_REG (index)))
12319 ;
12320 else
12321 return 0;
12322 }
12323
12324 /* Extract the integral value of scale. */
12325 if (scale_rtx)
12326 {
12327 if (!CONST_INT_P (scale_rtx))
12328 return 0;
12329 scale = INTVAL (scale_rtx);
12330 }
12331
12332 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
12333 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
12334
12335 /* Avoid useless 0 displacement. */
12336 if (disp == const0_rtx && (base || index))
12337 disp = NULL_RTX;
12338
12339 /* Allow arg pointer and stack pointer as index if there is no scaling. */
12340 if (base_reg && index_reg && scale == 1
12341 && (index_reg == arg_pointer_rtx
12342 || index_reg == frame_pointer_rtx
12343 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
12344 {
12345 rtx tmp;
12346 tmp = base, base = index, index = tmp;
12347 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
12348 }
12349
12350 /* Special case: %ebp cannot be encoded as a base without a displacement.
12351 Similarly %r13. */
12352 if (!disp
12353 && base_reg
12354 && (base_reg == hard_frame_pointer_rtx
12355 || base_reg == frame_pointer_rtx
12356 || base_reg == arg_pointer_rtx
12357 || (REG_P (base_reg)
12358 && (REGNO (base_reg) == HARD_FRAME_POINTER_REGNUM
12359 || REGNO (base_reg) == R13_REG))))
12360 disp = const0_rtx;
12361
12362 /* Special case: on K6, [%esi] causes the instruction to be vector decoded.
12363 Avoid this by transforming to [%esi+0].
12364 Reload calls address legitimization without cfun defined, so we need
12365 to test cfun for being non-NULL. */
12366 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
12367 && base_reg && !index_reg && !disp
12368 && REG_P (base_reg) && REGNO (base_reg) == SI_REG)
12369 disp = const0_rtx;
12370
12371 /* Special case: encode reg+reg instead of reg*2. */
12372 if (!base && index && scale == 2)
12373 base = index, base_reg = index_reg, scale = 1;
12374
12375 /* Special case: scaling cannot be encoded without base or displacement. */
12376 if (!base && !disp && index && scale != 1)
12377 disp = const0_rtx;
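/* E.g. a bare index*4 must be emitted as 0(,%reg,4), and the reg*2
   case just above becomes the shorter (%reg,%reg) form. */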
12378
12379 out->base = base;
12380 out->index = index;
12381 out->disp = disp;
12382 out->scale = scale;
12383 out->seg = seg;
12384
12385 return retval;
12386 }
12387 \f
12388 /* Return cost of the memory address x.
12389 For i386, it is better to use a complex address than let gcc copy
12390 the address into a reg and make a new pseudo. But not if the address
12391 requires two regs - that would mean more pseudos with longer
12392 lifetimes. */
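/* For instance, per the logic below, a plain hard-register address such
   as (%ebx) keeps the base cost of 1, while an address built from two
   distinct pseudos, e.g. (plus (reg 100) (reg 101)), costs 3 even
   before any K6 penalty. */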
12393 static int
12394 ix86_address_cost (rtx x, enum machine_mode, addr_space_t, bool)
12395 {
12396 struct ix86_address parts;
12397 int cost = 1;
12398 int ok = ix86_decompose_address (x, &parts);
12399
12400 gcc_assert (ok);
12401
12402 if (parts.base && GET_CODE (parts.base) == SUBREG)
12403 parts.base = SUBREG_REG (parts.base);
12404 if (parts.index && GET_CODE (parts.index) == SUBREG)
12405 parts.index = SUBREG_REG (parts.index);
12406
12407 /* Attempt to minimize number of registers in the address. */
12408 if ((parts.base
12409 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
12410 || (parts.index
12411 && (!REG_P (parts.index)
12412 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
12413 cost++;
12414
12415 if (parts.base
12416 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
12417 && parts.index
12418 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
12419 && parts.base != parts.index)
12420 cost++;
12421
12422 /* The AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
12423 since its predecode logic can't detect the length of instructions
12424 and it degenerates to vector decoding. Increase the cost of such
12425 addresses here. The penalty is at least 2 cycles. It may be worthwhile
12426 to split such addresses or even refuse such addresses at all.
12427
12428 Following addressing modes are affected:
12429 [base+scale*index]
12430 [scale*index+disp]
12431 [base+index]
12432
12433 The first and last case may be avoidable by explicitly coding the zero in
12434 the memory address, but I don't have an AMD-K6 machine handy to check this
12435 theory. */
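/* "Explicitly coding the zero" would mean emitting e.g. 0(%esi,%edi)
   instead of (%esi,%edi), i.e. forcing a disp8 so the ModR/M byte no
   longer has the 00_xxx_100b form (untested, as noted above). */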
12436
12437 if (TARGET_K6
12438 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
12439 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
12440 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
12441 cost += 10;
12442
12443 return cost;
12444 }
12445 \f
12446 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
12447 this is used to form addresses to local data when -fPIC is in
12448 use. */
12449
12450 static bool
12451 darwin_local_data_pic (rtx disp)
12452 {
12453 return (GET_CODE (disp) == UNSPEC
12454 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
12455 }
12456
12457 /* Determine if a given RTX is a valid constant. We already know this
12458 satisfies CONSTANT_P. */
12459
12460 static bool
12461 ix86_legitimate_constant_p (enum machine_mode, rtx x)
12462 {
12463 switch (GET_CODE (x))
12464 {
12465 case CONST:
12466 x = XEXP (x, 0);
12467
12468 if (GET_CODE (x) == PLUS)
12469 {
12470 if (!CONST_INT_P (XEXP (x, 1)))
12471 return false;
12472 x = XEXP (x, 0);
12473 }
12474
12475 if (TARGET_MACHO && darwin_local_data_pic (x))
12476 return true;
12477
12478 /* Only some unspecs are valid as "constants". */
12479 if (GET_CODE (x) == UNSPEC)
12480 switch (XINT (x, 1))
12481 {
12482 case UNSPEC_GOT:
12483 case UNSPEC_GOTOFF:
12484 case UNSPEC_PLTOFF:
12485 return TARGET_64BIT;
12486 case UNSPEC_TPOFF:
12487 case UNSPEC_NTPOFF:
12488 x = XVECEXP (x, 0, 0);
12489 return (GET_CODE (x) == SYMBOL_REF
12490 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
12491 case UNSPEC_DTPOFF:
12492 x = XVECEXP (x, 0, 0);
12493 return (GET_CODE (x) == SYMBOL_REF
12494 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
12495 default:
12496 return false;
12497 }
12498
12499 /* We must have drilled down to a symbol. */
12500 if (GET_CODE (x) == LABEL_REF)
12501 return true;
12502 if (GET_CODE (x) != SYMBOL_REF)
12503 return false;
12504 /* FALLTHRU */
12505
12506 case SYMBOL_REF:
12507 /* TLS symbols are never valid. */
12508 if (SYMBOL_REF_TLS_MODEL (x))
12509 return false;
12510
12511 /* DLLIMPORT symbols are never valid. */
12512 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
12513 && SYMBOL_REF_DLLIMPORT_P (x))
12514 return false;
12515
12516 #if TARGET_MACHO
12517 /* mdynamic-no-pic */
12518 if (MACHO_DYNAMIC_NO_PIC_P)
12519 return machopic_symbol_defined_p (x);
12520 #endif
12521 break;
12522
12523 case CONST_DOUBLE:
12524 if (GET_MODE (x) == TImode
12525 && x != CONST0_RTX (TImode)
12526 && !TARGET_64BIT)
12527 return false;
12528 break;
12529
12530 case CONST_VECTOR:
12531 if (!standard_sse_constant_p (x))
12532 return false;
12533
12534 default:
12535 break;
12536 }
12537
12538 /* Otherwise we handle everything else in the move patterns. */
12539 return true;
12540 }
12541
12542 /* Determine if it's legal to put X into the constant pool. This
12543 is not possible for the address of thread-local symbols, which
12544 is checked above. */
12545
12546 static bool
12547 ix86_cannot_force_const_mem (enum machine_mode mode, rtx x)
12548 {
12549 /* We can always put integral constants and vectors in memory. */
12550 switch (GET_CODE (x))
12551 {
12552 case CONST_INT:
12553 case CONST_DOUBLE:
12554 case CONST_VECTOR:
12555 return false;
12556
12557 default:
12558 break;
12559 }
12560 return !ix86_legitimate_constant_p (mode, x);
12561 }
12562
12563 /* Nonzero if the symbol is marked as dllimport, or as stub-variable,
12564 otherwise zero. */
12565
12566 static bool
12567 is_imported_p (rtx x)
12568 {
12569 if (!TARGET_DLLIMPORT_DECL_ATTRIBUTES
12570 || GET_CODE (x) != SYMBOL_REF)
12571 return false;
12572
12573 return SYMBOL_REF_DLLIMPORT_P (x) || SYMBOL_REF_STUBVAR_P (x);
12574 }
12575
12576
12577 /* Nonzero if the constant value X is a legitimate general operand
12578 when generating PIC code. It is given that flag_pic is on and
12579 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
12580
12581 bool
12582 legitimate_pic_operand_p (rtx x)
12583 {
12584 rtx inner;
12585
12586 switch (GET_CODE (x))
12587 {
12588 case CONST:
12589 inner = XEXP (x, 0);
12590 if (GET_CODE (inner) == PLUS
12591 && CONST_INT_P (XEXP (inner, 1)))
12592 inner = XEXP (inner, 0);
12593
12594 /* Only some unspecs are valid as "constants". */
12595 if (GET_CODE (inner) == UNSPEC)
12596 switch (XINT (inner, 1))
12597 {
12598 case UNSPEC_GOT:
12599 case UNSPEC_GOTOFF:
12600 case UNSPEC_PLTOFF:
12601 return TARGET_64BIT;
12602 case UNSPEC_TPOFF:
12603 x = XVECEXP (inner, 0, 0);
12604 return (GET_CODE (x) == SYMBOL_REF
12605 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
12606 case UNSPEC_MACHOPIC_OFFSET:
12607 return legitimate_pic_address_disp_p (x);
12608 default:
12609 return false;
12610 }
12611 /* FALLTHRU */
12612
12613 case SYMBOL_REF:
12614 case LABEL_REF:
12615 return legitimate_pic_address_disp_p (x);
12616
12617 default:
12618 return true;
12619 }
12620 }
12621
12622 /* Determine if a given CONST RTX is a valid memory displacement
12623 in PIC mode. */
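/* For example, a typical valid 32-bit PIC displacement is
     (const (unspec [(symbol_ref "x")] UNSPEC_GOTOFF))
   i.e. x@GOTOFF relative to the PIC base register (illustrative; the
   64-bit rules below are stricter). */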
12624
12625 bool
12626 legitimate_pic_address_disp_p (rtx disp)
12627 {
12628 bool saw_plus;
12629
12630 /* In 64bit mode we can allow direct addresses of symbols and labels
12631 when they are not dynamic symbols. */
12632 if (TARGET_64BIT)
12633 {
12634 rtx op0 = disp, op1;
12635
12636 switch (GET_CODE (disp))
12637 {
12638 case LABEL_REF:
12639 return true;
12640
12641 case CONST:
12642 if (GET_CODE (XEXP (disp, 0)) != PLUS)
12643 break;
12644 op0 = XEXP (XEXP (disp, 0), 0);
12645 op1 = XEXP (XEXP (disp, 0), 1);
12646 if (!CONST_INT_P (op1)
12647 || INTVAL (op1) >= 16*1024*1024
12648 || INTVAL (op1) < -16*1024*1024)
12649 break;
12650 if (GET_CODE (op0) == LABEL_REF)
12651 return true;
12652 if (GET_CODE (op0) == CONST
12653 && GET_CODE (XEXP (op0, 0)) == UNSPEC
12654 && XINT (XEXP (op0, 0), 1) == UNSPEC_PCREL)
12655 return true;
12656 if (GET_CODE (op0) == UNSPEC
12657 && XINT (op0, 1) == UNSPEC_PCREL)
12658 return true;
12659 if (GET_CODE (op0) != SYMBOL_REF)
12660 break;
12661 /* FALLTHRU */
12662
12663 case SYMBOL_REF:
12664 /* TLS references should always be enclosed in UNSPEC.
12665 A dllimported symbol always needs to be resolved. */
12666 if (SYMBOL_REF_TLS_MODEL (op0)
12667 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && SYMBOL_REF_DLLIMPORT_P (op0)))
12668 return false;
12669
12670 if (TARGET_PECOFF)
12671 {
12672 if (is_imported_p (op0))
12673 return true;
12674
12675 if (SYMBOL_REF_FAR_ADDR_P (op0)
12676 || !SYMBOL_REF_LOCAL_P (op0))
12677 break;
12678
12679 /* Function symbols need to be resolved only for
12680 the large model.
12681 For the small model we don't need to resolve anything
12682 here. */
12683 if ((ix86_cmodel != CM_LARGE_PIC
12684 && SYMBOL_REF_FUNCTION_P (op0))
12685 || ix86_cmodel == CM_SMALL_PIC)
12686 return true;
12687 /* Non-external symbols don't need to be resolved for
12688 the large and medium models. */
12689 if ((ix86_cmodel == CM_LARGE_PIC
12690 || ix86_cmodel == CM_MEDIUM_PIC)
12691 && !SYMBOL_REF_EXTERNAL_P (op0))
12692 return true;
12693 }
12694 else if (!SYMBOL_REF_FAR_ADDR_P (op0)
12695 && SYMBOL_REF_LOCAL_P (op0)
12696 && ix86_cmodel != CM_LARGE_PIC)
12697 return true;
12698 break;
12699
12700 default:
12701 break;
12702 }
12703 }
12704 if (GET_CODE (disp) != CONST)
12705 return false;
12706 disp = XEXP (disp, 0);
12707
12708 if (TARGET_64BIT)
12709 {
12710 /* It is unsafe to allow PLUS expressions; this limits the allowed distance
12711 of GOT table references. We should not need these anyway. */
12712 if (GET_CODE (disp) != UNSPEC
12713 || (XINT (disp, 1) != UNSPEC_GOTPCREL
12714 && XINT (disp, 1) != UNSPEC_GOTOFF
12715 && XINT (disp, 1) != UNSPEC_PCREL
12716 && XINT (disp, 1) != UNSPEC_PLTOFF))
12717 return false;
12718
12719 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
12720 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
12721 return false;
12722 return true;
12723 }
12724
12725 saw_plus = false;
12726 if (GET_CODE (disp) == PLUS)
12727 {
12728 if (!CONST_INT_P (XEXP (disp, 1)))
12729 return false;
12730 disp = XEXP (disp, 0);
12731 saw_plus = true;
12732 }
12733
12734 if (TARGET_MACHO && darwin_local_data_pic (disp))
12735 return true;
12736
12737 if (GET_CODE (disp) != UNSPEC)
12738 return false;
12739
12740 switch (XINT (disp, 1))
12741 {
12742 case UNSPEC_GOT:
12743 if (saw_plus)
12744 return false;
12745 /* We need to check for both symbols and labels because VxWorks loads
12746 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
12747 details. */
12748 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
12749 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
12750 case UNSPEC_GOTOFF:
12751 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
12752 While the ABI also specifies a 32bit relocation, we don't produce it in
12753 the small PIC model at all. */
12754 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
12755 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
12756 && !TARGET_64BIT)
12757 return !TARGET_PECOFF && gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
12758 return false;
12759 case UNSPEC_GOTTPOFF:
12760 case UNSPEC_GOTNTPOFF:
12761 case UNSPEC_INDNTPOFF:
12762 if (saw_plus)
12763 return false;
12764 disp = XVECEXP (disp, 0, 0);
12765 return (GET_CODE (disp) == SYMBOL_REF
12766 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
12767 case UNSPEC_NTPOFF:
12768 disp = XVECEXP (disp, 0, 0);
12769 return (GET_CODE (disp) == SYMBOL_REF
12770 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
12771 case UNSPEC_DTPOFF:
12772 disp = XVECEXP (disp, 0, 0);
12773 return (GET_CODE (disp) == SYMBOL_REF
12774 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
12775 }
12776
12777 return false;
12778 }
12779
12780 /* Our implementation of LEGITIMIZE_RELOAD_ADDRESS. Returns a value to
12781 replace the input X, or the original X if no replacement is called for.
12782 The output parameter *WIN is 1 if the calling macro should goto WIN,
12783 0 if it should not. */
12784
12785 bool
12786 ix86_legitimize_reload_address (rtx x, enum machine_mode, int opnum, int type,
12787 int)
12788 {
12789 /* Reload can generate:
12790
12791 (plus:DI (plus:DI (unspec:DI [(const_int 0 [0])] UNSPEC_TP)
12792 (reg:DI 97))
12793 (reg:DI 2 cx))
12794
12795 This RTX is rejected from ix86_legitimate_address_p due to
12796 non-strictness of base register 97. Following this rejection,
12797 reload pushes all three components into separate registers,
12798 creating invalid memory address RTX.
12799
12800 Following code reloads only the invalid part of the
12801 memory address RTX. */
12802
12803 if (GET_CODE (x) == PLUS
12804 && REG_P (XEXP (x, 1))
12805 && GET_CODE (XEXP (x, 0)) == PLUS
12806 && REG_P (XEXP (XEXP (x, 0), 1)))
12807 {
12808 rtx base, index;
12809 bool something_reloaded = false;
12810
12811 base = XEXP (XEXP (x, 0), 1);
12812 if (!REG_OK_FOR_BASE_STRICT_P (base))
12813 {
12814 push_reload (base, NULL_RTX, &XEXP (XEXP (x, 0), 1), NULL,
12815 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
12816 opnum, (enum reload_type) type);
12817 something_reloaded = true;
12818 }
12819
12820 index = XEXP (x, 1);
12821 if (!REG_OK_FOR_INDEX_STRICT_P (index))
12822 {
12823 push_reload (index, NULL_RTX, &XEXP (x, 1), NULL,
12824 INDEX_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
12825 opnum, (enum reload_type) type);
12826 something_reloaded = true;
12827 }
12828
12829 gcc_assert (something_reloaded);
12830 return true;
12831 }
12832
12833 return false;
12834 }
12835
12836 /* Determine if OP is a suitable RTX for an address register.
12837 Return the naked register if a register or a register subreg is
12838 found, otherwise return NULL_RTX. */
12839
12840 static rtx
12841 ix86_validate_address_register (rtx op)
12842 {
12843 enum machine_mode mode = GET_MODE (op);
12844
12845 /* Only SImode or DImode registers can form the address. */
12846 if (mode != SImode && mode != DImode)
12847 return NULL_RTX;
12848
12849 if (REG_P (op))
12850 return op;
12851 else if (GET_CODE (op) == SUBREG)
12852 {
12853 rtx reg = SUBREG_REG (op);
12854
12855 if (!REG_P (reg))
12856 return NULL_RTX;
12857
12858 mode = GET_MODE (reg);
12859
12860 /* Don't allow SUBREGs that span more than a word. It can
12861 lead to spill failures when the register is one word out
12862 of a two word structure. */
12863 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
12864 return NULL_RTX;
12865
12866 /* Allow only SUBREGs of non-eliminable hard registers. */
12867 if (register_no_elim_operand (reg, mode))
12868 return reg;
12869 }
12870
12871 /* Op is not a register. */
12872 return NULL_RTX;
12873 }
12874
12875 /* Recognizes RTL expressions that are valid memory addresses for an
12876 instruction. The MODE argument is the machine mode for the MEM
12877 expression that wants to use this address.
12878
12879 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
12880 convert common non-canonical forms to canonical form so that they will
12881 be recognized. */
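/* Illustrative examples: (plus (reg) (mult (reg) (const_int 3))) is
   rejected because 3 is not a valid scale factor, and lea-only forms
   containing ASHIFT (for which ix86_decompose_address returns -1) are
   likewise not accepted as memory addresses. */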
12882
12883 static bool
12884 ix86_legitimate_address_p (enum machine_mode, rtx addr, bool strict)
12885 {
12886 struct ix86_address parts;
12887 rtx base, index, disp;
12888 HOST_WIDE_INT scale;
12889 enum ix86_address_seg seg;
12890
12891 if (ix86_decompose_address (addr, &parts) <= 0)
12892 /* Decomposition failed. */
12893 return false;
12894
12895 base = parts.base;
12896 index = parts.index;
12897 disp = parts.disp;
12898 scale = parts.scale;
12899 seg = parts.seg;
12900
12901 /* Validate base register. */
12902 if (base)
12903 {
12904 rtx reg = ix86_validate_address_register (base);
12905
12906 if (reg == NULL_RTX)
12907 return false;
12908
12909 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
12910 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
12911 /* Base is not valid. */
12912 return false;
12913 }
12914
12915 /* Validate index register. */
12916 if (index)
12917 {
12918 rtx reg = ix86_validate_address_register (index);
12919
12920 if (reg == NULL_RTX)
12921 return false;
12922
12923 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
12924 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
12925 /* Index is not valid. */
12926 return false;
12927 }
12928
12929 /* Index and base should have the same mode. */
12930 if (base && index
12931 && GET_MODE (base) != GET_MODE (index))
12932 return false;
12933
12934 /* Address override works only on the (%reg) part of %fs:(%reg). */
12935 if (seg != SEG_DEFAULT
12936 && ((base && GET_MODE (base) != word_mode)
12937 || (index && GET_MODE (index) != word_mode)))
12938 return false;
12939
12940 /* Validate scale factor. */
12941 if (scale != 1)
12942 {
12943 if (!index)
12944 /* Scale without index. */
12945 return false;
12946
12947 if (scale != 2 && scale != 4 && scale != 8)
12948 /* Scale is not a valid multiplier. */
12949 return false;
12950 }
12951
12952 /* Validate displacement. */
12953 if (disp)
12954 {
12955 if (GET_CODE (disp) == CONST
12956 && GET_CODE (XEXP (disp, 0)) == UNSPEC
12957 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
12958 switch (XINT (XEXP (disp, 0), 1))
12959 {
12960 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
12961 used. While the ABI also specifies 32bit relocations, we don't produce
12962 them at all and use IP-relative addressing instead. */
12963 case UNSPEC_GOT:
12964 case UNSPEC_GOTOFF:
12965 gcc_assert (flag_pic);
12966 if (!TARGET_64BIT)
12967 goto is_legitimate_pic;
12968
12969 /* 64bit address unspec. */
12970 return false;
12971
12972 case UNSPEC_GOTPCREL:
12973 case UNSPEC_PCREL:
12974 gcc_assert (flag_pic);
12975 goto is_legitimate_pic;
12976
12977 case UNSPEC_GOTTPOFF:
12978 case UNSPEC_GOTNTPOFF:
12979 case UNSPEC_INDNTPOFF:
12980 case UNSPEC_NTPOFF:
12981 case UNSPEC_DTPOFF:
12982 break;
12983
12984 case UNSPEC_STACK_CHECK:
12985 gcc_assert (flag_split_stack);
12986 break;
12987
12988 default:
12989 /* Invalid address unspec. */
12990 return false;
12991 }
12992
12993 else if (SYMBOLIC_CONST (disp)
12994 && (flag_pic
12995 || (TARGET_MACHO
12996 #if TARGET_MACHO
12997 && MACHOPIC_INDIRECT
12998 && !machopic_operand_p (disp)
12999 #endif
13000 )))
13001 {
13002
13003 is_legitimate_pic:
13004 if (TARGET_64BIT && (index || base))
13005 {
13006 /* foo@dtpoff(%rX) is ok. */
13007 if (GET_CODE (disp) != CONST
13008 || GET_CODE (XEXP (disp, 0)) != PLUS
13009 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
13010 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
13011 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
13012 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
13013 /* Non-constant pic memory reference. */
13014 return false;
13015 }
13016 else if ((!TARGET_MACHO || flag_pic)
13017 && ! legitimate_pic_address_disp_p (disp))
13018 /* Displacement is an invalid pic construct. */
13019 return false;
13020 #if TARGET_MACHO
13021 else if (MACHO_DYNAMIC_NO_PIC_P
13022 && !ix86_legitimate_constant_p (Pmode, disp))
13023 /* displacement must be referenced via a non_lazy_pointer */
13024 return false;
13025 #endif
13026
13027 /* This code used to verify that a symbolic pic displacement
13028 includes the pic_offset_table_rtx register.
13029
13030 While this is a good idea, unfortunately these constructs may
13031 be created by the "adds using lea" optimization for incorrect
13032 code like:
13033
13034 int a;
13035 int foo(int i)
13036 {
13037 return *(&a+i);
13038 }
13039
13040 This code is nonsensical, but results in addressing the
13041 GOT table with a pic_offset_table_rtx base. We can't
13042 just refuse it easily, since it gets matched by the
13043 "addsi3" pattern, which later gets split into an lea when the
13044 output register differs from the input. While this
13045 could be handled by a separate addsi pattern for this case
13046 that never results in an lea, disabling this test seems to be
13047 the easier and correct fix for the crash. */
13048 }
13049 else if (GET_CODE (disp) != LABEL_REF
13050 && !CONST_INT_P (disp)
13051 && (GET_CODE (disp) != CONST
13052 || !ix86_legitimate_constant_p (Pmode, disp))
13053 && (GET_CODE (disp) != SYMBOL_REF
13054 || !ix86_legitimate_constant_p (Pmode, disp)))
13055 /* Displacement is not constant. */
13056 return false;
13057 else if (TARGET_64BIT
13058 && !x86_64_immediate_operand (disp, VOIDmode))
13059 /* Displacement is out of range. */
13060 return false;
13061 /* In x32 mode, constant addresses are sign extended to 64bit, so
13062 we have to prevent addresses from 0x80000000 to 0xffffffff. */
13063 else if (TARGET_X32 && !(index || base)
13064 && CONST_INT_P (disp)
13065 && val_signbit_known_set_p (SImode, INTVAL (disp)))
13066 return false;
13067 }
13068
13069 /* Everything looks valid. */
13070 return true;
13071 }
13072
13073 /* Determine if a given RTX is a valid constant address. */
13074
13075 bool
13076 constant_address_p (rtx x)
13077 {
13078 return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
13079 }
13080 \f
13081 /* Return a unique alias set for the GOT. */
13082
13083 static alias_set_type
13084 ix86_GOT_alias_set (void)
13085 {
13086 static alias_set_type set = -1;
13087 if (set == -1)
13088 set = new_alias_set ();
13089 return set;
13090 }
13091
13092 /* Return a legitimate reference for ORIG (an address) using the
13093 register REG. If REG is 0, a new pseudo is generated.
13094
13095 There are two types of references that must be handled:
13096
13097 1. Global data references must load the address from the GOT, via
13098 the PIC reg. An insn is emitted to do this load, and the reg is
13099 returned.
13100
13101 2. Static data references, constant pool addresses, and code labels
13102 compute the address as an offset from the GOT, whose base is in
13103 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
13104 differentiate them from global data objects. The returned
13105 address is the PIC reg + an unspec constant.
13106
13107 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
13108 reg also appears in the address. */
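/* Illustrative 32-bit ELF examples of the two cases: a global "x"
   becomes a load through the GOT,
     (mem (plus pic_offset_table_rtx
                (const (unspec [(symbol_ref "x")] UNSPEC_GOT))))
   i.e. x@GOT(%ebx), while a local/static symbol is addressed directly
   as pic_offset_table_rtx plus an x@GOTOFF constant, with no load. */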
13109
13110 static rtx
13111 legitimize_pic_address (rtx orig, rtx reg)
13112 {
13113 rtx addr = orig;
13114 rtx new_rtx = orig;
13115
13116 #if TARGET_MACHO
13117 if (TARGET_MACHO && !TARGET_64BIT)
13118 {
13119 if (reg == 0)
13120 reg = gen_reg_rtx (Pmode);
13121 /* Use the generic Mach-O PIC machinery. */
13122 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
13123 }
13124 #endif
13125
13126 if (TARGET_64BIT && TARGET_DLLIMPORT_DECL_ATTRIBUTES)
13127 {
13128 rtx tmp = legitimize_pe_coff_symbol (addr, true);
13129 if (tmp)
13130 return tmp;
13131 }
13132
13133 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
13134 new_rtx = addr;
13135 else if (TARGET_64BIT && !TARGET_PECOFF
13136 && ix86_cmodel != CM_SMALL_PIC && gotoff_operand (addr, Pmode))
13137 {
13138 rtx tmpreg;
13139 /* This symbol may be referenced via a displacement from the PIC
13140 base address (@GOTOFF). */
13141
13142 if (reload_in_progress)
13143 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
13144 if (GET_CODE (addr) == CONST)
13145 addr = XEXP (addr, 0);
13146 if (GET_CODE (addr) == PLUS)
13147 {
13148 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
13149 UNSPEC_GOTOFF);
13150 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
13151 }
13152 else
13153 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
13154 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13155 if (!reg)
13156 tmpreg = gen_reg_rtx (Pmode);
13157 else
13158 tmpreg = reg;
13159 emit_move_insn (tmpreg, new_rtx);
13160
13161 if (reg != 0)
13162 {
13163 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
13164 tmpreg, 1, OPTAB_DIRECT);
13165 new_rtx = reg;
13166 }
13167 else
13168 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
13169 }
13170 else if (!TARGET_64BIT && !TARGET_PECOFF && gotoff_operand (addr, Pmode))
13171 {
13172 /* This symbol may be referenced via a displacement from the PIC
13173 base address (@GOTOFF). */
13174
13175 if (reload_in_progress)
13176 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
13177 if (GET_CODE (addr) == CONST)
13178 addr = XEXP (addr, 0);
13179 if (GET_CODE (addr) == PLUS)
13180 {
13181 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
13182 UNSPEC_GOTOFF);
13183 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
13184 }
13185 else
13186 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
13187 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13188 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13189
13190 if (reg != 0)
13191 {
13192 emit_move_insn (reg, new_rtx);
13193 new_rtx = reg;
13194 }
13195 }
13196 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
13197 /* We can't use @GOTOFF for text labels on VxWorks;
13198 see gotoff_operand. */
13199 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
13200 {
13201 rtx tmp = legitimize_pe_coff_symbol (addr, true);
13202 if (tmp)
13203 return tmp;
13204
13205 /* For x64 PE-COFF there is no GOT table, so we use the address
13206 directly. */
13207 if (TARGET_64BIT && TARGET_PECOFF)
13208 {
13209 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL);
13210 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13211
13212 if (reg == 0)
13213 reg = gen_reg_rtx (Pmode);
13214 emit_move_insn (reg, new_rtx);
13215 new_rtx = reg;
13216 }
13217 else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
13218 {
13219 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
13220 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13221 new_rtx = gen_const_mem (Pmode, new_rtx);
13222 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
13223
13224 if (reg == 0)
13225 reg = gen_reg_rtx (Pmode);
13226 /* Use gen_movsi directly; otherwise the address is loaded
13227 into a register for CSE. We don't want to CSE these addresses;
13228 instead we CSE addresses from the GOT table, so skip this. */
13229 emit_insn (gen_movsi (reg, new_rtx));
13230 new_rtx = reg;
13231 }
13232 else
13233 {
13234 /* This symbol must be referenced via a load from the
13235 Global Offset Table (@GOT). */
13236
13237 if (reload_in_progress)
13238 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
13239 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
13240 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13241 if (TARGET_64BIT)
13242 new_rtx = force_reg (Pmode, new_rtx);
13243 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13244 new_rtx = gen_const_mem (Pmode, new_rtx);
13245 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
13246
13247 if (reg == 0)
13248 reg = gen_reg_rtx (Pmode);
13249 emit_move_insn (reg, new_rtx);
13250 new_rtx = reg;
13251 }
13252 }
13253 else
13254 {
13255 if (CONST_INT_P (addr)
13256 && !x86_64_immediate_operand (addr, VOIDmode))
13257 {
13258 if (reg)
13259 {
13260 emit_move_insn (reg, addr);
13261 new_rtx = reg;
13262 }
13263 else
13264 new_rtx = force_reg (Pmode, addr);
13265 }
13266 else if (GET_CODE (addr) == CONST)
13267 {
13268 addr = XEXP (addr, 0);
13269
13270 /* We must match stuff we generate before. Assume the only
13271 unspecs that can get here are ours. Not that we could do
13272 anything with them anyway.... */
13273 if (GET_CODE (addr) == UNSPEC
13274 || (GET_CODE (addr) == PLUS
13275 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
13276 return orig;
13277 gcc_assert (GET_CODE (addr) == PLUS);
13278 }
13279 if (GET_CODE (addr) == PLUS)
13280 {
13281 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
13282
13283 /* Check first to see if this is a constant offset from a @GOTOFF
13284 symbol reference. */
13285 if (!TARGET_PECOFF && gotoff_operand (op0, Pmode)
13286 && CONST_INT_P (op1))
13287 {
13288 if (!TARGET_64BIT)
13289 {
13290 if (reload_in_progress)
13291 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
13292 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
13293 UNSPEC_GOTOFF);
13294 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
13295 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13296 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13297
13298 if (reg != 0)
13299 {
13300 emit_move_insn (reg, new_rtx);
13301 new_rtx = reg;
13302 }
13303 }
13304 else
13305 {
13306 if (INTVAL (op1) < -16*1024*1024
13307 || INTVAL (op1) >= 16*1024*1024)
13308 {
13309 if (!x86_64_immediate_operand (op1, Pmode))
13310 op1 = force_reg (Pmode, op1);
13311 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
13312 }
13313 }
13314 }
13315 else
13316 {
13317 rtx base = legitimize_pic_address (op0, reg);
13318 enum machine_mode mode = GET_MODE (base);
13319 new_rtx
13320 = legitimize_pic_address (op1, base == reg ? NULL_RTX : reg);
13321
13322 if (CONST_INT_P (new_rtx))
13323 {
13324 if (INTVAL (new_rtx) < -16*1024*1024
13325 || INTVAL (new_rtx) >= 16*1024*1024)
13326 {
13327 if (!x86_64_immediate_operand (new_rtx, mode))
13328 new_rtx = force_reg (mode, new_rtx);
13329 new_rtx
13330 = gen_rtx_PLUS (mode, force_reg (mode, base), new_rtx);
13331 }
13332 else
13333 new_rtx = plus_constant (mode, base, INTVAL (new_rtx));
13334 }
13335 else
13336 {
13337 if (GET_CODE (new_rtx) == PLUS
13338 && CONSTANT_P (XEXP (new_rtx, 1)))
13339 {
13340 base = gen_rtx_PLUS (mode, base, XEXP (new_rtx, 0));
13341 new_rtx = XEXP (new_rtx, 1);
13342 }
13343 new_rtx = gen_rtx_PLUS (mode, base, new_rtx);
13344 }
13345 }
13346 }
13347 }
13348 return new_rtx;
13349 }
13350 \f
13351 /* Load the thread pointer. If TO_REG is true, force it into a register. */
13352
13353 static rtx
13354 get_thread_pointer (enum machine_mode tp_mode, bool to_reg)
13355 {
13356 rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
13357
13358 if (GET_MODE (tp) != tp_mode)
13359 {
13360 gcc_assert (GET_MODE (tp) == SImode);
13361 gcc_assert (tp_mode == DImode);
13362
13363 tp = gen_rtx_ZERO_EXTEND (tp_mode, tp);
13364 }
13365
13366 if (to_reg)
13367 tp = copy_to_mode_reg (tp_mode, tp);
13368
13369 return tp;
13370 }
13371
13372 /* Construct the SYMBOL_REF for the tls_get_addr function. */
13373
13374 static GTY(()) rtx ix86_tls_symbol;
13375
13376 static rtx
13377 ix86_tls_get_addr (void)
13378 {
13379 if (!ix86_tls_symbol)
13380 {
13381 const char *sym
13382 = ((TARGET_ANY_GNU_TLS && !TARGET_64BIT)
13383 ? "___tls_get_addr" : "__tls_get_addr");
13384
13385 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, sym);
13386 }
13387
13388 if (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF)
13389 {
13390 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, ix86_tls_symbol),
13391 UNSPEC_PLTOFF);
13392 return gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
13393 gen_rtx_CONST (Pmode, unspec));
13394 }
13395
13396 return ix86_tls_symbol;
13397 }
13398
13399 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
13400
13401 static GTY(()) rtx ix86_tls_module_base_symbol;
13402
13403 rtx
13404 ix86_tls_module_base (void)
13405 {
13406 if (!ix86_tls_module_base_symbol)
13407 {
13408 ix86_tls_module_base_symbol
13409 = gen_rtx_SYMBOL_REF (Pmode, "_TLS_MODULE_BASE_");
13410
13411 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
13412 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
13413 }
13414
13415 return ix86_tls_module_base_symbol;
13416 }
13417
13418 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
13419 false if we expect this to be used for a memory address and true if
13420 we expect to load the address into a register. */
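/* As a rough illustration of the models handled below (exact sequences
   vary with target and options): local-exec on GNU targets reduces to
   thread-pointer-relative addressing, e.g.
     movq %fs:0, %rax
     leaq x@tpoff(%rax), %rdi
   whereas global-dynamic goes through a call to __tls_get_addr. */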
13421
13422 static rtx
13423 legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
13424 {
13425 rtx dest, base, off;
13426 rtx pic = NULL_RTX, tp = NULL_RTX;
13427 enum machine_mode tp_mode = Pmode;
13428 int type;
13429
13430 /* Fall back to the global dynamic model if the toolchain cannot support local
13431 dynamic. */
13432 if (TARGET_SUN_TLS && !TARGET_64BIT
13433 && !HAVE_AS_IX86_TLSLDMPLT && !HAVE_AS_IX86_TLSLDM
13434 && model == TLS_MODEL_LOCAL_DYNAMIC)
13435 model = TLS_MODEL_GLOBAL_DYNAMIC;
13436
13437 switch (model)
13438 {
13439 case TLS_MODEL_GLOBAL_DYNAMIC:
13440 dest = gen_reg_rtx (Pmode);
13441
13442 if (!TARGET_64BIT)
13443 {
13444 if (flag_pic && !TARGET_PECOFF)
13445 pic = pic_offset_table_rtx;
13446 else
13447 {
13448 pic = gen_reg_rtx (Pmode);
13449 emit_insn (gen_set_got (pic));
13450 }
13451 }
13452
13453 if (TARGET_GNU2_TLS)
13454 {
13455 if (TARGET_64BIT)
13456 emit_insn (gen_tls_dynamic_gnu2_64 (dest, x));
13457 else
13458 emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic));
13459
13460 tp = get_thread_pointer (Pmode, true);
13461 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
13462
13463 if (GET_MODE (x) != Pmode)
13464 x = gen_rtx_ZERO_EXTEND (Pmode, x);
13465
13466 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
13467 }
13468 else
13469 {
13470 rtx caddr = ix86_tls_get_addr ();
13471
13472 if (TARGET_64BIT)
13473 {
13474 rtx rax = gen_rtx_REG (Pmode, AX_REG);
13475 rtx insns;
13476
13477 start_sequence ();
13478 emit_call_insn
13479 (ix86_gen_tls_global_dynamic_64 (rax, x, caddr));
13480 insns = get_insns ();
13481 end_sequence ();
13482
13483 if (GET_MODE (x) != Pmode)
13484 x = gen_rtx_ZERO_EXTEND (Pmode, x);
13485
13486 RTL_CONST_CALL_P (insns) = 1;
13487 emit_libcall_block (insns, dest, rax, x);
13488 }
13489 else
13490 emit_insn (gen_tls_global_dynamic_32 (dest, x, pic, caddr));
13491 }
13492 break;
13493
13494 case TLS_MODEL_LOCAL_DYNAMIC:
13495 base = gen_reg_rtx (Pmode);
13496
13497 if (!TARGET_64BIT)
13498 {
13499 if (flag_pic)
13500 pic = pic_offset_table_rtx;
13501 else
13502 {
13503 pic = gen_reg_rtx (Pmode);
13504 emit_insn (gen_set_got (pic));
13505 }
13506 }
13507
13508 if (TARGET_GNU2_TLS)
13509 {
13510 rtx tmp = ix86_tls_module_base ();
13511
13512 if (TARGET_64BIT)
13513 emit_insn (gen_tls_dynamic_gnu2_64 (base, tmp));
13514 else
13515 emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic));
13516
13517 tp = get_thread_pointer (Pmode, true);
13518 set_unique_reg_note (get_last_insn (), REG_EQUAL,
13519 gen_rtx_MINUS (Pmode, tmp, tp));
13520 }
13521 else
13522 {
13523 rtx caddr = ix86_tls_get_addr ();
13524
13525 if (TARGET_64BIT)
13526 {
13527 rtx rax = gen_rtx_REG (Pmode, AX_REG);
13528 rtx insns, eqv;
13529
13530 start_sequence ();
13531 emit_call_insn
13532 (ix86_gen_tls_local_dynamic_base_64 (rax, caddr));
13533 insns = get_insns ();
13534 end_sequence ();
13535
13536 /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
13537 share the LD_BASE result with other LD model accesses. */
13538 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
13539 UNSPEC_TLS_LD_BASE);
13540
13541 RTL_CONST_CALL_P (insns) = 1;
13542 emit_libcall_block (insns, base, rax, eqv);
13543 }
13544 else
13545 emit_insn (gen_tls_local_dynamic_base_32 (base, pic, caddr));
13546 }
13547
13548 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
13549 off = gen_rtx_CONST (Pmode, off);
13550
13551 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
13552
13553 if (TARGET_GNU2_TLS)
13554 {
13555 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
13556
13557 if (GET_MODE (x) != Pmode)
13558 x = gen_rtx_ZERO_EXTEND (Pmode, x);
13559
13560 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
13561 }
13562 break;
13563
13564 case TLS_MODEL_INITIAL_EXEC:
13565 if (TARGET_64BIT)
13566 {
13567 if (TARGET_SUN_TLS && !TARGET_X32)
13568 {
13569	      /* The Sun linker took the AMD64 TLS spec literally
13570	         and can only handle %rax as the destination of the
13571	         initial-exec code sequence.  */
13572
13573 dest = gen_reg_rtx (DImode);
13574 emit_insn (gen_tls_initial_exec_64_sun (dest, x));
13575 return dest;
13576 }
13577
13578	  /* Generate DImode references to avoid %fs:(%reg32)
13579	     problems and the linker IE->LE relaxation bug.  */
13580 tp_mode = DImode;
13581 pic = NULL;
13582 type = UNSPEC_GOTNTPOFF;
13583 }
13584 else if (flag_pic)
13585 {
13586 if (reload_in_progress)
13587 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
13588 pic = pic_offset_table_rtx;
13589 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
13590 }
13591 else if (!TARGET_ANY_GNU_TLS)
13592 {
13593 pic = gen_reg_rtx (Pmode);
13594 emit_insn (gen_set_got (pic));
13595 type = UNSPEC_GOTTPOFF;
13596 }
13597 else
13598 {
13599 pic = NULL;
13600 type = UNSPEC_INDNTPOFF;
13601 }
13602
13603 off = gen_rtx_UNSPEC (tp_mode, gen_rtvec (1, x), type);
13604 off = gen_rtx_CONST (tp_mode, off);
13605 if (pic)
13606 off = gen_rtx_PLUS (tp_mode, pic, off);
13607 off = gen_const_mem (tp_mode, off);
13608 set_mem_alias_set (off, ix86_GOT_alias_set ());
13609
13610 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
13611 {
13612 base = get_thread_pointer (tp_mode,
13613 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
13614 off = force_reg (tp_mode, off);
13615 return gen_rtx_PLUS (tp_mode, base, off);
13616 }
13617 else
13618 {
13619 base = get_thread_pointer (Pmode, true);
13620 dest = gen_reg_rtx (Pmode);
13621 emit_insn (ix86_gen_sub3 (dest, base, off));
13622 }
13623 break;
13624
13625 case TLS_MODEL_LOCAL_EXEC:
13626 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
13627 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
13628 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
13629 off = gen_rtx_CONST (Pmode, off);
13630
13631 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
13632 {
13633 base = get_thread_pointer (Pmode,
13634 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
13635 return gen_rtx_PLUS (Pmode, base, off);
13636 }
13637 else
13638 {
13639 base = get_thread_pointer (Pmode, true);
13640 dest = gen_reg_rtx (Pmode);
13641 emit_insn (ix86_gen_sub3 (dest, base, off));
13642 }
13643 break;
13644
13645 default:
13646 gcc_unreachable ();
13647 }
13648
13649 return dest;
13650 }
13651
13652 /* Create or return the unique __imp_DECL dllimport symbol corresponding
13653 to symbol DECL if BEIMPORT is true. Otherwise create or return the
13654 unique refptr-DECL symbol corresponding to symbol DECL. */
13655
13656 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
13657 htab_t dllimport_map;
13658
13659 static tree
13660 get_dllimport_decl (tree decl, bool beimport)
13661 {
13662 struct tree_map *h, in;
13663 void **loc;
13664 const char *name;
13665 const char *prefix;
13666 size_t namelen, prefixlen;
13667 char *imp_name;
13668 tree to;
13669 rtx rtl;
13670
13671 if (!dllimport_map)
13672 dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
13673
13674 in.hash = htab_hash_pointer (decl);
13675 in.base.from = decl;
13676 loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
13677 h = (struct tree_map *) *loc;
13678 if (h)
13679 return h->to;
13680
13681 *loc = h = ggc_alloc<tree_map> ();
13682 h->hash = in.hash;
13683 h->base.from = decl;
13684 h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
13685 VAR_DECL, NULL, ptr_type_node);
13686 DECL_ARTIFICIAL (to) = 1;
13687 DECL_IGNORED_P (to) = 1;
13688 DECL_EXTERNAL (to) = 1;
13689 TREE_READONLY (to) = 1;
13690
13691 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
13692 name = targetm.strip_name_encoding (name);
13693 if (beimport)
13694 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
13695 ? "*__imp_" : "*__imp__";
13696 else
13697 prefix = user_label_prefix[0] == 0 ? "*.refptr." : "*refptr.";
13698 namelen = strlen (name);
13699 prefixlen = strlen (prefix);
13700 imp_name = (char *) alloca (namelen + prefixlen + 1);
13701 memcpy (imp_name, prefix, prefixlen);
13702 memcpy (imp_name + prefixlen, name, namelen + 1);
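  /* For example, assuming an empty user_label_prefix, a dllimport
     reference to "foo" yields the stub name "*__imp_foo" and a refptr
     reference yields "*.refptr.foo"; the leading '*' tells the assembler
     name handling not to add the user label prefix again.  */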
13703
13704 name = ggc_alloc_string (imp_name, namelen + prefixlen);
13705 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
13706 SET_SYMBOL_REF_DECL (rtl, to);
13707 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL | SYMBOL_FLAG_STUBVAR;
13708 if (!beimport)
13709 {
13710 SYMBOL_REF_FLAGS (rtl) |= SYMBOL_FLAG_EXTERNAL;
13711 #ifdef SUB_TARGET_RECORD_STUB
13712 SUB_TARGET_RECORD_STUB (name);
13713 #endif
13714 }
13715
13716 rtl = gen_const_mem (Pmode, rtl);
13717 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
13718
13719 SET_DECL_RTL (to, rtl);
13720 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
13721
13722 return to;
13723 }
13724
13725 /* Expand SYMBOL into its corresponding far-addressed symbol.
13726    WANT_REG is true if we require the result be a register.  */
13727
13728 static rtx
13729 legitimize_pe_coff_extern_decl (rtx symbol, bool want_reg)
13730 {
13731 tree imp_decl;
13732 rtx x;
13733
13734 gcc_assert (SYMBOL_REF_DECL (symbol));
13735 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), false);
13736
13737 x = DECL_RTL (imp_decl);
13738 if (want_reg)
13739 x = force_reg (Pmode, x);
13740 return x;
13741 }
13742
13743 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
13744 true if we require the result be a register. */
13745
13746 static rtx
13747 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
13748 {
13749 tree imp_decl;
13750 rtx x;
13751
13752 gcc_assert (SYMBOL_REF_DECL (symbol));
13753 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), true);
13754
13755 x = DECL_RTL (imp_decl);
13756 if (want_reg)
13757 x = force_reg (Pmode, x);
13758 return x;
13759 }
13760
13761 /* Expand ADDR into its corresponding dllimport or refptr symbol.  INREG
13762    is true if we require the result be a register.  */
13763
13764 static rtx
13765 legitimize_pe_coff_symbol (rtx addr, bool inreg)
13766 {
13767 if (!TARGET_PECOFF)
13768 return NULL_RTX;
13769
13770 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
13771 {
13772 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
13773 return legitimize_dllimport_symbol (addr, inreg);
13774 if (GET_CODE (addr) == CONST
13775 && GET_CODE (XEXP (addr, 0)) == PLUS
13776 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
13777 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
13778 {
13779 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), inreg);
13780 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
13781 }
13782 }
13783
13784 if (ix86_cmodel != CM_LARGE_PIC && ix86_cmodel != CM_MEDIUM_PIC)
13785 return NULL_RTX;
13786 if (GET_CODE (addr) == SYMBOL_REF
13787 && !is_imported_p (addr)
13788 && SYMBOL_REF_EXTERNAL_P (addr)
13789 && SYMBOL_REF_DECL (addr))
13790 return legitimize_pe_coff_extern_decl (addr, inreg);
13791
13792 if (GET_CODE (addr) == CONST
13793 && GET_CODE (XEXP (addr, 0)) == PLUS
13794 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
13795 && !is_imported_p (XEXP (XEXP (addr, 0), 0))
13796 && SYMBOL_REF_EXTERNAL_P (XEXP (XEXP (addr, 0), 0))
13797 && SYMBOL_REF_DECL (XEXP (XEXP (addr, 0), 0)))
13798 {
13799 rtx t = legitimize_pe_coff_extern_decl (XEXP (XEXP (addr, 0), 0), inreg);
13800 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
13801 }
13802 return NULL_RTX;
13803 }
13804
13805 /* Try machine-dependent ways of modifying an illegitimate address
13806 to be legitimate. If we find one, return the new, valid address.
13807 This macro is used in only one place: `memory_address' in explow.c.
13808
13809 OLDX is the address as it was before break_out_memory_refs was called.
13810 In some cases it is useful to look at this to decide what needs to be done.
13811
13812 It is always safe for this macro to do nothing. It exists to recognize
13813 opportunities to optimize the output.
13814
13815 For the 80386, we handle X+REG by loading X into a register R and
13816 using R+REG. R will go in a general reg and indexing will be used.
13817 However, if REG is a broken-out memory address or multiplication,
13818 nothing needs to be done because REG can certainly go in a general reg.
13819
13820 When -fpic is used, special handling is needed for symbolic references.
13821 See comments by legitimize_pic_address in i386.c for details. */
13822
13823 static rtx
13824 ix86_legitimize_address (rtx x, rtx, enum machine_mode mode)
13825 {
13826 int changed = 0;
13827 unsigned log;
13828
13829 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
13830 if (log)
13831 return legitimize_tls_address (x, (enum tls_model) log, false);
13832 if (GET_CODE (x) == CONST
13833 && GET_CODE (XEXP (x, 0)) == PLUS
13834 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
13835 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
13836 {
13837 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
13838 (enum tls_model) log, false);
13839 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
13840 }
13841
13842 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
13843 {
13844 rtx tmp = legitimize_pe_coff_symbol (x, true);
13845 if (tmp)
13846 return tmp;
13847 }
13848
13849 if (flag_pic && SYMBOLIC_CONST (x))
13850 return legitimize_pic_address (x, 0);
13851
13852 #if TARGET_MACHO
13853 if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x))
13854 return machopic_indirect_data_reference (x, 0);
13855 #endif
13856
13857   /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */
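  /* E.g. (ashift (reg) (const_int 2)) becomes (mult (reg) (const_int 4)),
     which the address decomposition can later map onto a scaled index.  */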
13858 if (GET_CODE (x) == ASHIFT
13859 && CONST_INT_P (XEXP (x, 1))
13860 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
13861 {
13862 changed = 1;
13863 log = INTVAL (XEXP (x, 1));
13864 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
13865 GEN_INT (1 << log));
13866 }
13867
13868 if (GET_CODE (x) == PLUS)
13869 {
13870 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
13871
13872 if (GET_CODE (XEXP (x, 0)) == ASHIFT
13873 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
13874 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
13875 {
13876 changed = 1;
13877 log = INTVAL (XEXP (XEXP (x, 0), 1));
13878 XEXP (x, 0) = gen_rtx_MULT (Pmode,
13879 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
13880 GEN_INT (1 << log));
13881 }
13882
13883 if (GET_CODE (XEXP (x, 1)) == ASHIFT
13884 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
13885 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
13886 {
13887 changed = 1;
13888 log = INTVAL (XEXP (XEXP (x, 1), 1));
13889 XEXP (x, 1) = gen_rtx_MULT (Pmode,
13890 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
13891 GEN_INT (1 << log));
13892 }
13893
13894 /* Put multiply first if it isn't already. */
13895 if (GET_CODE (XEXP (x, 1)) == MULT)
13896 {
13897 rtx tmp = XEXP (x, 0);
13898 XEXP (x, 0) = XEXP (x, 1);
13899 XEXP (x, 1) = tmp;
13900 changed = 1;
13901 }
13902
13903 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
13904 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
13905 created by virtual register instantiation, register elimination, and
13906 similar optimizations. */
13907 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
13908 {
13909 changed = 1;
13910 x = gen_rtx_PLUS (Pmode,
13911 gen_rtx_PLUS (Pmode, XEXP (x, 0),
13912 XEXP (XEXP (x, 1), 0)),
13913 XEXP (XEXP (x, 1), 1));
13914 }
13915
13916 /* Canonicalize
13917 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
13918 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
13919 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
13920 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
13921 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
13922 && CONSTANT_P (XEXP (x, 1)))
13923 {
13924 rtx constant;
13925 rtx other = NULL_RTX;
13926
13927 if (CONST_INT_P (XEXP (x, 1)))
13928 {
13929 constant = XEXP (x, 1);
13930 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
13931 }
13932 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
13933 {
13934 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
13935 other = XEXP (x, 1);
13936 }
13937 else
13938 constant = 0;
13939
13940 if (constant)
13941 {
13942 changed = 1;
13943 x = gen_rtx_PLUS (Pmode,
13944 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
13945 XEXP (XEXP (XEXP (x, 0), 1), 0)),
13946 plus_constant (Pmode, other,
13947 INTVAL (constant)));
13948 }
13949 }
13950
13951 if (changed && ix86_legitimate_address_p (mode, x, false))
13952 return x;
13953
13954 if (GET_CODE (XEXP (x, 0)) == MULT)
13955 {
13956 changed = 1;
13957 XEXP (x, 0) = copy_addr_to_reg (XEXP (x, 0));
13958 }
13959
13960 if (GET_CODE (XEXP (x, 1)) == MULT)
13961 {
13962 changed = 1;
13963 XEXP (x, 1) = copy_addr_to_reg (XEXP (x, 1));
13964 }
13965
13966 if (changed
13967 && REG_P (XEXP (x, 1))
13968 && REG_P (XEXP (x, 0)))
13969 return x;
13970
13971 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
13972 {
13973 changed = 1;
13974 x = legitimize_pic_address (x, 0);
13975 }
13976
13977 if (changed && ix86_legitimate_address_p (mode, x, false))
13978 return x;
13979
13980 if (REG_P (XEXP (x, 0)))
13981 {
13982 rtx temp = gen_reg_rtx (Pmode);
13983 rtx val = force_operand (XEXP (x, 1), temp);
13984 if (val != temp)
13985 {
13986 val = convert_to_mode (Pmode, val, 1);
13987 emit_move_insn (temp, val);
13988 }
13989
13990 XEXP (x, 1) = temp;
13991 return x;
13992 }
13993
13994 else if (REG_P (XEXP (x, 1)))
13995 {
13996 rtx temp = gen_reg_rtx (Pmode);
13997 rtx val = force_operand (XEXP (x, 0), temp);
13998 if (val != temp)
13999 {
14000 val = convert_to_mode (Pmode, val, 1);
14001 emit_move_insn (temp, val);
14002 }
14003
14004 XEXP (x, 0) = temp;
14005 return x;
14006 }
14007 }
14008
14009 return x;
14010 }
14011 \f
14012 /* Print an integer constant expression in assembler syntax. Addition
14013 and subtraction are the only arithmetic that may appear in these
14014 expressions. FILE is the stdio stream to write to, X is the rtx, and
14015 CODE is the operand print code from the output string. */
14016
14017 static void
14018 output_pic_addr_const (FILE *file, rtx x, int code)
14019 {
14020 char buf[256];
14021
14022 switch (GET_CODE (x))
14023 {
14024 case PC:
14025 gcc_assert (flag_pic);
14026 putc ('.', file);
14027 break;
14028
14029 case SYMBOL_REF:
14030 if (TARGET_64BIT || ! TARGET_MACHO_BRANCH_ISLANDS)
14031 output_addr_const (file, x);
14032 else
14033 {
14034 const char *name = XSTR (x, 0);
14035
14036 /* Mark the decl as referenced so that cgraph will
14037 output the function. */
14038 if (SYMBOL_REF_DECL (x))
14039 mark_decl_referenced (SYMBOL_REF_DECL (x));
14040
14041 #if TARGET_MACHO
14042 if (MACHOPIC_INDIRECT
14043 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
14044 name = machopic_indirection_name (x, /*stub_p=*/true);
14045 #endif
14046 assemble_name (file, name);
14047 }
14048 if (!TARGET_MACHO && !(TARGET_64BIT && TARGET_PECOFF)
14049 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
14050 fputs ("@PLT", file);
14051 break;
14052
14053 case LABEL_REF:
14054 x = XEXP (x, 0);
14055 /* FALLTHRU */
14056 case CODE_LABEL:
14057 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
14058 assemble_name (asm_out_file, buf);
14059 break;
14060
14061 case CONST_INT:
14062 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
14063 break;
14064
14065 case CONST:
14066 /* This used to output parentheses around the expression,
14067 but that does not work on the 386 (either ATT or BSD assembler). */
14068 output_pic_addr_const (file, XEXP (x, 0), code);
14069 break;
14070
14071 case CONST_DOUBLE:
14072 if (GET_MODE (x) == VOIDmode)
14073 {
14074 /* We can use %d if the number is <32 bits and positive. */
14075 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
14076 fprintf (file, "0x%lx%08lx",
14077 (unsigned long) CONST_DOUBLE_HIGH (x),
14078 (unsigned long) CONST_DOUBLE_LOW (x));
14079 else
14080 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
14081 }
14082 else
14083 /* We can't handle floating point constants;
14084 TARGET_PRINT_OPERAND must handle them. */
14085 output_operand_lossage ("floating constant misused");
14086 break;
14087
14088 case PLUS:
14089 /* Some assemblers need integer constants to appear first. */
14090 if (CONST_INT_P (XEXP (x, 0)))
14091 {
14092 output_pic_addr_const (file, XEXP (x, 0), code);
14093 putc ('+', file);
14094 output_pic_addr_const (file, XEXP (x, 1), code);
14095 }
14096 else
14097 {
14098 gcc_assert (CONST_INT_P (XEXP (x, 1)));
14099 output_pic_addr_const (file, XEXP (x, 1), code);
14100 putc ('+', file);
14101 output_pic_addr_const (file, XEXP (x, 0), code);
14102 }
14103 break;
14104
14105 case MINUS:
14106 if (!TARGET_MACHO)
14107 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
14108 output_pic_addr_const (file, XEXP (x, 0), code);
14109 putc ('-', file);
14110 output_pic_addr_const (file, XEXP (x, 1), code);
14111 if (!TARGET_MACHO)
14112 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
14113 break;
14114
14115 case UNSPEC:
14116 if (XINT (x, 1) == UNSPEC_STACK_CHECK)
14117 {
14118 bool f = i386_asm_output_addr_const_extra (file, x);
14119 gcc_assert (f);
14120 break;
14121 }
14122
14123 gcc_assert (XVECLEN (x, 0) == 1);
14124 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
14125 switch (XINT (x, 1))
14126 {
14127 case UNSPEC_GOT:
14128 fputs ("@GOT", file);
14129 break;
14130 case UNSPEC_GOTOFF:
14131 fputs ("@GOTOFF", file);
14132 break;
14133 case UNSPEC_PLTOFF:
14134 fputs ("@PLTOFF", file);
14135 break;
14136 case UNSPEC_PCREL:
14137 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14138 "(%rip)" : "[rip]", file);
14139 break;
14140 case UNSPEC_GOTPCREL:
14141 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14142 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
14143 break;
14144 case UNSPEC_GOTTPOFF:
14145 /* FIXME: This might be @TPOFF in Sun ld too. */
14146 fputs ("@gottpoff", file);
14147 break;
14148 case UNSPEC_TPOFF:
14149 fputs ("@tpoff", file);
14150 break;
14151 case UNSPEC_NTPOFF:
14152 if (TARGET_64BIT)
14153 fputs ("@tpoff", file);
14154 else
14155 fputs ("@ntpoff", file);
14156 break;
14157 case UNSPEC_DTPOFF:
14158 fputs ("@dtpoff", file);
14159 break;
14160 case UNSPEC_GOTNTPOFF:
14161 if (TARGET_64BIT)
14162 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14163 "@gottpoff(%rip)": "@gottpoff[rip]", file);
14164 else
14165 fputs ("@gotntpoff", file);
14166 break;
14167 case UNSPEC_INDNTPOFF:
14168 fputs ("@indntpoff", file);
14169 break;
14170 #if TARGET_MACHO
14171 case UNSPEC_MACHOPIC_OFFSET:
14172 putc ('-', file);
14173 machopic_output_function_base_name (file);
14174 break;
14175 #endif
14176 default:
14177 output_operand_lossage ("invalid UNSPEC as operand");
14178 break;
14179 }
14180 break;
14181
14182 default:
14183 output_operand_lossage ("invalid expression as operand");
14184 }
14185 }
14186
14187 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
14188 We need to emit DTP-relative relocations. */
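/* With the usual ELF definition of ASM_LONG this emits, for example,
   ".long foo@dtpoff" for SIZE == 4 and ".long foo@dtpoff, 0" (zero upper
   half) for SIZE == 8; a sketch only, since the exact spelling depends on
   the target's ASM_LONG.  */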
14189
14190 static void ATTRIBUTE_UNUSED
14191 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
14192 {
14193 fputs (ASM_LONG, file);
14194 output_addr_const (file, x);
14195 fputs ("@dtpoff", file);
14196 switch (size)
14197 {
14198 case 4:
14199 break;
14200 case 8:
14201 fputs (", 0", file);
14202 break;
14203 default:
14204 gcc_unreachable ();
14205 }
14206 }
14207
14208 /* Return true if X is a representation of the PIC register. This copes
14209 with calls from ix86_find_base_term, where the register might have
14210 been replaced by a cselib value. */
14211
14212 static bool
14213 ix86_pic_register_p (rtx x)
14214 {
14215 if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
14216 return (pic_offset_table_rtx
14217 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
14218 else
14219 return REG_P (x) && REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
14220 }
14221
14222 /* Helper function for ix86_delegitimize_address.
14223 Attempt to delegitimize TLS local-exec accesses. */
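/* Roughly: given an address of the form
     seg:(base + index*scale + const (unspec [SYM] NTPOFF))
   using the TLS segment register, rebuild the plain SYM-based address so
   that debug output and ix86_find_base_term see the symbol again.  */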
14224
14225 static rtx
14226 ix86_delegitimize_tls_address (rtx orig_x)
14227 {
14228 rtx x = orig_x, unspec;
14229 struct ix86_address addr;
14230
14231 if (!TARGET_TLS_DIRECT_SEG_REFS)
14232 return orig_x;
14233 if (MEM_P (x))
14234 x = XEXP (x, 0);
14235 if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
14236 return orig_x;
14237 if (ix86_decompose_address (x, &addr) == 0
14238 || addr.seg != DEFAULT_TLS_SEG_REG
14239 || addr.disp == NULL_RTX
14240 || GET_CODE (addr.disp) != CONST)
14241 return orig_x;
14242 unspec = XEXP (addr.disp, 0);
14243 if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
14244 unspec = XEXP (unspec, 0);
14245 if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
14246 return orig_x;
14247 x = XVECEXP (unspec, 0, 0);
14248 gcc_assert (GET_CODE (x) == SYMBOL_REF);
14249 if (unspec != XEXP (addr.disp, 0))
14250 x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
14251 if (addr.index)
14252 {
14253 rtx idx = addr.index;
14254 if (addr.scale != 1)
14255 idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
14256 x = gen_rtx_PLUS (Pmode, idx, x);
14257 }
14258 if (addr.base)
14259 x = gen_rtx_PLUS (Pmode, addr.base, x);
14260 if (MEM_P (orig_x))
14261 x = replace_equiv_address_nv (orig_x, x);
14262 return x;
14263 }
14264
14265 /* In the name of slightly smaller debug output, and to cater to
14266 general assembler lossage, recognize PIC+GOTOFF and turn it back
14267 into a direct symbol reference.
14268
14269 On Darwin, this is necessary to avoid a crash, because Darwin
14270 has a different PIC label for each routine but the DWARF debugging
14271 information is not associated with any particular routine, so it's
14272 necessary to remove references to the PIC label from RTL stored by
14273 the DWARF output code. */
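/* For illustration, a typical -m32 PIC address such as
     (plus (reg %ebx) (const (unspec [(symbol_ref "foo")] UNSPEC_GOTOFF)))
   is turned back into just (symbol_ref "foo"), with any register or
   constant addend re-applied around the result.  */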
14274
14275 static rtx
14276 ix86_delegitimize_address (rtx x)
14277 {
14278 rtx orig_x = delegitimize_mem_from_attrs (x);
14279 /* addend is NULL or some rtx if x is something+GOTOFF where
14280 something doesn't include the PIC register. */
14281 rtx addend = NULL_RTX;
14282 /* reg_addend is NULL or a multiple of some register. */
14283 rtx reg_addend = NULL_RTX;
14284 /* const_addend is NULL or a const_int. */
14285 rtx const_addend = NULL_RTX;
14286 /* This is the result, or NULL. */
14287 rtx result = NULL_RTX;
14288
14289 x = orig_x;
14290
14291 if (MEM_P (x))
14292 x = XEXP (x, 0);
14293
14294 if (TARGET_64BIT)
14295 {
14296 if (GET_CODE (x) == CONST
14297 && GET_CODE (XEXP (x, 0)) == PLUS
14298 && GET_MODE (XEXP (x, 0)) == Pmode
14299 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
14300 && GET_CODE (XEXP (XEXP (x, 0), 0)) == UNSPEC
14301 && XINT (XEXP (XEXP (x, 0), 0), 1) == UNSPEC_PCREL)
14302 {
14303 rtx x2 = XVECEXP (XEXP (XEXP (x, 0), 0), 0, 0);
14304 x = gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 1), x2);
14305 if (MEM_P (orig_x))
14306 x = replace_equiv_address_nv (orig_x, x);
14307 return x;
14308 }
14309
14310 if (GET_CODE (x) == CONST
14311 && GET_CODE (XEXP (x, 0)) == UNSPEC
14312 && (XINT (XEXP (x, 0), 1) == UNSPEC_GOTPCREL
14313 || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL)
14314 && (MEM_P (orig_x) || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL))
14315 {
14316 x = XVECEXP (XEXP (x, 0), 0, 0);
14317 if (GET_MODE (orig_x) != GET_MODE (x) && MEM_P (orig_x))
14318 {
14319 x = simplify_gen_subreg (GET_MODE (orig_x), x,
14320 GET_MODE (x), 0);
14321 if (x == NULL_RTX)
14322 return orig_x;
14323 }
14324 return x;
14325 }
14326
14327 if (ix86_cmodel != CM_MEDIUM_PIC && ix86_cmodel != CM_LARGE_PIC)
14328 return ix86_delegitimize_tls_address (orig_x);
14329
14330 /* Fall thru into the code shared with -m32 for -mcmodel=large -fpic
14331 and -mcmodel=medium -fpic. */
14332 }
14333
14334 if (GET_CODE (x) != PLUS
14335 || GET_CODE (XEXP (x, 1)) != CONST)
14336 return ix86_delegitimize_tls_address (orig_x);
14337
14338 if (ix86_pic_register_p (XEXP (x, 0)))
14339 /* %ebx + GOT/GOTOFF */
14340 ;
14341 else if (GET_CODE (XEXP (x, 0)) == PLUS)
14342 {
14343 /* %ebx + %reg * scale + GOT/GOTOFF */
14344 reg_addend = XEXP (x, 0);
14345 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
14346 reg_addend = XEXP (reg_addend, 1);
14347 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
14348 reg_addend = XEXP (reg_addend, 0);
14349 else
14350 {
14351 reg_addend = NULL_RTX;
14352 addend = XEXP (x, 0);
14353 }
14354 }
14355 else
14356 addend = XEXP (x, 0);
14357
14358 x = XEXP (XEXP (x, 1), 0);
14359 if (GET_CODE (x) == PLUS
14360 && CONST_INT_P (XEXP (x, 1)))
14361 {
14362 const_addend = XEXP (x, 1);
14363 x = XEXP (x, 0);
14364 }
14365
14366 if (GET_CODE (x) == UNSPEC
14367 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
14368 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))
14369 || (XINT (x, 1) == UNSPEC_PLTOFF && ix86_cmodel == CM_LARGE_PIC
14370 && !MEM_P (orig_x) && !addend)))
14371 result = XVECEXP (x, 0, 0);
14372
14373 if (!TARGET_64BIT && TARGET_MACHO && darwin_local_data_pic (x)
14374 && !MEM_P (orig_x))
14375 result = XVECEXP (x, 0, 0);
14376
14377 if (! result)
14378 return ix86_delegitimize_tls_address (orig_x);
14379
14380 if (const_addend)
14381 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
14382 if (reg_addend)
14383 result = gen_rtx_PLUS (Pmode, reg_addend, result);
14384 if (addend)
14385 {
14386 /* If the rest of original X doesn't involve the PIC register, add
14387 addend and subtract pic_offset_table_rtx. This can happen e.g.
14388 for code like:
14389 leal (%ebx, %ecx, 4), %ecx
14390 ...
14391 movl foo@GOTOFF(%ecx), %edx
14392 in which case we return (%ecx - %ebx) + foo. */
14393 if (pic_offset_table_rtx)
14394 result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
14395 pic_offset_table_rtx),
14396 result);
14397 else
14398 return orig_x;
14399 }
14400 if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
14401 {
14402 result = simplify_gen_subreg (GET_MODE (orig_x), result, Pmode, 0);
14403 if (result == NULL_RTX)
14404 return orig_x;
14405 }
14406 return result;
14407 }
14408
14409 /* If X is a machine specific address (i.e. a symbol or label being
14410 referenced as a displacement from the GOT implemented using an
14411 UNSPEC), then return the base term. Otherwise return X. */
14412
14413 rtx
14414 ix86_find_base_term (rtx x)
14415 {
14416 rtx term;
14417
14418 if (TARGET_64BIT)
14419 {
14420 if (GET_CODE (x) != CONST)
14421 return x;
14422 term = XEXP (x, 0);
14423 if (GET_CODE (term) == PLUS
14424 && (CONST_INT_P (XEXP (term, 1))
14425 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
14426 term = XEXP (term, 0);
14427 if (GET_CODE (term) != UNSPEC
14428 || (XINT (term, 1) != UNSPEC_GOTPCREL
14429 && XINT (term, 1) != UNSPEC_PCREL))
14430 return x;
14431
14432 return XVECEXP (term, 0, 0);
14433 }
14434
14435 return ix86_delegitimize_address (x);
14436 }
14437 \f
14438 static void
14439 put_condition_code (enum rtx_code code, enum machine_mode mode, bool reverse,
14440 bool fp, FILE *file)
14441 {
14442 const char *suffix;
14443
14444 if (mode == CCFPmode || mode == CCFPUmode)
14445 {
14446 code = ix86_fp_compare_code_to_integer (code);
14447 mode = CCmode;
14448 }
14449 if (reverse)
14450 code = reverse_condition (code);
14451
14452 switch (code)
14453 {
14454 case EQ:
14455 switch (mode)
14456 {
14457 case CCAmode:
14458 suffix = "a";
14459 break;
14460
14461 case CCCmode:
14462 suffix = "c";
14463 break;
14464
14465 case CCOmode:
14466 suffix = "o";
14467 break;
14468
14469 case CCSmode:
14470 suffix = "s";
14471 break;
14472
14473 default:
14474 suffix = "e";
14475 }
14476 break;
14477 case NE:
14478 switch (mode)
14479 {
14480 case CCAmode:
14481 suffix = "na";
14482 break;
14483
14484 case CCCmode:
14485 suffix = "nc";
14486 break;
14487
14488 case CCOmode:
14489 suffix = "no";
14490 break;
14491
14492 case CCSmode:
14493 suffix = "ns";
14494 break;
14495
14496 default:
14497 suffix = "ne";
14498 }
14499 break;
14500 case GT:
14501 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
14502 suffix = "g";
14503 break;
14504 case GTU:
14505 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
14506 Those same assemblers have the same but opposite lossage on cmov. */
14507 if (mode == CCmode)
14508 suffix = fp ? "nbe" : "a";
14509 else
14510 gcc_unreachable ();
14511 break;
14512 case LT:
14513 switch (mode)
14514 {
14515 case CCNOmode:
14516 case CCGOCmode:
14517 suffix = "s";
14518 break;
14519
14520 case CCmode:
14521 case CCGCmode:
14522 suffix = "l";
14523 break;
14524
14525 default:
14526 gcc_unreachable ();
14527 }
14528 break;
14529 case LTU:
14530 if (mode == CCmode)
14531 suffix = "b";
14532 else if (mode == CCCmode)
14533 suffix = "c";
14534 else
14535 gcc_unreachable ();
14536 break;
14537 case GE:
14538 switch (mode)
14539 {
14540 case CCNOmode:
14541 case CCGOCmode:
14542 suffix = "ns";
14543 break;
14544
14545 case CCmode:
14546 case CCGCmode:
14547 suffix = "ge";
14548 break;
14549
14550 default:
14551 gcc_unreachable ();
14552 }
14553 break;
14554 case GEU:
14555 if (mode == CCmode)
14556 suffix = fp ? "nb" : "ae";
14557 else if (mode == CCCmode)
14558 suffix = "nc";
14559 else
14560 gcc_unreachable ();
14561 break;
14562 case LE:
14563 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
14564 suffix = "le";
14565 break;
14566 case LEU:
14567 if (mode == CCmode)
14568 suffix = "be";
14569 else
14570 gcc_unreachable ();
14571 break;
14572 case UNORDERED:
14573 suffix = fp ? "u" : "p";
14574 break;
14575 case ORDERED:
14576 suffix = fp ? "nu" : "np";
14577 break;
14578 default:
14579 gcc_unreachable ();
14580 }
14581 fputs (suffix, file);
14582 }
14583
14584 /* Print the name of register X to FILE based on its machine mode and number.
14585 If CODE is 'w', pretend the mode is HImode.
14586 If CODE is 'b', pretend the mode is QImode.
14587 If CODE is 'k', pretend the mode is SImode.
14588 If CODE is 'q', pretend the mode is DImode.
14589 If CODE is 'x', pretend the mode is V4SFmode.
14590 If CODE is 't', pretend the mode is V8SFmode.
14591 If CODE is 'g', pretend the mode is V16SFmode.
14592 If CODE is 'h', pretend the reg is the 'high' byte register.
14593    If CODE is 'y', print "st(0)" instead of "st" if the reg is a stack op.
14594    If CODE is 'd', duplicate the operand for an AVX instruction.
14595 */
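/* For example, with X being the AX register, CODE 'b' prints "al",
   'w' prints "ax", 'k' prints "eax", 'q' prints "rax" and 'h' prints
   "ah", each preceded by '%' in AT&T syntax.  */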
14596
14597 void
14598 print_reg (rtx x, int code, FILE *file)
14599 {
14600 const char *reg;
14601 unsigned int regno;
14602 bool duplicated = code == 'd' && TARGET_AVX;
14603
14604 if (ASSEMBLER_DIALECT == ASM_ATT)
14605 putc ('%', file);
14606
14607 if (x == pc_rtx)
14608 {
14609 gcc_assert (TARGET_64BIT);
14610 fputs ("rip", file);
14611 return;
14612 }
14613
14614 regno = true_regnum (x);
14615 gcc_assert (regno != ARG_POINTER_REGNUM
14616 && regno != FRAME_POINTER_REGNUM
14617 && regno != FLAGS_REG
14618 && regno != FPSR_REG
14619 && regno != FPCR_REG);
14620
14621 if (code == 'w' || MMX_REG_P (x))
14622 code = 2;
14623 else if (code == 'b')
14624 code = 1;
14625 else if (code == 'k')
14626 code = 4;
14627 else if (code == 'q')
14628 code = 8;
14629 else if (code == 'y')
14630 code = 3;
14631 else if (code == 'h')
14632 code = 0;
14633 else if (code == 'x')
14634 code = 16;
14635 else if (code == 't')
14636 code = 32;
14637 else if (code == 'g')
14638 code = 64;
14639 else
14640 code = GET_MODE_SIZE (GET_MODE (x));
14641
14642   /* Irritatingly, the AMD extended registers use a different naming convention
14643      from the normal registers: "r%d[bwd]".  */
14644 if (REX_INT_REGNO_P (regno))
14645 {
14646 gcc_assert (TARGET_64BIT);
14647 putc ('r', file);
14648 fprint_ul (file, regno - FIRST_REX_INT_REG + 8);
14649 switch (code)
14650 {
14651 case 0:
14652 error ("extended registers have no high halves");
14653 break;
14654 case 1:
14655 putc ('b', file);
14656 break;
14657 case 2:
14658 putc ('w', file);
14659 break;
14660 case 4:
14661 putc ('d', file);
14662 break;
14663 case 8:
14664 /* no suffix */
14665 break;
14666 default:
14667 error ("unsupported operand size for extended register");
14668 break;
14669 }
14670 return;
14671 }
14672
14673 reg = NULL;
14674 switch (code)
14675 {
14676 case 3:
14677 if (STACK_TOP_P (x))
14678 {
14679 reg = "st(0)";
14680 break;
14681 }
14682 /* FALLTHRU */
14683 case 8:
14684 case 4:
14685 case 12:
14686 if (! ANY_FP_REG_P (x))
14687 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
14688 /* FALLTHRU */
14689 case 16:
14690 case 2:
14691 normal:
14692 reg = hi_reg_name[regno];
14693 break;
14694 case 1:
14695 if (regno >= ARRAY_SIZE (qi_reg_name))
14696 goto normal;
14697 reg = qi_reg_name[regno];
14698 break;
14699 case 0:
14700 if (regno >= ARRAY_SIZE (qi_high_reg_name))
14701 goto normal;
14702 reg = qi_high_reg_name[regno];
14703 break;
14704 case 32:
14705 if (SSE_REG_P (x))
14706 {
14707 gcc_assert (!duplicated);
14708 putc ('y', file);
14709 fputs (hi_reg_name[regno] + 1, file);
14710 return;
14711 }
14712 case 64:
14713 if (SSE_REG_P (x))
14714 {
14715 gcc_assert (!duplicated);
14716 putc ('z', file);
14717 fputs (hi_reg_name[REGNO (x)] + 1, file);
14718 return;
14719 }
14720 break;
14721 default:
14722 gcc_unreachable ();
14723 }
14724
14725 fputs (reg, file);
14726 if (duplicated)
14727 {
14728 if (ASSEMBLER_DIALECT == ASM_ATT)
14729 fprintf (file, ", %%%s", reg);
14730 else
14731 fprintf (file, ", %s", reg);
14732 }
14733 }
14734
14735 /* Locate some local-dynamic symbol still in use by this function
14736 so that we can print its name in some tls_local_dynamic_base
14737 pattern. */
14738
14739 static int
14740 get_some_local_dynamic_name_1 (rtx *px, void *)
14741 {
14742 rtx x = *px;
14743
14744 if (GET_CODE (x) == SYMBOL_REF
14745 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
14746 {
14747 cfun->machine->some_ld_name = XSTR (x, 0);
14748 return 1;
14749 }
14750
14751 return 0;
14752 }
14753
14754 static const char *
14755 get_some_local_dynamic_name (void)
14756 {
14757 rtx insn;
14758
14759 if (cfun->machine->some_ld_name)
14760 return cfun->machine->some_ld_name;
14761
14762 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
14763 if (NONDEBUG_INSN_P (insn)
14764 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
14765 return cfun->machine->some_ld_name;
14766
14767 return NULL;
14768 }
14769
14770 /* Meaning of CODE:
14771 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
14772 C -- print opcode suffix for set/cmov insn.
14773 c -- like C, but print reversed condition
14774 F,f -- likewise, but for floating-point.
14775 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
14776 otherwise nothing
14777    R -- print embedded rounding and sae.
14778 r -- print only sae.
14779 z -- print the opcode suffix for the size of the current operand.
14780 Z -- likewise, with special suffixes for x87 instructions.
14781 * -- print a star (in certain assembler syntax)
14782 A -- print an absolute memory reference.
14783 E -- print address with DImode register names if TARGET_64BIT.
14784 w -- print the operand as if it's a "word" (HImode) even if it isn't.
14785    s -- print a shift double count, followed by the assembler's argument
14786 	delimiter.
14787 b -- print the QImode name of the register for the indicated operand.
14788 %b0 would print %al if operands[0] is reg 0.
14789 w -- likewise, print the HImode name of the register.
14790 k -- likewise, print the SImode name of the register.
14791 q -- likewise, print the DImode name of the register.
14792 x -- likewise, print the V4SFmode name of the register.
14793 t -- likewise, print the V8SFmode name of the register.
14794 g -- likewise, print the V16SFmode name of the register.
14795 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
14796 y -- print "st(0)" instead of "st" as a register.
14797 d -- print duplicated register operand for AVX instruction.
14798 D -- print condition for SSE cmp instruction.
14799 P -- if PIC, print an @PLT suffix.
14800 p -- print raw symbol name.
14801 X -- don't print any sort of PIC '@' suffix for a symbol.
14802 & -- print some in-use local-dynamic symbol name.
14803 H -- print a memory address offset by 8; used for sse high-parts
14804 Y -- print condition for XOP pcom* instruction.
14805 + -- print a branch hint as 'cs' or 'ds' prefix
14806 ; -- print a semicolon (after prefixes due to bug in older gas).
14807 ~ -- print "i" if TARGET_AVX2, "f" otherwise.
14808    @ -- print the segment register of a thread base pointer load
14809 ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode
14810 */
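/* As a sketch of how these are used, a hypothetical output template such
   as "add%z0 {%1, %0|%0, %1}" would print "addl" for an SImode operand 0
   and "addq" for a DImode one, while "%b1" prints the QImode register
   name of operand 1 (e.g. "%al") regardless of its actual mode.  */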
14811
14812 void
14813 ix86_print_operand (FILE *file, rtx x, int code)
14814 {
14815 if (code)
14816 {
14817 switch (code)
14818 {
14819 case 'A':
14820 switch (ASSEMBLER_DIALECT)
14821 {
14822 case ASM_ATT:
14823 putc ('*', file);
14824 break;
14825
14826 case ASM_INTEL:
14827	  /* Intel syntax. For absolute addresses, registers should not
14828	     be surrounded by brackets.  */
14829 if (!REG_P (x))
14830 {
14831 putc ('[', file);
14832 ix86_print_operand (file, x, 0);
14833 putc (']', file);
14834 return;
14835 }
14836 break;
14837
14838 default:
14839 gcc_unreachable ();
14840 }
14841
14842 ix86_print_operand (file, x, 0);
14843 return;
14844
14845 case 'E':
14846 /* Wrap address in an UNSPEC to declare special handling. */
14847 if (TARGET_64BIT)
14848 x = gen_rtx_UNSPEC (DImode, gen_rtvec (1, x), UNSPEC_LEA_ADDR);
14849
14850 output_address (x);
14851 return;
14852
14853 case 'L':
14854 if (ASSEMBLER_DIALECT == ASM_ATT)
14855 putc ('l', file);
14856 return;
14857
14858 case 'W':
14859 if (ASSEMBLER_DIALECT == ASM_ATT)
14860 putc ('w', file);
14861 return;
14862
14863 case 'B':
14864 if (ASSEMBLER_DIALECT == ASM_ATT)
14865 putc ('b', file);
14866 return;
14867
14868 case 'Q':
14869 if (ASSEMBLER_DIALECT == ASM_ATT)
14870 putc ('l', file);
14871 return;
14872
14873 case 'S':
14874 if (ASSEMBLER_DIALECT == ASM_ATT)
14875 putc ('s', file);
14876 return;
14877
14878 case 'T':
14879 if (ASSEMBLER_DIALECT == ASM_ATT)
14880 putc ('t', file);
14881 return;
14882
14883 case 'O':
14884 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
14885 if (ASSEMBLER_DIALECT != ASM_ATT)
14886 return;
14887
14888 switch (GET_MODE_SIZE (GET_MODE (x)))
14889 {
14890 case 2:
14891 putc ('w', file);
14892 break;
14893
14894 case 4:
14895 putc ('l', file);
14896 break;
14897
14898 case 8:
14899 putc ('q', file);
14900 break;
14901
14902 default:
14903 output_operand_lossage
14904 ("invalid operand size for operand code 'O'");
14905 return;
14906 }
14907
14908 putc ('.', file);
14909 #endif
14910 return;
14911
14912 case 'z':
14913 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
14914 {
14915	      /* Opcodes don't get size suffixes if using Intel syntax.  */
14916 if (ASSEMBLER_DIALECT == ASM_INTEL)
14917 return;
14918
14919 switch (GET_MODE_SIZE (GET_MODE (x)))
14920 {
14921 case 1:
14922 putc ('b', file);
14923 return;
14924
14925 case 2:
14926 putc ('w', file);
14927 return;
14928
14929 case 4:
14930 putc ('l', file);
14931 return;
14932
14933 case 8:
14934 putc ('q', file);
14935 return;
14936
14937 default:
14938 output_operand_lossage
14939 ("invalid operand size for operand code 'z'");
14940 return;
14941 }
14942 }
14943
14944 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
14945 warning
14946 (0, "non-integer operand used with operand code 'z'");
14947 /* FALLTHRU */
14948
14949 case 'Z':
14950	  /* 387 opcodes don't get size suffixes if using Intel syntax.  */
14951 if (ASSEMBLER_DIALECT == ASM_INTEL)
14952 return;
14953
14954 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
14955 {
14956 switch (GET_MODE_SIZE (GET_MODE (x)))
14957 {
14958 case 2:
14959 #ifdef HAVE_AS_IX86_FILDS
14960 putc ('s', file);
14961 #endif
14962 return;
14963
14964 case 4:
14965 putc ('l', file);
14966 return;
14967
14968 case 8:
14969 #ifdef HAVE_AS_IX86_FILDQ
14970 putc ('q', file);
14971 #else
14972 fputs ("ll", file);
14973 #endif
14974 return;
14975
14976 default:
14977 break;
14978 }
14979 }
14980 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
14981 {
14982 /* 387 opcodes don't get size suffixes
14983 if the operands are registers. */
14984 if (STACK_REG_P (x))
14985 return;
14986
14987 switch (GET_MODE_SIZE (GET_MODE (x)))
14988 {
14989 case 4:
14990 putc ('s', file);
14991 return;
14992
14993 case 8:
14994 putc ('l', file);
14995 return;
14996
14997 case 12:
14998 case 16:
14999 putc ('t', file);
15000 return;
15001
15002 default:
15003 break;
15004 }
15005 }
15006 else
15007 {
15008 output_operand_lossage
15009 ("invalid operand type used with operand code 'Z'");
15010 return;
15011 }
15012
15013 output_operand_lossage
15014 ("invalid operand size for operand code 'Z'");
15015 return;
15016
15017 case 'd':
15018 case 'b':
15019 case 'w':
15020 case 'k':
15021 case 'q':
15022 case 'h':
15023 case 't':
15024 case 'g':
15025 case 'y':
15026 case 'x':
15027 case 'X':
15028 case 'P':
15029 case 'p':
15030 break;
15031
15032 case 's':
15033 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
15034 {
15035 ix86_print_operand (file, x, 0);
15036 fputs (", ", file);
15037 }
15038 return;
15039
15040 case 'Y':
15041 switch (GET_CODE (x))
15042 {
15043 case NE:
15044 fputs ("neq", file);
15045 break;
15046 case EQ:
15047 fputs ("eq", file);
15048 break;
15049 case GE:
15050 case GEU:
15051 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
15052 break;
15053 case GT:
15054 case GTU:
15055 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
15056 break;
15057 case LE:
15058 case LEU:
15059 fputs ("le", file);
15060 break;
15061 case LT:
15062 case LTU:
15063 fputs ("lt", file);
15064 break;
15065 case UNORDERED:
15066 fputs ("unord", file);
15067 break;
15068 case ORDERED:
15069 fputs ("ord", file);
15070 break;
15071 case UNEQ:
15072 fputs ("ueq", file);
15073 break;
15074 case UNGE:
15075 fputs ("nlt", file);
15076 break;
15077 case UNGT:
15078 fputs ("nle", file);
15079 break;
15080 case UNLE:
15081 fputs ("ule", file);
15082 break;
15083 case UNLT:
15084 fputs ("ult", file);
15085 break;
15086 case LTGT:
15087 fputs ("une", file);
15088 break;
15089 default:
15090 output_operand_lossage ("operand is not a condition code, "
15091 "invalid operand code 'Y'");
15092 return;
15093 }
15094 return;
15095
15096 case 'D':
15097	  /* Little bit of braindamage here.  The SSE compare instructions
15098	     use completely different names for the comparisons than the
15099	     fp conditional moves do.  */
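	  /* For example, EQ prints "eq" (as in cmpeqps), while UNLT prints
	     "nge" when AVX is available and falls through to plain "lt"
	     otherwise.  */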
15100 switch (GET_CODE (x))
15101 {
15102 case UNEQ:
15103 if (TARGET_AVX)
15104 {
15105 fputs ("eq_us", file);
15106 break;
15107 }
15108 case EQ:
15109 fputs ("eq", file);
15110 break;
15111 case UNLT:
15112 if (TARGET_AVX)
15113 {
15114 fputs ("nge", file);
15115 break;
15116 }
15117 case LT:
15118 fputs ("lt", file);
15119 break;
15120 case UNLE:
15121 if (TARGET_AVX)
15122 {
15123 fputs ("ngt", file);
15124 break;
15125 }
15126 case LE:
15127 fputs ("le", file);
15128 break;
15129 case UNORDERED:
15130 fputs ("unord", file);
15131 break;
15132 case LTGT:
15133 if (TARGET_AVX)
15134 {
15135 fputs ("neq_oq", file);
15136 break;
15137 }
15138 case NE:
15139 fputs ("neq", file);
15140 break;
15141 case GE:
15142 if (TARGET_AVX)
15143 {
15144 fputs ("ge", file);
15145 break;
15146 }
15147 case UNGE:
15148 fputs ("nlt", file);
15149 break;
15150 case GT:
15151 if (TARGET_AVX)
15152 {
15153 fputs ("gt", file);
15154 break;
15155 }
15156 case UNGT:
15157 fputs ("nle", file);
15158 break;
15159 case ORDERED:
15160 fputs ("ord", file);
15161 break;
15162 default:
15163 output_operand_lossage ("operand is not a condition code, "
15164 "invalid operand code 'D'");
15165 return;
15166 }
15167 return;
15168
15169 case 'F':
15170 case 'f':
15171 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
15172 if (ASSEMBLER_DIALECT == ASM_ATT)
15173 putc ('.', file);
15174 #endif
15175
15176 case 'C':
15177 case 'c':
15178 if (!COMPARISON_P (x))
15179 {
15180 output_operand_lossage ("operand is not a condition code, "
15181 "invalid operand code '%c'", code);
15182 return;
15183 }
15184 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)),
15185 code == 'c' || code == 'f',
15186 code == 'F' || code == 'f',
15187 file);
15188 return;
15189
15190 case 'H':
15191 if (!offsettable_memref_p (x))
15192 {
15193 output_operand_lossage ("operand is not an offsettable memory "
15194 "reference, invalid operand code 'H'");
15195 return;
15196 }
15197 /* It doesn't actually matter what mode we use here, as we're
15198 only going to use this for printing. */
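	  /* E.g. if operands[1] is the memory operand (%esp), "%H1" prints
	     "8(%esp)" in AT&T syntax, or "QWORD PTR [esp+8]" via the size
	     override below for Intel syntax, addressing the high half of a
	     16-byte value.  */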
15199 x = adjust_address_nv (x, DImode, 8);
15200 /* Output 'qword ptr' for intel assembler dialect. */
15201 if (ASSEMBLER_DIALECT == ASM_INTEL)
15202 code = 'q';
15203 break;
15204
15205 case 'K':
15206 gcc_assert (CONST_INT_P (x));
15207
15208 if (INTVAL (x) & IX86_HLE_ACQUIRE)
15209 #ifdef HAVE_AS_IX86_HLE
15210 fputs ("xacquire ", file);
15211 #else
15212 fputs ("\n" ASM_BYTE "0xf2\n\t", file);
15213 #endif
15214 else if (INTVAL (x) & IX86_HLE_RELEASE)
15215 #ifdef HAVE_AS_IX86_HLE
15216 fputs ("xrelease ", file);
15217 #else
15218 fputs ("\n" ASM_BYTE "0xf3\n\t", file);
15219 #endif
15220	  /* We do not want to print the value of the operand.  */
15221 return;
15222
15223 case 'N':
15224 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
15225 fputs ("{z}", file);
15226 return;
15227
15228 case 'r':
15229 gcc_assert (CONST_INT_P (x));
15230 gcc_assert (INTVAL (x) == ROUND_SAE);
15231
15232 if (ASSEMBLER_DIALECT == ASM_INTEL)
15233 fputs (", ", file);
15234
15235 fputs ("{sae}", file);
15236
15237 if (ASSEMBLER_DIALECT == ASM_ATT)
15238 fputs (", ", file);
15239
15240 return;
15241
15242 case 'R':
15243 gcc_assert (CONST_INT_P (x));
15244
15245 if (ASSEMBLER_DIALECT == ASM_INTEL)
15246 fputs (", ", file);
15247
15248 switch (INTVAL (x))
15249 {
15250 case ROUND_NEAREST_INT | ROUND_SAE:
15251 fputs ("{rn-sae}", file);
15252 break;
15253 case ROUND_NEG_INF | ROUND_SAE:
15254 fputs ("{rd-sae}", file);
15255 break;
15256 case ROUND_POS_INF | ROUND_SAE:
15257 fputs ("{ru-sae}", file);
15258 break;
15259 case ROUND_ZERO | ROUND_SAE:
15260 fputs ("{rz-sae}", file);
15261 break;
15262 default:
15263 gcc_unreachable ();
15264 }
15265
15266 if (ASSEMBLER_DIALECT == ASM_ATT)
15267 fputs (", ", file);
15268
15269 return;
15270
15271 case '*':
15272 if (ASSEMBLER_DIALECT == ASM_ATT)
15273 putc ('*', file);
15274 return;
15275
15276 case '&':
15277 {
15278 const char *name = get_some_local_dynamic_name ();
15279 if (name == NULL)
15280 output_operand_lossage ("'%%&' used without any "
15281 "local dynamic TLS references");
15282 else
15283 assemble_name (file, name);
15284 return;
15285 }
15286
15287 case '+':
15288 {
15289 rtx x;
15290
15291 if (!optimize
15292 || optimize_function_for_size_p (cfun)
15293 || !TARGET_BRANCH_PREDICTION_HINTS)
15294 return;
15295
15296 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
15297 if (x)
15298 {
15299 int pred_val = XINT (x, 0);
15300
15301 if (pred_val < REG_BR_PROB_BASE * 45 / 100
15302 || pred_val > REG_BR_PROB_BASE * 55 / 100)
15303 {
15304 bool taken = pred_val > REG_BR_PROB_BASE / 2;
15305 bool cputaken
15306 = final_forward_branch_p (current_output_insn) == 0;
15307
15308		    /* Emit hints only in the case where the default branch
15309		       prediction heuristics would fail.  */
15310 if (taken != cputaken)
15311 {
15312 /* We use 3e (DS) prefix for taken branches and
15313 2e (CS) prefix for not taken branches. */
15314 if (taken)
15315 fputs ("ds ; ", file);
15316 else
15317 fputs ("cs ; ", file);
15318 }
15319 }
15320 }
15321 return;
15322 }
15323
15324 case ';':
15325 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
15326 putc (';', file);
15327 #endif
15328 return;
15329
15330 case '@':
15331 if (ASSEMBLER_DIALECT == ASM_ATT)
15332 putc ('%', file);
15333
15334	  /* The kernel uses a different segment register for performance
15335	     reasons; this way a system call does not have to trash the
15336	     userspace segment register, which would be expensive.  */
15337 if (TARGET_64BIT && ix86_cmodel != CM_KERNEL)
15338 fputs ("fs", file);
15339 else
15340 fputs ("gs", file);
15341 return;
15342
15343 case '~':
15344 putc (TARGET_AVX2 ? 'i' : 'f', file);
15345 return;
15346
15347 case '^':
15348 if (TARGET_64BIT && Pmode != word_mode)
15349 fputs ("addr32 ", file);
15350 return;
15351
15352 default:
15353 output_operand_lossage ("invalid operand code '%c'", code);
15354 }
15355 }
15356
15357 if (REG_P (x))
15358 print_reg (x, code, file);
15359
15360 else if (MEM_P (x))
15361 {
15362 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
15363 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
15364 && GET_MODE (x) != BLKmode)
15365 {
15366 const char * size;
15367 switch (GET_MODE_SIZE (GET_MODE (x)))
15368 {
15369 case 1: size = "BYTE"; break;
15370 case 2: size = "WORD"; break;
15371 case 4: size = "DWORD"; break;
15372 case 8: size = "QWORD"; break;
15373 case 12: size = "TBYTE"; break;
15374 case 16:
15375 if (GET_MODE (x) == XFmode)
15376 size = "TBYTE";
15377 else
15378 size = "XMMWORD";
15379 break;
15380 case 32: size = "YMMWORD"; break;
15381 case 64: size = "ZMMWORD"; break;
15382 default:
15383 gcc_unreachable ();
15384 }
15385
15386 /* Check for explicit size override (codes 'b', 'w', 'k',
15387 'q' and 'x') */
15388 if (code == 'b')
15389 size = "BYTE";
15390 else if (code == 'w')
15391 size = "WORD";
15392 else if (code == 'k')
15393 size = "DWORD";
15394 else if (code == 'q')
15395 size = "QWORD";
15396 else if (code == 'x')
15397 size = "XMMWORD";
15398
15399 fputs (size, file);
15400 fputs (" PTR ", file);
15401 }
15402
15403 x = XEXP (x, 0);
15404 /* Avoid (%rip) for call operands. */
15405 if (CONSTANT_ADDRESS_P (x) && code == 'P'
15406 && !CONST_INT_P (x))
15407 output_addr_const (file, x);
15408 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
15409 output_operand_lossage ("invalid constraints for operand");
15410 else
15411 output_address (x);
15412 }
15413
15414 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
15415 {
15416 REAL_VALUE_TYPE r;
15417 long l;
15418
15419 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
15420 REAL_VALUE_TO_TARGET_SINGLE (r, l);
15421
15422 if (ASSEMBLER_DIALECT == ASM_ATT)
15423 putc ('$', file);
15424 /* Sign extend 32bit SFmode immediate to 8 bytes. */
15425 if (code == 'q')
15426 fprintf (file, "0x%08" HOST_LONG_LONG_FORMAT "x",
15427 (unsigned long long) (int) l);
15428 else
15429 fprintf (file, "0x%08x", (unsigned int) l);
15430 }
15431
15432 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
15433 {
15434 REAL_VALUE_TYPE r;
15435 long l[2];
15436
15437 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
15438 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
15439
15440 if (ASSEMBLER_DIALECT == ASM_ATT)
15441 putc ('$', file);
15442 fprintf (file, "0x%lx%08lx", l[1] & 0xffffffff, l[0] & 0xffffffff);
15443 }
15444
15445 /* These float cases don't actually occur as immediate operands. */
15446 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == XFmode)
15447 {
15448 char dstr[30];
15449
15450 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
15451 fputs (dstr, file);
15452 }
15453
15454 else
15455 {
15456 /* We have patterns that allow zero sets of memory, for instance.
15457 In 64-bit mode, we should probably support all 8-byte vectors,
15458 since we can in fact encode that into an immediate. */
15459 if (GET_CODE (x) == CONST_VECTOR)
15460 {
15461 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
15462 x = const0_rtx;
15463 }
15464
15465 if (code != 'P' && code != 'p')
15466 {
15467 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
15468 {
15469 if (ASSEMBLER_DIALECT == ASM_ATT)
15470 putc ('$', file);
15471 }
15472 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
15473 || GET_CODE (x) == LABEL_REF)
15474 {
15475 if (ASSEMBLER_DIALECT == ASM_ATT)
15476 putc ('$', file);
15477 else
15478 fputs ("OFFSET FLAT:", file);
15479 }
15480 }
15481 if (CONST_INT_P (x))
15482 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
15483 else if (flag_pic || MACHOPIC_INDIRECT)
15484 output_pic_addr_const (file, x, code);
15485 else
15486 output_addr_const (file, x);
15487 }
15488 }
15489
15490 static bool
15491 ix86_print_operand_punct_valid_p (unsigned char code)
15492 {
15493 return (code == '@' || code == '*' || code == '+' || code == '&'
15494 || code == ';' || code == '~' || code == '^');
15495 }
15496 \f
15497 /* Print a memory operand whose address is ADDR. */
15498
15499 static void
15500 ix86_print_operand_address (FILE *file, rtx addr)
15501 {
15502 struct ix86_address parts;
15503 rtx base, index, disp;
15504 int scale;
15505 int ok;
15506 bool vsib = false;
15507 int code = 0;
15508
15509 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_VSIBADDR)
15510 {
15511 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
15512 gcc_assert (parts.index == NULL_RTX);
15513 parts.index = XVECEXP (addr, 0, 1);
15514 parts.scale = INTVAL (XVECEXP (addr, 0, 2));
15515 addr = XVECEXP (addr, 0, 0);
15516 vsib = true;
15517 }
15518 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_LEA_ADDR)
15519 {
15520 gcc_assert (TARGET_64BIT);
15521 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
15522 code = 'q';
15523 }
15524 else
15525 ok = ix86_decompose_address (addr, &parts);
15526
15527 gcc_assert (ok);
15528
15529 base = parts.base;
15530 index = parts.index;
15531 disp = parts.disp;
15532 scale = parts.scale;
15533
15534 switch (parts.seg)
15535 {
15536 case SEG_DEFAULT:
15537 break;
15538 case SEG_FS:
15539 case SEG_GS:
15540 if (ASSEMBLER_DIALECT == ASM_ATT)
15541 putc ('%', file);
15542 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
15543 break;
15544 default:
15545 gcc_unreachable ();
15546 }
15547
15548   /* Use the one byte shorter RIP-relative addressing for 64-bit mode.  */
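  /* E.g. "movl foo(%rip), %eax" encodes one byte shorter than the absolute
     "movl foo, %eax", which needs an extra SIB byte in 64-bit mode.  */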
15549 if (TARGET_64BIT && !base && !index)
15550 {
15551 rtx symbol = disp;
15552
15553 if (GET_CODE (disp) == CONST
15554 && GET_CODE (XEXP (disp, 0)) == PLUS
15555 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
15556 symbol = XEXP (XEXP (disp, 0), 0);
15557
15558 if (GET_CODE (symbol) == LABEL_REF
15559 || (GET_CODE (symbol) == SYMBOL_REF
15560 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
15561 base = pc_rtx;
15562 }
15563 if (!base && !index)
15564 {
15565       /* A displacement-only address requires special attention.  */
15566
15567 if (CONST_INT_P (disp))
15568 {
15569 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
15570 fputs ("ds:", file);
15571 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
15572 }
15573 else if (flag_pic)
15574 output_pic_addr_const (file, disp, 0);
15575 else
15576 output_addr_const (file, disp);
15577 }
15578 else
15579 {
15580       /* Print SImode register names to force the addr32 prefix.  */
15581 if (SImode_address_operand (addr, VOIDmode))
15582 {
15583 #ifdef ENABLE_CHECKING
15584 gcc_assert (TARGET_64BIT);
15585 switch (GET_CODE (addr))
15586 {
15587 case SUBREG:
15588 gcc_assert (GET_MODE (addr) == SImode);
15589 gcc_assert (GET_MODE (SUBREG_REG (addr)) == DImode);
15590 break;
15591 case ZERO_EXTEND:
15592 case AND:
15593 gcc_assert (GET_MODE (addr) == DImode);
15594 break;
15595 default:
15596 gcc_unreachable ();
15597 }
15598 #endif
15599 gcc_assert (!code);
15600 code = 'k';
15601 }
15602 else if (code == 0
15603 && TARGET_X32
15604 && disp
15605 && CONST_INT_P (disp)
15606 && INTVAL (disp) < -16*1024*1024)
15607 {
15608 /* X32 runs in 64-bit mode, where displacement, DISP, in
15609 address DISP(%r64), is encoded as 32-bit immediate sign-
15610 extended from 32-bit to 64-bit. For -0x40000300(%r64),
15611 address is %r64 + 0xffffffffbffffd00. When %r64 <
15612 0x40000300, like 0x37ffe064, address is 0xfffffffff7ffdd64,
15613 which is invalid for x32. The correct address is %r64
15614 - 0x40000300 == 0xf7ffdd64. To properly encode
15615	     -0x40000300(%r64) for x32, we zero-extend the negative
15616	     displacement by forcing the addr32 prefix, which truncates
15617	     0xfffffffff7ffdd64 to 0xf7ffdd64.  In theory, we should
15618	     zero-extend all negative displacements, including -1(%rsp).
15619	     However, for small negative displacements, sign-extension
15620	     won't cause overflow.  We only zero-extend negative
15621	     displacements if they are < -16*1024*1024, which is also the
15622	     limit used to check legitimate address displacements for PIC.  */
15623 code = 'k';
15624 }
15625
15626 if (ASSEMBLER_DIALECT == ASM_ATT)
15627 {
15628 if (disp)
15629 {
15630 if (flag_pic)
15631 output_pic_addr_const (file, disp, 0);
15632 else if (GET_CODE (disp) == LABEL_REF)
15633 output_asm_label (disp);
15634 else
15635 output_addr_const (file, disp);
15636 }
15637
15638 putc ('(', file);
15639 if (base)
15640 print_reg (base, code, file);
15641 if (index)
15642 {
15643 putc (',', file);
15644 print_reg (index, vsib ? 0 : code, file);
15645 if (scale != 1 || vsib)
15646 fprintf (file, ",%d", scale);
15647 }
15648 putc (')', file);
15649 }
15650 else
15651 {
15652 rtx offset = NULL_RTX;
15653
15654 if (disp)
15655 {
15656 /* Pull out the offset of a symbol; print any symbol itself. */
15657 if (GET_CODE (disp) == CONST
15658 && GET_CODE (XEXP (disp, 0)) == PLUS
15659 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
15660 {
15661 offset = XEXP (XEXP (disp, 0), 1);
15662 disp = gen_rtx_CONST (VOIDmode,
15663 XEXP (XEXP (disp, 0), 0));
15664 }
15665
15666 if (flag_pic)
15667 output_pic_addr_const (file, disp, 0);
15668 else if (GET_CODE (disp) == LABEL_REF)
15669 output_asm_label (disp);
15670 else if (CONST_INT_P (disp))
15671 offset = disp;
15672 else
15673 output_addr_const (file, disp);
15674 }
15675
15676 putc ('[', file);
15677 if (base)
15678 {
15679 print_reg (base, code, file);
15680 if (offset)
15681 {
15682 if (INTVAL (offset) >= 0)
15683 putc ('+', file);
15684 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
15685 }
15686 }
15687 else if (offset)
15688 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
15689 else
15690 putc ('0', file);
15691
15692 if (index)
15693 {
15694 putc ('+', file);
15695 print_reg (index, vsib ? 0 : code, file);
15696 if (scale != 1 || vsib)
15697 fprintf (file, "*%d", scale);
15698 }
15699 putc (']', file);
15700 }
15701 }
15702 }
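
/* Illustrative examples of the syntax produced above (not actual
   compiler output): for base %rbp, index %rax, scale 4 and
   displacement -4 the AT&T branch prints "-4(%rbp,%rax,4)" while the
   Intel branch prints "[rbp-4+rax*4]"; a RIP-relative reference in
   64-bit mode comes out as "sym(%rip)" or "sym[rip]".  */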
15703
15704 /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
15705
15706 static bool
15707 i386_asm_output_addr_const_extra (FILE *file, rtx x)
15708 {
15709 rtx op;
15710
15711 if (GET_CODE (x) != UNSPEC)
15712 return false;
15713
15714 op = XVECEXP (x, 0, 0);
15715 switch (XINT (x, 1))
15716 {
15717 case UNSPEC_GOTTPOFF:
15718 output_addr_const (file, op);
15719 /* FIXME: This might be @TPOFF in Sun ld. */
15720 fputs ("@gottpoff", file);
15721 break;
15722 case UNSPEC_TPOFF:
15723 output_addr_const (file, op);
15724 fputs ("@tpoff", file);
15725 break;
15726 case UNSPEC_NTPOFF:
15727 output_addr_const (file, op);
15728 if (TARGET_64BIT)
15729 fputs ("@tpoff", file);
15730 else
15731 fputs ("@ntpoff", file);
15732 break;
15733 case UNSPEC_DTPOFF:
15734 output_addr_const (file, op);
15735 fputs ("@dtpoff", file);
15736 break;
15737 case UNSPEC_GOTNTPOFF:
15738 output_addr_const (file, op);
15739 if (TARGET_64BIT)
15740 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
15741 "@gottpoff(%rip)" : "@gottpoff[rip]", file);
15742 else
15743 fputs ("@gotntpoff", file);
15744 break;
15745 case UNSPEC_INDNTPOFF:
15746 output_addr_const (file, op);
15747 fputs ("@indntpoff", file);
15748 break;
15749 #if TARGET_MACHO
15750 case UNSPEC_MACHOPIC_OFFSET:
15751 output_addr_const (file, op);
15752 putc ('-', file);
15753 machopic_output_function_base_name (file);
15754 break;
15755 #endif
15756
15757 case UNSPEC_STACK_CHECK:
15758 {
15759 int offset;
15760
15761 gcc_assert (flag_split_stack);
15762
15763 #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
15764 offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
15765 #else
15766 gcc_unreachable ();
15767 #endif
15768
15769 fprintf (file, "%s:%d", TARGET_64BIT ? "%fs" : "%gs", offset);
15770 }
15771 break;
15772
15773 default:
15774 return false;
15775 }
15776
15777 return true;
15778 }
15779 \f
15780 /* Split one or more double-mode RTL references into pairs of half-mode
15781 references. The RTL can be REG, offsettable MEM, integer constant, or
15782 CONST_DOUBLE. "operands" is a pointer to an array of double-mode RTLs to
15783 split and "num" is its length. lo_half and hi_half are output arrays
15784 that parallel "operands". */
15785
15786 void
15787 split_double_mode (enum machine_mode mode, rtx operands[],
15788 int num, rtx lo_half[], rtx hi_half[])
15789 {
15790 enum machine_mode half_mode;
15791 unsigned int byte;
15792
15793 switch (mode)
15794 {
15795 case TImode:
15796 half_mode = DImode;
15797 break;
15798 case DImode:
15799 half_mode = SImode;
15800 break;
15801 default:
15802 gcc_unreachable ();
15803 }
15804
15805 byte = GET_MODE_SIZE (half_mode);
15806
15807 while (num--)
15808 {
15809 rtx op = operands[num];
15810
15811 /* simplify_subreg refuses to split volatile memory addresses,
15812 but we still have to handle them. */
15813 if (MEM_P (op))
15814 {
15815 lo_half[num] = adjust_address (op, half_mode, 0);
15816 hi_half[num] = adjust_address (op, half_mode, byte);
15817 }
15818 else
15819 {
15820 lo_half[num] = simplify_gen_subreg (half_mode, op,
15821 GET_MODE (op) == VOIDmode
15822 ? mode : GET_MODE (op), 0);
15823 hi_half[num] = simplify_gen_subreg (half_mode, op,
15824 GET_MODE (op) == VOIDmode
15825 ? mode : GET_MODE (op), byte);
15826 }
15827 }
15828 }
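
/* For example (illustrative): splitting a DImode memory operand yields
   an SImode low half at offset 0 and an SImode high half at offset 4
   (x86 is little-endian); a DImode register or constant is split with
   simplify_gen_subreg into its two SImode subwords.  */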
15829 \f
15830 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
15831 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
15832 is the expression of the binary operation. The output may either be
15833 emitted here, or returned to the caller, like all output_* functions.
15834
15835 There is no guarantee that the operands are the same mode, as they
15836 might be within FLOAT or FLOAT_EXTEND expressions. */
15837
15838 #ifndef SYSV386_COMPAT
15839 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
15840 wants to fix the assemblers because that causes incompatibility
15841 with gcc. No-one wants to fix gcc because that causes
15842 incompatibility with assemblers... You can use the option of
15843 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
15844 #define SYSV386_COMPAT 1
15845 #endif
15846
15847 const char *
15848 output_387_binary_op (rtx insn, rtx *operands)
15849 {
15850 static char buf[40];
15851 const char *p;
15852 const char *ssep;
15853 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
15854
15855 #ifdef ENABLE_CHECKING
15856 /* Even if we do not want to check the inputs, this documents input
15857 constraints, which helps in understanding the following code. */
15858 if (STACK_REG_P (operands[0])
15859 && ((REG_P (operands[1])
15860 && REGNO (operands[0]) == REGNO (operands[1])
15861 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
15862 || (REG_P (operands[2])
15863 && REGNO (operands[0]) == REGNO (operands[2])
15864 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
15865 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
15866 ; /* ok */
15867 else
15868 gcc_assert (is_sse);
15869 #endif
15870
15871 switch (GET_CODE (operands[3]))
15872 {
15873 case PLUS:
15874 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
15875 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
15876 p = "fiadd";
15877 else
15878 p = "fadd";
15879 ssep = "vadd";
15880 break;
15881
15882 case MINUS:
15883 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
15884 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
15885 p = "fisub";
15886 else
15887 p = "fsub";
15888 ssep = "vsub";
15889 break;
15890
15891 case MULT:
15892 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
15893 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
15894 p = "fimul";
15895 else
15896 p = "fmul";
15897 ssep = "vmul";
15898 break;
15899
15900 case DIV:
15901 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
15902 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
15903 p = "fidiv";
15904 else
15905 p = "fdiv";
15906 ssep = "vdiv";
15907 break;
15908
15909 default:
15910 gcc_unreachable ();
15911 }
15912
15913 if (is_sse)
15914 {
15915 if (TARGET_AVX)
15916 {
15917 strcpy (buf, ssep);
15918 if (GET_MODE (operands[0]) == SFmode)
15919 strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
15920 else
15921 strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
15922 }
15923 else
15924 {
15925 strcpy (buf, ssep + 1);
15926 if (GET_MODE (operands[0]) == SFmode)
15927 strcat (buf, "ss\t{%2, %0|%0, %2}");
15928 else
15929 strcat (buf, "sd\t{%2, %0|%0, %2}");
15930 }
15931 return buf;
15932 }
15933 strcpy (buf, p);
15934
15935 switch (GET_CODE (operands[3]))
15936 {
15937 case MULT:
15938 case PLUS:
15939 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
15940 {
15941 rtx temp = operands[2];
15942 operands[2] = operands[1];
15943 operands[1] = temp;
15944 }
15945
15946 /* We know operands[0] == operands[1]. */
15947
15948 if (MEM_P (operands[2]))
15949 {
15950 p = "%Z2\t%2";
15951 break;
15952 }
15953
15954 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
15955 {
15956 if (STACK_TOP_P (operands[0]))
15957 /* How is it that we are storing to a dead operand[2]?
15958 Well, presumably operands[1] is dead too. We can't
15959 store the result to st(0) as st(0) gets popped on this
15960 instruction. Instead store to operands[2] (which I
15961 think has to be st(1)). st(1) will be popped later.
15962 gcc <= 2.8.1 didn't have this check and generated
15963 assembly code that the Unixware assembler rejected. */
15964 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
15965 else
15966 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
15967 break;
15968 }
15969
15970 if (STACK_TOP_P (operands[0]))
15971 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
15972 else
15973 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
15974 break;
15975
15976 case MINUS:
15977 case DIV:
15978 if (MEM_P (operands[1]))
15979 {
15980 p = "r%Z1\t%1";
15981 break;
15982 }
15983
15984 if (MEM_P (operands[2]))
15985 {
15986 p = "%Z2\t%2";
15987 break;
15988 }
15989
15990 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
15991 {
15992 #if SYSV386_COMPAT
15993 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
15994 derived assemblers, confusingly reverse the direction of
15995 the operation for fsub{r} and fdiv{r} when the
15996 destination register is not st(0). The Intel assembler
15997 doesn't have this brain damage. Read !SYSV386_COMPAT to
15998 figure out what the hardware really does. */
15999 if (STACK_TOP_P (operands[0]))
16000 p = "{p\t%0, %2|rp\t%2, %0}";
16001 else
16002 p = "{rp\t%2, %0|p\t%0, %2}";
16003 #else
16004 if (STACK_TOP_P (operands[0]))
16005 /* As above for fmul/fadd, we can't store to st(0). */
16006 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
16007 else
16008 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
16009 #endif
16010 break;
16011 }
16012
16013 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
16014 {
16015 #if SYSV386_COMPAT
16016 if (STACK_TOP_P (operands[0]))
16017 p = "{rp\t%0, %1|p\t%1, %0}";
16018 else
16019 p = "{p\t%1, %0|rp\t%0, %1}";
16020 #else
16021 if (STACK_TOP_P (operands[0]))
16022 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
16023 else
16024 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
16025 #endif
16026 break;
16027 }
16028
16029 if (STACK_TOP_P (operands[0]))
16030 {
16031 if (STACK_TOP_P (operands[1]))
16032 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
16033 else
16034 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
16035 break;
16036 }
16037 else if (STACK_TOP_P (operands[1]))
16038 {
16039 #if SYSV386_COMPAT
16040 p = "{\t%1, %0|r\t%0, %1}";
16041 #else
16042 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
16043 #endif
16044 }
16045 else
16046 {
16047 #if SYSV386_COMPAT
16048 p = "{r\t%2, %0|\t%0, %2}";
16049 #else
16050 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
16051 #endif
16052 }
16053 break;
16054
16055 default:
16056 gcc_unreachable ();
16057 }
16058
16059 strcat (buf, p);
16060 return buf;
16061 }
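
/* Illustrative results of the template construction above: a DFmode
   SSE add is returned as "vaddsd\t{%2, %1, %0|%0, %1, %2}" with AVX and
   as "addsd\t{%2, %0|%0, %2}" without it, while an x87 multiply whose
   second operand dies is returned in a popping form such as
   "fmulp\t{%2, %0|%0, %2}".  */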
16062
16063 /* Check if a 256bit AVX register is referenced inside of EXP. */
16064
16065 static int
16066 ix86_check_avx256_register (rtx *pexp, void *)
16067 {
16068 rtx exp = *pexp;
16069
16070 if (GET_CODE (exp) == SUBREG)
16071 exp = SUBREG_REG (exp);
16072
16073 if (REG_P (exp)
16074 && VALID_AVX256_REG_OR_OI_MODE (GET_MODE (exp)))
16075 return 1;
16076
16077 return 0;
16078 }
16079
16080 /* Return needed mode for entity in optimize_mode_switching pass. */
16081
16082 static int
16083 ix86_avx_u128_mode_needed (rtx insn)
16084 {
16085 if (CALL_P (insn))
16086 {
16087 rtx link;
16088
16089 /* Needed mode is set to AVX_U128_CLEAN if there are
16090 no 256bit modes used in function arguments. */
16091 for (link = CALL_INSN_FUNCTION_USAGE (insn);
16092 link;
16093 link = XEXP (link, 1))
16094 {
16095 if (GET_CODE (XEXP (link, 0)) == USE)
16096 {
16097 rtx arg = XEXP (XEXP (link, 0), 0);
16098
16099 if (ix86_check_avx256_register (&arg, NULL))
16100 return AVX_U128_DIRTY;
16101 }
16102 }
16103
16104 return AVX_U128_CLEAN;
16105 }
16106
16107 /* Require DIRTY mode if a 256bit AVX register is referenced. Hardware
16108 changes state only when a 256bit register is written to, but we need
16109 to prevent the compiler from moving the optimal insertion point above
16110 an eventual read from a 256bit register. */
16111 if (for_each_rtx (&PATTERN (insn), ix86_check_avx256_register, NULL))
16112 return AVX_U128_DIRTY;
16113
16114 return AVX_U128_ANY;
16115 }
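
/* For instance (illustrative): a call whose arguments are all scalar
   or 128bit values needs AVX_U128_CLEAN, so the mode switching pass is
   free to insert vzeroupper before it; any insn that reads or writes a
   256bit ymm register needs AVX_U128_DIRTY instead.  */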
16116
16117 /* Return mode that i387 must be switched into
16118 prior to the execution of insn. */
16119
16120 static int
16121 ix86_i387_mode_needed (int entity, rtx insn)
16122 {
16123 enum attr_i387_cw mode;
16124
16125 /* The mode UNINITIALIZED is used to store the control word after a
16126 function call or ASM pattern. The mode ANY specifies that the function
16127 has no requirements on the control word and makes no changes to the
16128 bits we are interested in. */
16129
16130 if (CALL_P (insn)
16131 || (NONJUMP_INSN_P (insn)
16132 && (asm_noperands (PATTERN (insn)) >= 0
16133 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
16134 return I387_CW_UNINITIALIZED;
16135
16136 if (recog_memoized (insn) < 0)
16137 return I387_CW_ANY;
16138
16139 mode = get_attr_i387_cw (insn);
16140
16141 switch (entity)
16142 {
16143 case I387_TRUNC:
16144 if (mode == I387_CW_TRUNC)
16145 return mode;
16146 break;
16147
16148 case I387_FLOOR:
16149 if (mode == I387_CW_FLOOR)
16150 return mode;
16151 break;
16152
16153 case I387_CEIL:
16154 if (mode == I387_CW_CEIL)
16155 return mode;
16156 break;
16157
16158 case I387_MASK_PM:
16159 if (mode == I387_CW_MASK_PM)
16160 return mode;
16161 break;
16162
16163 default:
16164 gcc_unreachable ();
16165 }
16166
16167 return I387_CW_ANY;
16168 }
16169
16170 /* Return mode that entity must be switched into
16171 prior to the execution of insn. */
16172
16173 static int
16174 ix86_mode_needed (int entity, rtx insn)
16175 {
16176 switch (entity)
16177 {
16178 case AVX_U128:
16179 return ix86_avx_u128_mode_needed (insn);
16180 case I387_TRUNC:
16181 case I387_FLOOR:
16182 case I387_CEIL:
16183 case I387_MASK_PM:
16184 return ix86_i387_mode_needed (entity, insn);
16185 default:
16186 gcc_unreachable ();
16187 }
16188 return 0;
16189 }
16190
16191 /* Check if a 256bit AVX register is referenced in stores. */
16192
16193 static void
16194 ix86_check_avx256_stores (rtx dest, const_rtx, void *data)
16195 {
16196 if (ix86_check_avx256_register (&dest, NULL))
16197 {
16198 bool *used = (bool *) data;
16199 *used = true;
16200 }
16201 }
16202
16203 /* Calculate mode of upper 128bit AVX registers after the insn. */
16204
16205 static int
16206 ix86_avx_u128_mode_after (int mode, rtx insn)
16207 {
16208 rtx pat = PATTERN (insn);
16209
16210 if (vzeroupper_operation (pat, VOIDmode)
16211 || vzeroall_operation (pat, VOIDmode))
16212 return AVX_U128_CLEAN;
16213
16214 /* We know that the state is clean after a CALL insn if no 256bit
16215 register is used for the function return value. */
16216 if (CALL_P (insn))
16217 {
16218 bool avx_reg256_found = false;
16219 note_stores (pat, ix86_check_avx256_stores, &avx_reg256_found);
16220
16221 return avx_reg256_found ? AVX_U128_DIRTY : AVX_U128_CLEAN;
16222 }
16223
16224 /* Otherwise, return the current mode. Remember that if the insn
16225 references AVX 256bit registers, the mode was already changed
16226 to DIRTY by MODE_NEEDED. */
16227 return mode;
16228 }
16229
16230 /* Return the mode that an insn results in. */
16231
16232 int
16233 ix86_mode_after (int entity, int mode, rtx insn)
16234 {
16235 switch (entity)
16236 {
16237 case AVX_U128:
16238 return ix86_avx_u128_mode_after (mode, insn);
16239 case I387_TRUNC:
16240 case I387_FLOOR:
16241 case I387_CEIL:
16242 case I387_MASK_PM:
16243 return mode;
16244 default:
16245 gcc_unreachable ();
16246 }
16247 }
16248
16249 static int
16250 ix86_avx_u128_mode_entry (void)
16251 {
16252 tree arg;
16253
16254 /* Entry mode is set to AVX_U128_DIRTY if there are
16255 256bit modes used in function arguments. */
16256 for (arg = DECL_ARGUMENTS (current_function_decl); arg;
16257 arg = TREE_CHAIN (arg))
16258 {
16259 rtx incoming = DECL_INCOMING_RTL (arg);
16260
16261 if (incoming && ix86_check_avx256_register (&incoming, NULL))
16262 return AVX_U128_DIRTY;
16263 }
16264
16265 return AVX_U128_CLEAN;
16266 }
16267
16268 /* Return a mode that ENTITY is assumed to be
16269 switched to at function entry. */
16270
16271 static int
16272 ix86_mode_entry (int entity)
16273 {
16274 switch (entity)
16275 {
16276 case AVX_U128:
16277 return ix86_avx_u128_mode_entry ();
16278 case I387_TRUNC:
16279 case I387_FLOOR:
16280 case I387_CEIL:
16281 case I387_MASK_PM:
16282 return I387_CW_ANY;
16283 default:
16284 gcc_unreachable ();
16285 }
16286 }
16287
16288 static int
16289 ix86_avx_u128_mode_exit (void)
16290 {
16291 rtx reg = crtl->return_rtx;
16292
16293 /* Exit mode is set to AVX_U128_DIRTY if there are
16294 256bit modes used in the function return register. */
16295 if (reg && ix86_check_avx256_register (&reg, NULL))
16296 return AVX_U128_DIRTY;
16297
16298 return AVX_U128_CLEAN;
16299 }
16300
16301 /* Return a mode that ENTITY is assumed to be
16302 switched to at function exit. */
16303
16304 static int
16305 ix86_mode_exit (int entity)
16306 {
16307 switch (entity)
16308 {
16309 case AVX_U128:
16310 return ix86_avx_u128_mode_exit ();
16311 case I387_TRUNC:
16312 case I387_FLOOR:
16313 case I387_CEIL:
16314 case I387_MASK_PM:
16315 return I387_CW_ANY;
16316 default:
16317 gcc_unreachable ();
16318 }
16319 }
16320
16321 static int
16322 ix86_mode_priority (int, int n)
16323 {
16324 return n;
16325 }
16326
16327 /* Output code to initialize control word copies used by trunc?f?i and
16328 rounding patterns. CURRENT_MODE is set to current control word,
16329 while NEW_MODE is set to new control word. */
16330
16331 static void
16332 emit_i387_cw_initialization (int mode)
16333 {
16334 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
16335 rtx new_mode;
16336
16337 enum ix86_stack_slot slot;
16338
16339 rtx reg = gen_reg_rtx (HImode);
16340
16341 emit_insn (gen_x86_fnstcw_1 (stored_mode));
16342 emit_move_insn (reg, copy_rtx (stored_mode));
16343
16344 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
16345 || optimize_insn_for_size_p ())
16346 {
16347 switch (mode)
16348 {
16349 case I387_CW_TRUNC:
16350 /* round toward zero (truncate) */
16351 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
16352 slot = SLOT_CW_TRUNC;
16353 break;
16354
16355 case I387_CW_FLOOR:
16356 /* round down toward -oo */
16357 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
16358 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
16359 slot = SLOT_CW_FLOOR;
16360 break;
16361
16362 case I387_CW_CEIL:
16363 /* round up toward +oo */
16364 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
16365 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
16366 slot = SLOT_CW_CEIL;
16367 break;
16368
16369 case I387_CW_MASK_PM:
16370 /* mask precision exception for nearbyint() */
16371 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
16372 slot = SLOT_CW_MASK_PM;
16373 break;
16374
16375 default:
16376 gcc_unreachable ();
16377 }
16378 }
16379 else
16380 {
16381 switch (mode)
16382 {
16383 case I387_CW_TRUNC:
16384 /* round toward zero (truncate) */
16385 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
16386 slot = SLOT_CW_TRUNC;
16387 break;
16388
16389 case I387_CW_FLOOR:
16390 /* round down toward -oo */
16391 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
16392 slot = SLOT_CW_FLOOR;
16393 break;
16394
16395 case I387_CW_CEIL:
16396 /* round up toward +oo */
16397 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
16398 slot = SLOT_CW_CEIL;
16399 break;
16400
16401 case I387_CW_MASK_PM:
16402 /* mask precision exception for nearbyint() */
16403 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
16404 slot = SLOT_CW_MASK_PM;
16405 break;
16406
16407 default:
16408 gcc_unreachable ();
16409 }
16410 }
16411
16412 gcc_assert (slot < MAX_386_STACK_LOCALS);
16413
16414 new_mode = assign_386_stack_local (HImode, slot);
16415 emit_move_insn (new_mode, reg);
16416 }
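
/* For reference, the rounding-control field is bits 10-11 of the x87
   control word: 0x0000 rounds to nearest, 0x0400 rounds down, 0x0800
   rounds up and 0x0c00 truncates toward zero; bit 5 (0x0020) masks the
   precision exception, which is what the nearbyint() case above sets.  */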
16417
16418 /* Emit vzeroupper. */
16419
16420 void
16421 ix86_avx_emit_vzeroupper (HARD_REG_SET regs_live)
16422 {
16423 int i;
16424
16425 /* Cancel automatic vzeroupper insertion if there are
16426 live call-saved SSE registers at the insertion point. */
16427
16428 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
16429 if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
16430 return;
16431
16432 if (TARGET_64BIT)
16433 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
16434 if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
16435 return;
16436
16437 emit_insn (gen_avx_vzeroupper ());
16438 }
16439
16442 /* Generate one or more insns to set ENTITY to MODE. REGS_LIVE
16443 is the set of hard registers live at the point where the insn(s)
16444 are to be inserted. */
16445
16446 static void
16447 ix86_emit_mode_set (int entity, int mode, int prev_mode ATTRIBUTE_UNUSED,
16448 HARD_REG_SET regs_live)
16449 {
16450 switch (entity)
16451 {
16452 case AVX_U128:
16453 if (mode == AVX_U128_CLEAN)
16454 ix86_avx_emit_vzeroupper (regs_live);
16455 break;
16456 case I387_TRUNC:
16457 case I387_FLOOR:
16458 case I387_CEIL:
16459 case I387_MASK_PM:
16460 if (mode != I387_CW_ANY
16461 && mode != I387_CW_UNINITIALIZED)
16462 emit_i387_cw_initialization (mode);
16463 break;
16464 default:
16465 gcc_unreachable ();
16466 }
16467 }
16468
16469 /* Output code for INSN to convert a float to a signed int. OPERANDS
16470 are the insn operands. The output may be [HSD]Imode and the input
16471 operand may be [SDX]Fmode. */
16472
16473 const char *
16474 output_fix_trunc (rtx insn, rtx *operands, bool fisttp)
16475 {
16476 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
16477 int dimode_p = GET_MODE (operands[0]) == DImode;
16478 int round_mode = get_attr_i387_cw (insn);
16479
16480 /* Jump through a hoop or two for DImode, since the hardware has no
16481 non-popping instruction. We used to do this a different way, but
16482 that was somewhat fragile and broke with post-reload splitters. */
16483 if ((dimode_p || fisttp) && !stack_top_dies)
16484 output_asm_insn ("fld\t%y1", operands);
16485
16486 gcc_assert (STACK_TOP_P (operands[1]));
16487 gcc_assert (MEM_P (operands[0]));
16488 gcc_assert (GET_MODE (operands[1]) != TFmode);
16489
16490 if (fisttp)
16491 output_asm_insn ("fisttp%Z0\t%0", operands);
16492 else
16493 {
16494 if (round_mode != I387_CW_ANY)
16495 output_asm_insn ("fldcw\t%3", operands);
16496 if (stack_top_dies || dimode_p)
16497 output_asm_insn ("fistp%Z0\t%0", operands);
16498 else
16499 output_asm_insn ("fist%Z0\t%0", operands);
16500 if (round_mode != I387_CW_ANY)
16501 output_asm_insn ("fldcw\t%2", operands);
16502 }
16503
16504 return "";
16505 }
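
/* Illustrative sketch of the output when a control-word switch is
   needed and fisttp is not available (the templates are those used
   above):

	fldcw	%3	# load the new (e.g. truncating) control word
	fistp%Z0	%0	# convert, store and pop
	fldcw	%2	# restore the original control word

   With fisttp only "fisttp%Z0\t%0" is emitted, and "fist%Z0\t%0" is
   used instead of fistp when the stack top survives.  */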
16506
16507 /* Output code for x87 ffreep insn. The OPNO argument, which may only
16508 have the values zero or one, indicates the ffreep insn's operand
16509 from the OPERANDS array. */
16510
16511 static const char *
16512 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
16513 {
16514 if (TARGET_USE_FFREEP)
16515 #ifdef HAVE_AS_IX86_FFREEP
16516 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
16517 #else
16518 {
16519 static char retval[32];
16520 int regno = REGNO (operands[opno]);
16521
16522 gcc_assert (STACK_REGNO_P (regno));
16523
16524 regno -= FIRST_STACK_REG;
16525
16526 snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
16527 return retval;
16528 }
16529 #endif
16530
16531 return opno ? "fstp\t%y1" : "fstp\t%y0";
16532 }
16533
16534
16535 /* Output code for INSN to compare OPERANDS. EFLAGS_P is true when fcomi
16536 should be used. UNORDERED_P is true when fucom should be used. */
16537
16538 const char *
16539 output_fp_compare (rtx insn, rtx *operands, bool eflags_p, bool unordered_p)
16540 {
16541 int stack_top_dies;
16542 rtx cmp_op0, cmp_op1;
16543 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
16544
16545 if (eflags_p)
16546 {
16547 cmp_op0 = operands[0];
16548 cmp_op1 = operands[1];
16549 }
16550 else
16551 {
16552 cmp_op0 = operands[1];
16553 cmp_op1 = operands[2];
16554 }
16555
16556 if (is_sse)
16557 {
16558 if (GET_MODE (operands[0]) == SFmode)
16559 if (unordered_p)
16560 return "%vucomiss\t{%1, %0|%0, %1}";
16561 else
16562 return "%vcomiss\t{%1, %0|%0, %1}";
16563 else
16564 if (unordered_p)
16565 return "%vucomisd\t{%1, %0|%0, %1}";
16566 else
16567 return "%vcomisd\t{%1, %0|%0, %1}";
16568 }
16569
16570 gcc_assert (STACK_TOP_P (cmp_op0));
16571
16572 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
16573
16574 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
16575 {
16576 if (stack_top_dies)
16577 {
16578 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
16579 return output_387_ffreep (operands, 1);
16580 }
16581 else
16582 return "ftst\n\tfnstsw\t%0";
16583 }
16584
16585 if (STACK_REG_P (cmp_op1)
16586 && stack_top_dies
16587 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
16588 && REGNO (cmp_op1) != FIRST_STACK_REG)
16589 {
16590 /* If the top of the 387 stack dies, and the other operand is
16591 also a stack register that dies, then this must be a
16592 `fcompp' float compare. */
16593
16594 if (eflags_p)
16595 {
16596 /* There is no double popping fcomi variant. Fortunately,
16597 eflags is immune from the fstp's cc clobbering. */
16598 if (unordered_p)
16599 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
16600 else
16601 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
16602 return output_387_ffreep (operands, 0);
16603 }
16604 else
16605 {
16606 if (unordered_p)
16607 return "fucompp\n\tfnstsw\t%0";
16608 else
16609 return "fcompp\n\tfnstsw\t%0";
16610 }
16611 }
16612 else
16613 {
16614 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
16615
16616 static const char * const alt[16] =
16617 {
16618 "fcom%Z2\t%y2\n\tfnstsw\t%0",
16619 "fcomp%Z2\t%y2\n\tfnstsw\t%0",
16620 "fucom%Z2\t%y2\n\tfnstsw\t%0",
16621 "fucomp%Z2\t%y2\n\tfnstsw\t%0",
16622
16623 "ficom%Z2\t%y2\n\tfnstsw\t%0",
16624 "ficomp%Z2\t%y2\n\tfnstsw\t%0",
16625 NULL,
16626 NULL,
16627
16628 "fcomi\t{%y1, %0|%0, %y1}",
16629 "fcomip\t{%y1, %0|%0, %y1}",
16630 "fucomi\t{%y1, %0|%0, %y1}",
16631 "fucomip\t{%y1, %0|%0, %y1}",
16632
16633 NULL,
16634 NULL,
16635 NULL,
16636 NULL
16637 };
16638
16639 int mask;
16640 const char *ret;
16641
16642 mask = eflags_p << 3;
16643 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
16644 mask |= unordered_p << 1;
16645 mask |= stack_top_dies;
16646
16647 gcc_assert (mask < 16);
16648 ret = alt[mask];
16649 gcc_assert (ret);
16650
16651 return ret;
16652 }
16653 }
16654
16655 void
16656 ix86_output_addr_vec_elt (FILE *file, int value)
16657 {
16658 const char *directive = ASM_LONG;
16659
16660 #ifdef ASM_QUAD
16661 if (TARGET_LP64)
16662 directive = ASM_QUAD;
16663 #else
16664 gcc_assert (!TARGET_64BIT);
16665 #endif
16666
16667 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
16668 }
16669
16670 void
16671 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
16672 {
16673 const char *directive = ASM_LONG;
16674
16675 #ifdef ASM_QUAD
16676 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
16677 directive = ASM_QUAD;
16678 #else
16679 gcc_assert (!TARGET_64BIT);
16680 #endif
16681 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
16682 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
16683 fprintf (file, "%s%s%d-%s%d\n",
16684 directive, LPREFIX, value, LPREFIX, rel);
16685 else if (HAVE_AS_GOTOFF_IN_DATA)
16686 fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
16687 #if TARGET_MACHO
16688 else if (TARGET_MACHO)
16689 {
16690 fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
16691 machopic_output_function_base_name (file);
16692 putc ('\n', file);
16693 }
16694 #endif
16695 else
16696 asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
16697 GOT_SYMBOL_NAME, LPREFIX, value);
16698 }
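
/* Illustrative output (assuming the usual ".L" local label prefix):
   64-bit or VxWorks RTP code emits a difference such as
   ".long .L5-.L2" (or ".quad" when the case vector is DImode), while
   32-bit PIC code with @GOTOFF support in data emits
   ".long .L5@GOTOFF".  */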
16699 \f
16700 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
16701 for the target. */
16702
16703 void
16704 ix86_expand_clear (rtx dest)
16705 {
16706 rtx tmp;
16707
16708 /* We play register width games, which are only valid after reload. */
16709 gcc_assert (reload_completed);
16710
16711 /* Avoid HImode and its attendant prefix byte. */
16712 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
16713 dest = gen_rtx_REG (SImode, REGNO (dest));
16714 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
16715
16716 if (!TARGET_USE_MOV0 || optimize_insn_for_size_p ())
16717 {
16718 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
16719 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
16720 }
16721
16722 emit_insn (tmp);
16723 }
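
/* Illustratively, clearing %eax expands to "xorl %eax, %eax" together
   with an explicit flags-register clobber when TARGET_USE_MOV0 is not
   set or we optimize for size, and to "movl $0, %eax" otherwise.  */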
16724
16725 /* X is an unchanging MEM. If it is a constant pool reference, return
16726 the constant pool rtx, else NULL. */
16727
16728 rtx
16729 maybe_get_pool_constant (rtx x)
16730 {
16731 x = ix86_delegitimize_address (XEXP (x, 0));
16732
16733 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
16734 return get_pool_constant (x);
16735
16736 return NULL_RTX;
16737 }
16738
16739 void
16740 ix86_expand_move (enum machine_mode mode, rtx operands[])
16741 {
16742 rtx op0, op1;
16743 enum tls_model model;
16744
16745 op0 = operands[0];
16746 op1 = operands[1];
16747
16748 if (GET_CODE (op1) == SYMBOL_REF)
16749 {
16750 rtx tmp;
16751
16752 model = SYMBOL_REF_TLS_MODEL (op1);
16753 if (model)
16754 {
16755 op1 = legitimize_tls_address (op1, model, true);
16756 op1 = force_operand (op1, op0);
16757 if (op1 == op0)
16758 return;
16759 op1 = convert_to_mode (mode, op1, 1);
16760 }
16761 else if ((tmp = legitimize_pe_coff_symbol (op1, false)) != NULL_RTX)
16762 op1 = tmp;
16763 }
16764 else if (GET_CODE (op1) == CONST
16765 && GET_CODE (XEXP (op1, 0)) == PLUS
16766 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
16767 {
16768 rtx addend = XEXP (XEXP (op1, 0), 1);
16769 rtx symbol = XEXP (XEXP (op1, 0), 0);
16770 rtx tmp;
16771
16772 model = SYMBOL_REF_TLS_MODEL (symbol);
16773 if (model)
16774 tmp = legitimize_tls_address (symbol, model, true);
16775 else
16776 tmp = legitimize_pe_coff_symbol (symbol, true);
16777
16778 if (tmp)
16779 {
16780 tmp = force_operand (tmp, NULL);
16781 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
16782 op0, 1, OPTAB_DIRECT);
16783 if (tmp == op0)
16784 return;
16785 op1 = convert_to_mode (mode, tmp, 1);
16786 }
16787 }
16788
16789 if ((flag_pic || MACHOPIC_INDIRECT)
16790 && symbolic_operand (op1, mode))
16791 {
16792 if (TARGET_MACHO && !TARGET_64BIT)
16793 {
16794 #if TARGET_MACHO
16795 /* dynamic-no-pic */
16796 if (MACHOPIC_INDIRECT)
16797 {
16798 rtx temp = ((reload_in_progress
16799 || ((op0 && REG_P (op0))
16800 && mode == Pmode))
16801 ? op0 : gen_reg_rtx (Pmode));
16802 op1 = machopic_indirect_data_reference (op1, temp);
16803 if (MACHOPIC_PURE)
16804 op1 = machopic_legitimize_pic_address (op1, mode,
16805 temp == op1 ? 0 : temp);
16806 }
16807 if (op0 != op1 && GET_CODE (op0) != MEM)
16808 {
16809 rtx insn = gen_rtx_SET (VOIDmode, op0, op1);
16810 emit_insn (insn);
16811 return;
16812 }
16813 if (GET_CODE (op0) == MEM)
16814 op1 = force_reg (Pmode, op1);
16815 else
16816 {
16817 rtx temp = op0;
16818 if (GET_CODE (temp) != REG)
16819 temp = gen_reg_rtx (Pmode);
16820 temp = legitimize_pic_address (op1, temp);
16821 if (temp == op0)
16822 return;
16823 op1 = temp;
16824 }
16825 /* dynamic-no-pic */
16826 #endif
16827 }
16828 else
16829 {
16830 if (MEM_P (op0))
16831 op1 = force_reg (mode, op1);
16832 else if (!(TARGET_64BIT && x86_64_movabs_operand (op1, DImode)))
16833 {
16834 rtx reg = can_create_pseudo_p () ? NULL_RTX : op0;
16835 op1 = legitimize_pic_address (op1, reg);
16836 if (op0 == op1)
16837 return;
16838 op1 = convert_to_mode (mode, op1, 1);
16839 }
16840 }
16841 }
16842 else
16843 {
16844 if (MEM_P (op0)
16845 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
16846 || !push_operand (op0, mode))
16847 && MEM_P (op1))
16848 op1 = force_reg (mode, op1);
16849
16850 if (push_operand (op0, mode)
16851 && ! general_no_elim_operand (op1, mode))
16852 op1 = copy_to_mode_reg (mode, op1);
16853
16854 /* Force large constants in 64bit compilation into a register
16855 to get them CSEed. */
16856 if (can_create_pseudo_p ()
16857 && (mode == DImode) && TARGET_64BIT
16858 && immediate_operand (op1, mode)
16859 && !x86_64_zext_immediate_operand (op1, VOIDmode)
16860 && !register_operand (op0, mode)
16861 && optimize)
16862 op1 = copy_to_mode_reg (mode, op1);
16863
16864 if (can_create_pseudo_p ()
16865 && FLOAT_MODE_P (mode)
16866 && GET_CODE (op1) == CONST_DOUBLE)
16867 {
16868 /* If we are loading a floating point constant to a register,
16869 force the value to memory now, since we'll get better code
16870 out of the back end. */
16871
16872 op1 = validize_mem (force_const_mem (mode, op1));
16873 if (!register_operand (op0, mode))
16874 {
16875 rtx temp = gen_reg_rtx (mode);
16876 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
16877 emit_move_insn (op0, temp);
16878 return;
16879 }
16880 }
16881 }
16882
16883 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
16884 }
16885
16886 void
16887 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
16888 {
16889 rtx op0 = operands[0], op1 = operands[1];
16890 unsigned int align = GET_MODE_ALIGNMENT (mode);
16891
16892 if (push_operand (op0, VOIDmode))
16893 op0 = emit_move_resolve_push (mode, op0);
16894
16895 /* Force constants other than zero into memory. We do not know how
16896 the instructions used to build constants modify the upper 64 bits
16897 of the register; once we have that information we may be able
16898 to handle some of them more efficiently. */
16899 if (can_create_pseudo_p ()
16900 && register_operand (op0, mode)
16901 && (CONSTANT_P (op1)
16902 || (GET_CODE (op1) == SUBREG
16903 && CONSTANT_P (SUBREG_REG (op1))))
16904 && !standard_sse_constant_p (op1))
16905 op1 = validize_mem (force_const_mem (mode, op1));
16906
16907 /* We need to check memory alignment for SSE mode since an alignment
16908 attribute can make operands unaligned. */
16909 if (can_create_pseudo_p ()
16910 && SSE_REG_MODE_P (mode)
16911 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
16912 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
16913 {
16914 rtx tmp[2];
16915
16916 /* ix86_expand_vector_move_misalign() does not like constants ... */
16917 if (CONSTANT_P (op1)
16918 || (GET_CODE (op1) == SUBREG
16919 && CONSTANT_P (SUBREG_REG (op1))))
16920 op1 = validize_mem (force_const_mem (mode, op1));
16921
16922 /* ... nor both arguments in memory. */
16923 if (!register_operand (op0, mode)
16924 && !register_operand (op1, mode))
16925 op1 = force_reg (mode, op1);
16926
16927 tmp[0] = op0; tmp[1] = op1;
16928 ix86_expand_vector_move_misalign (mode, tmp);
16929 return;
16930 }
16931
16932 /* If neither operand is a register, force operand 1 into a register. */
16933 if (can_create_pseudo_p ()
16934 && !register_operand (op0, mode)
16935 && !register_operand (op1, mode))
16936 {
16937 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
16938 return;
16939 }
16940
16941 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
16942 }
16943
16944 /* Split 32-byte AVX unaligned load and store if needed. */
16945
16946 static void
16947 ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1)
16948 {
16949 rtx m;
16950 rtx (*extract) (rtx, rtx, rtx);
16951 rtx (*load_unaligned) (rtx, rtx);
16952 rtx (*store_unaligned) (rtx, rtx);
16953 enum machine_mode mode;
16954
16955 switch (GET_MODE (op0))
16956 {
16957 default:
16958 gcc_unreachable ();
16959 case V32QImode:
16960 extract = gen_avx_vextractf128v32qi;
16961 load_unaligned = gen_avx_loaddquv32qi;
16962 store_unaligned = gen_avx_storedquv32qi;
16963 mode = V16QImode;
16964 break;
16965 case V8SFmode:
16966 extract = gen_avx_vextractf128v8sf;
16967 load_unaligned = gen_avx_loadups256;
16968 store_unaligned = gen_avx_storeups256;
16969 mode = V4SFmode;
16970 break;
16971 case V4DFmode:
16972 extract = gen_avx_vextractf128v4df;
16973 load_unaligned = gen_avx_loadupd256;
16974 store_unaligned = gen_avx_storeupd256;
16975 mode = V2DFmode;
16976 break;
16977 }
16978
16979 if (MEM_P (op1))
16980 {
16981 if (TARGET_AVX256_SPLIT_UNALIGNED_LOAD)
16982 {
16983 rtx r = gen_reg_rtx (mode);
16984 m = adjust_address (op1, mode, 0);
16985 emit_move_insn (r, m);
16986 m = adjust_address (op1, mode, 16);
16987 r = gen_rtx_VEC_CONCAT (GET_MODE (op0), r, m);
16988 emit_move_insn (op0, r);
16989 }
16990 /* Normal *mov<mode>_internal pattern will handle
16991 unaligned loads just fine if misaligned_operand
16992 is true, and without the UNSPEC it can be combined
16993 with arithmetic instructions. */
16994 else if (misaligned_operand (op1, GET_MODE (op1)))
16995 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
16996 else
16997 emit_insn (load_unaligned (op0, op1));
16998 }
16999 else if (MEM_P (op0))
17000 {
17001 if (TARGET_AVX256_SPLIT_UNALIGNED_STORE)
17002 {
17003 m = adjust_address (op0, mode, 0);
17004 emit_insn (extract (m, op1, const0_rtx));
17005 m = adjust_address (op0, mode, 16);
17006 emit_insn (extract (m, op1, const1_rtx));
17007 }
17008 else
17009 emit_insn (store_unaligned (op0, op1));
17010 }
17011 else
17012 gcc_unreachable ();
17013 }
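
/* As an illustration, with TARGET_AVX256_SPLIT_UNALIGNED_LOAD a
   32-byte unaligned V8SF load is emitted roughly as

	vmovups	(mem), %xmm0
	vinsertf128	$1, 16(mem), %ymm0, %ymm0

   and with TARGET_AVX256_SPLIT_UNALIGNED_STORE an unaligned store is
   likewise split into two 16-byte extracts.  */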
17014
17015 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
17016 straight to ix86_expand_vector_move. */
17017 /* Code generation for scalar reg-reg moves of single and double precision data:
17018 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
17019 movaps reg, reg
17020 else
17021 movss reg, reg
17022 if (x86_sse_partial_reg_dependency == true)
17023 movapd reg, reg
17024 else
17025 movsd reg, reg
17026
17027 Code generation for scalar loads of double precision data:
17028 if (x86_sse_split_regs == true)
17029 movlpd mem, reg (gas syntax)
17030 else
17031 movsd mem, reg
17032
17033 Code generation for unaligned packed loads of single precision data
17034 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
17035 if (x86_sse_unaligned_move_optimal)
17036 movups mem, reg
17037
17038 if (x86_sse_partial_reg_dependency == true)
17039 {
17040 xorps reg, reg
17041 movlps mem, reg
17042 movhps mem+8, reg
17043 }
17044 else
17045 {
17046 movlps mem, reg
17047 movhps mem+8, reg
17048 }
17049
17050 Code generation for unaligned packed loads of double precision data
17051 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
17052 if (x86_sse_unaligned_move_optimal)
17053 movupd mem, reg
17054
17055 if (x86_sse_split_regs == true)
17056 {
17057 movlpd mem, reg
17058 movhpd mem+8, reg
17059 }
17060 else
17061 {
17062 movsd mem, reg
17063 movhpd mem+8, reg
17064 }
17065 */
17066
17067 void
17068 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
17069 {
17070 rtx op0, op1, orig_op0 = NULL_RTX, m;
17071 rtx (*load_unaligned) (rtx, rtx);
17072 rtx (*store_unaligned) (rtx, rtx);
17073
17074 op0 = operands[0];
17075 op1 = operands[1];
17076
17077 if (GET_MODE_SIZE (mode) == 64)
17078 {
17079 switch (GET_MODE_CLASS (mode))
17080 {
17081 case MODE_VECTOR_INT:
17082 case MODE_INT:
17083 if (GET_MODE (op0) != V16SImode)
17084 {
17085 if (!MEM_P (op0))
17086 {
17087 orig_op0 = op0;
17088 op0 = gen_reg_rtx (V16SImode);
17089 }
17090 else
17091 op0 = gen_lowpart (V16SImode, op0);
17092 }
17093 op1 = gen_lowpart (V16SImode, op1);
17094 /* FALLTHRU */
17095
17096 case MODE_VECTOR_FLOAT:
17097 switch (GET_MODE (op0))
17098 {
17099 default:
17100 gcc_unreachable ();
17101 case V16SImode:
17102 load_unaligned = gen_avx512f_loaddquv16si;
17103 store_unaligned = gen_avx512f_storedquv16si;
17104 break;
17105 case V16SFmode:
17106 load_unaligned = gen_avx512f_loadups512;
17107 store_unaligned = gen_avx512f_storeups512;
17108 break;
17109 case V8DFmode:
17110 load_unaligned = gen_avx512f_loadupd512;
17111 store_unaligned = gen_avx512f_storeupd512;
17112 break;
17113 }
17114
17115 if (MEM_P (op1))
17116 emit_insn (load_unaligned (op0, op1));
17117 else if (MEM_P (op0))
17118 emit_insn (store_unaligned (op0, op1));
17119 else
17120 gcc_unreachable ();
17121 if (orig_op0)
17122 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17123 break;
17124
17125 default:
17126 gcc_unreachable ();
17127 }
17128
17129 return;
17130 }
17131
17132 if (TARGET_AVX
17133 && GET_MODE_SIZE (mode) == 32)
17134 {
17135 switch (GET_MODE_CLASS (mode))
17136 {
17137 case MODE_VECTOR_INT:
17138 case MODE_INT:
17139 if (GET_MODE (op0) != V32QImode)
17140 {
17141 if (!MEM_P (op0))
17142 {
17143 orig_op0 = op0;
17144 op0 = gen_reg_rtx (V32QImode);
17145 }
17146 else
17147 op0 = gen_lowpart (V32QImode, op0);
17148 }
17149 op1 = gen_lowpart (V32QImode, op1);
17150 /* FALLTHRU */
17151
17152 case MODE_VECTOR_FLOAT:
17153 ix86_avx256_split_vector_move_misalign (op0, op1);
17154 if (orig_op0)
17155 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17156 break;
17157
17158 default:
17159 gcc_unreachable ();
17160 }
17161
17162 return;
17163 }
17164
17165 if (MEM_P (op1))
17166 {
17167 /* Normal *mov<mode>_internal pattern will handle
17168 unaligned loads just fine if misaligned_operand
17169 is true, and without the UNSPEC it can be combined
17170 with arithmetic instructions. */
17171 if (TARGET_AVX
17172 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
17173 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
17174 && misaligned_operand (op1, GET_MODE (op1)))
17175 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
17176 /* ??? If we have typed data, then it would appear that using
17177 movdqu is the only way to get unaligned data loaded with
17178 integer type. */
17179 else if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
17180 {
17181 if (GET_MODE (op0) != V16QImode)
17182 {
17183 orig_op0 = op0;
17184 op0 = gen_reg_rtx (V16QImode);
17185 }
17186 op1 = gen_lowpart (V16QImode, op1);
17187 /* We will eventually emit movups based on insn attributes. */
17188 emit_insn (gen_sse2_loaddquv16qi (op0, op1));
17189 if (orig_op0)
17190 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17191 }
17192 else if (TARGET_SSE2 && mode == V2DFmode)
17193 {
17194 rtx zero;
17195
17196 if (TARGET_AVX
17197 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
17198 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17199 || optimize_insn_for_size_p ())
17200 {
17201 /* We will eventually emit movups based on insn attributes. */
17202 emit_insn (gen_sse2_loadupd (op0, op1));
17203 return;
17204 }
17205
17206 /* When SSE registers are split into halves, we can avoid
17207 writing to the top half twice. */
17208 if (TARGET_SSE_SPLIT_REGS)
17209 {
17210 emit_clobber (op0);
17211 zero = op0;
17212 }
17213 else
17214 {
17215 /* ??? Not sure about the best option for the Intel chips.
17216 The following would seem to satisfy; the register is
17217 entirely cleared, breaking the dependency chain. We
17218 then store to the upper half, with a dependency depth
17219 of one. A rumor has it that Intel recommends two movsd
17220 followed by an unpacklpd, but this is unconfirmed. And
17221 given that the dependency depth of the unpacklpd would
17222 still be one, I'm not sure why this would be better. */
17223 zero = CONST0_RTX (V2DFmode);
17224 }
17225
17226 m = adjust_address (op1, DFmode, 0);
17227 emit_insn (gen_sse2_loadlpd (op0, zero, m));
17228 m = adjust_address (op1, DFmode, 8);
17229 emit_insn (gen_sse2_loadhpd (op0, op0, m));
17230 }
17231 else
17232 {
17233 rtx t;
17234
17235 if (TARGET_AVX
17236 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
17237 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17238 || optimize_insn_for_size_p ())
17239 {
17240 if (GET_MODE (op0) != V4SFmode)
17241 {
17242 orig_op0 = op0;
17243 op0 = gen_reg_rtx (V4SFmode);
17244 }
17245 op1 = gen_lowpart (V4SFmode, op1);
17246 emit_insn (gen_sse_loadups (op0, op1));
17247 if (orig_op0)
17248 emit_move_insn (orig_op0,
17249 gen_lowpart (GET_MODE (orig_op0), op0));
17250 return;
17251 }
17252
17253 if (mode != V4SFmode)
17254 t = gen_reg_rtx (V4SFmode);
17255 else
17256 t = op0;
17257
17258 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
17259 emit_move_insn (t, CONST0_RTX (V4SFmode));
17260 else
17261 emit_clobber (t);
17262
17263 m = adjust_address (op1, V2SFmode, 0);
17264 emit_insn (gen_sse_loadlps (t, t, m));
17265 m = adjust_address (op1, V2SFmode, 8);
17266 emit_insn (gen_sse_loadhps (t, t, m));
17267 if (mode != V4SFmode)
17268 emit_move_insn (op0, gen_lowpart (mode, t));
17269 }
17270 }
17271 else if (MEM_P (op0))
17272 {
17273 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
17274 {
17275 op0 = gen_lowpart (V16QImode, op0);
17276 op1 = gen_lowpart (V16QImode, op1);
17277 /* We will eventually emit movups based on insn attributes. */
17278 emit_insn (gen_sse2_storedquv16qi (op0, op1));
17279 }
17280 else if (TARGET_SSE2 && mode == V2DFmode)
17281 {
17282 if (TARGET_AVX
17283 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
17284 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17285 || optimize_insn_for_size_p ())
17286 /* We will eventually emit movups based on insn attributes. */
17287 emit_insn (gen_sse2_storeupd (op0, op1));
17288 else
17289 {
17290 m = adjust_address (op0, DFmode, 0);
17291 emit_insn (gen_sse2_storelpd (m, op1));
17292 m = adjust_address (op0, DFmode, 8);
17293 emit_insn (gen_sse2_storehpd (m, op1));
17294 }
17295 }
17296 else
17297 {
17298 if (mode != V4SFmode)
17299 op1 = gen_lowpart (V4SFmode, op1);
17300
17301 if (TARGET_AVX
17302 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
17303 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17304 || optimize_insn_for_size_p ())
17305 {
17306 op0 = gen_lowpart (V4SFmode, op0);
17307 emit_insn (gen_sse_storeups (op0, op1));
17308 }
17309 else
17310 {
17311 m = adjust_address (op0, V2SFmode, 0);
17312 emit_insn (gen_sse_storelps (m, op1));
17313 m = adjust_address (op0, V2SFmode, 8);
17314 emit_insn (gen_sse_storehps (m, op1));
17315 }
17316 }
17317 }
17318 else
17319 gcc_unreachable ();
17320 }
17321
17322 /* Helper function of ix86_fixup_binary_operands to canonicalize
17323 operand order. Returns true if the operands should be swapped. */
17324
17325 static bool
17326 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
17327 rtx operands[])
17328 {
17329 rtx dst = operands[0];
17330 rtx src1 = operands[1];
17331 rtx src2 = operands[2];
17332
17333 /* If the operation is not commutative, we can't do anything. */
17334 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
17335 return false;
17336
17337 /* Highest priority is that src1 should match dst. */
17338 if (rtx_equal_p (dst, src1))
17339 return false;
17340 if (rtx_equal_p (dst, src2))
17341 return true;
17342
17343 /* Next highest priority is that immediate constants come second. */
17344 if (immediate_operand (src2, mode))
17345 return false;
17346 if (immediate_operand (src1, mode))
17347 return true;
17348
17349 /* Lowest priority is that memory references should come second. */
17350 if (MEM_P (src2))
17351 return false;
17352 if (MEM_P (src1))
17353 return true;
17354
17355 return false;
17356 }
17357
17358
17359 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
17360 destination to use for the operation. If different from the true
17361 destination in operands[0], a copy operation will be required. */
17362
17363 rtx
17364 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
17365 rtx operands[])
17366 {
17367 rtx dst = operands[0];
17368 rtx src1 = operands[1];
17369 rtx src2 = operands[2];
17370
17371 /* Canonicalize operand order. */
17372 if (ix86_swap_binary_operands_p (code, mode, operands))
17373 {
17374 rtx temp;
17375
17376 /* It is invalid to swap operands of different modes. */
17377 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
17378
17379 temp = src1;
17380 src1 = src2;
17381 src2 = temp;
17382 }
17383
17384 /* Both source operands cannot be in memory. */
17385 if (MEM_P (src1) && MEM_P (src2))
17386 {
17387 /* Optimization: Only read from memory once. */
17388 if (rtx_equal_p (src1, src2))
17389 {
17390 src2 = force_reg (mode, src2);
17391 src1 = src2;
17392 }
17393 else if (rtx_equal_p (dst, src1))
17394 src2 = force_reg (mode, src2);
17395 else
17396 src1 = force_reg (mode, src1);
17397 }
17398
17399 /* If the destination is memory, and we do not have matching source
17400 operands, do things in registers. */
17401 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
17402 dst = gen_reg_rtx (mode);
17403
17404 /* Source 1 cannot be a constant. */
17405 if (CONSTANT_P (src1))
17406 src1 = force_reg (mode, src1);
17407
17408 /* Source 1 cannot be a non-matching memory. */
17409 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
17410 src1 = force_reg (mode, src1);
17411
17412 /* Improve address combine. */
17413 if (code == PLUS
17414 && GET_MODE_CLASS (mode) == MODE_INT
17415 && MEM_P (src2))
17416 src2 = force_reg (mode, src2);
17417
17418 operands[1] = src1;
17419 operands[2] = src2;
17420 return dst;
17421 }
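
/* For example (illustrative): when expanding "a = b + a" for a
   commutative PLUS, the sources are swapped so that src1 matches the
   destination and the two-operand machine add "a += b" can be used
   without an extra copy; if both sources are memory references, one of
   them is first forced into a register.  */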
17422
17423 /* Similarly, but assume that the destination has already been
17424 set up properly. */
17425
17426 void
17427 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
17428 enum machine_mode mode, rtx operands[])
17429 {
17430 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
17431 gcc_assert (dst == operands[0]);
17432 }
17433
17434 /* Attempt to expand a binary operator. Make the expansion closer to the
17435 actual machine than just general_operand, which would allow 3 separate
17436 memory references (one output, two input) in a single insn. */
17437
17438 void
17439 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
17440 rtx operands[])
17441 {
17442 rtx src1, src2, dst, op, clob;
17443
17444 dst = ix86_fixup_binary_operands (code, mode, operands);
17445 src1 = operands[1];
17446 src2 = operands[2];
17447
17448 /* Emit the instruction. */
17449
17450 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
17451 if (reload_in_progress)
17452 {
17453 /* Reload doesn't know about the flags register, and doesn't know that
17454 it doesn't want to clobber it. We can only do this with PLUS. */
17455 gcc_assert (code == PLUS);
17456 emit_insn (op);
17457 }
17458 else if (reload_completed
17459 && code == PLUS
17460 && !rtx_equal_p (dst, src1))
17461 {
17462 /* This is going to be an LEA; avoid splitting it later. */
17463 emit_insn (op);
17464 }
17465 else
17466 {
17467 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
17468 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
17469 }
17470
17471 /* Fix up the destination if needed. */
17472 if (dst != operands[0])
17473 emit_move_insn (operands[0], dst);
17474 }
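
/* The common case above therefore emits RTL roughly of the form
   (illustrative):

	(parallel [(set (reg:SI 0) (plus:SI (reg:SI 0) (reg:SI 1)))
		   (clobber (reg:CC FLAGS_REG))])

   so that it matches the two-operand arithmetic patterns, which expect
   the flags-register clobber.  */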
17475
17476 /* Expand vector logical operation CODE (AND, IOR, XOR) in MODE with
17477 the given OPERANDS. */
17478
17479 void
17480 ix86_expand_vector_logical_operator (enum rtx_code code, enum machine_mode mode,
17481 rtx operands[])
17482 {
17483 rtx op1 = NULL_RTX, op2 = NULL_RTX;
17484 if (GET_CODE (operands[1]) == SUBREG)
17485 {
17486 op1 = operands[1];
17487 op2 = operands[2];
17488 }
17489 else if (GET_CODE (operands[2]) == SUBREG)
17490 {
17491 op1 = operands[2];
17492 op2 = operands[1];
17493 }
17494 /* Optimize (__m128i) d | (__m128i) e and similar code
17495 when d and e are float vectors into a float vector logical
17496 insn. In C/C++, without using intrinsics there is no other way
17497 to express a vector logical operation on float vectors than
17498 to cast them temporarily to integer vectors. */
17499 if (op1
17500 && !TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17501 && ((GET_CODE (op2) == SUBREG || GET_CODE (op2) == CONST_VECTOR))
17502 && GET_MODE_CLASS (GET_MODE (SUBREG_REG (op1))) == MODE_VECTOR_FLOAT
17503 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op1))) == GET_MODE_SIZE (mode)
17504 && SUBREG_BYTE (op1) == 0
17505 && (GET_CODE (op2) == CONST_VECTOR
17506 || (GET_MODE (SUBREG_REG (op1)) == GET_MODE (SUBREG_REG (op2))
17507 && SUBREG_BYTE (op2) == 0))
17508 && can_create_pseudo_p ())
17509 {
17510 rtx dst;
17511 switch (GET_MODE (SUBREG_REG (op1)))
17512 {
17513 case V4SFmode:
17514 case V8SFmode:
17515 case V2DFmode:
17516 case V4DFmode:
17517 dst = gen_reg_rtx (GET_MODE (SUBREG_REG (op1)));
17518 if (GET_CODE (op2) == CONST_VECTOR)
17519 {
17520 op2 = gen_lowpart (GET_MODE (dst), op2);
17521 op2 = force_reg (GET_MODE (dst), op2);
17522 }
17523 else
17524 {
17525 op1 = operands[1];
17526 op2 = SUBREG_REG (operands[2]);
17527 if (!nonimmediate_operand (op2, GET_MODE (dst)))
17528 op2 = force_reg (GET_MODE (dst), op2);
17529 }
17530 op1 = SUBREG_REG (op1);
17531 if (!nonimmediate_operand (op1, GET_MODE (dst)))
17532 op1 = force_reg (GET_MODE (dst), op1);
17533 emit_insn (gen_rtx_SET (VOIDmode, dst,
17534 gen_rtx_fmt_ee (code, GET_MODE (dst),
17535 op1, op2)));
17536 emit_move_insn (operands[0], gen_lowpart (mode, dst));
17537 return;
17538 default:
17539 break;
17540 }
17541 }
17542 if (!nonimmediate_operand (operands[1], mode))
17543 operands[1] = force_reg (mode, operands[1]);
17544 if (!nonimmediate_operand (operands[2], mode))
17545 operands[2] = force_reg (mode, operands[2]);
17546 ix86_fixup_binary_operands_no_copy (code, mode, operands);
17547 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
17548 gen_rtx_fmt_ee (code, mode, operands[1],
17549 operands[2])));
17550 }
17551
17552 /* Return TRUE or FALSE depending on whether the binary operator meets the
17553 appropriate constraints. */
17554
17555 bool
17556 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
17557 rtx operands[3])
17558 {
17559 rtx dst = operands[0];
17560 rtx src1 = operands[1];
17561 rtx src2 = operands[2];
17562
17563 /* Both source operands cannot be in memory. */
17564 if (MEM_P (src1) && MEM_P (src2))
17565 return false;
17566
17567 /* Canonicalize operand order for commutative operators. */
17568 if (ix86_swap_binary_operands_p (code, mode, operands))
17569 {
17570 rtx temp = src1;
17571 src1 = src2;
17572 src2 = temp;
17573 }
17574
17575 /* If the destination is memory, we must have a matching source operand. */
17576 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
17577 return false;
17578
17579 /* Source 1 cannot be a constant. */
17580 if (CONSTANT_P (src1))
17581 return false;
17582
17583 /* Source 1 cannot be a non-matching memory. */
17584 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
17585 /* Support "andhi/andsi/anddi" as a zero-extending move. */
17586 return (code == AND
17587 && (mode == HImode
17588 || mode == SImode
17589 || (TARGET_64BIT && mode == DImode))
17590 && satisfies_constraint_L (src2));
17591
17592 return true;
17593 }
17594
17595 /* Attempt to expand a unary operator. Make the expansion closer to the
17596 actual machine than just general_operand, which would allow 2 separate
17597 memory references (one output, one input) in a single insn. */
17598
17599 void
17600 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
17601 rtx operands[])
17602 {
17603 int matching_memory;
17604 rtx src, dst, op, clob;
17605
17606 dst = operands[0];
17607 src = operands[1];
17608
17609 /* If the destination is memory, and we do not have matching source
17610 operands, do things in registers. */
17611 matching_memory = 0;
17612 if (MEM_P (dst))
17613 {
17614 if (rtx_equal_p (dst, src))
17615 matching_memory = 1;
17616 else
17617 dst = gen_reg_rtx (mode);
17618 }
17619
17620 /* When source operand is memory, destination must match. */
17621 if (MEM_P (src) && !matching_memory)
17622 src = force_reg (mode, src);
17623
17624 /* Emit the instruction. */
17625
17626 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
17627 if (reload_in_progress || code == NOT)
17628 {
17629 /* Reload doesn't know about the flags register, and doesn't know that
17630 it doesn't want to clobber it. */
17631 gcc_assert (code == NOT);
17632 emit_insn (op);
17633 }
17634 else
17635 {
17636 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
17637 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
17638 }
17639
17640 /* Fix up the destination if needed. */
17641 if (dst != operands[0])
17642 emit_move_insn (operands[0], dst);
17643 }
17644
17645 /* Split a 32bit/64bit divmod with an 8bit unsigned divmod if the dividend
17646 and the divisor are within the range [0-255]. */
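/* Illustrative sketch (not literal output) of the code emitted below:

     scratch = dividend | divisor;
     if ((scratch & ~0xff) == 0)
       use a single 8-bit unsigned divide (quotient in AL, remainder in AH);
     else
       use the regular full-width signed/unsigned divide;

   The 8-bit path is valid for the signed case as well, since values in
   [0, 255] are non-negative.  */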
17647
17648 void
17649 ix86_split_idivmod (enum machine_mode mode, rtx operands[],
17650 bool signed_p)
17651 {
17652 rtx end_label, qimode_label;
17653 rtx insn, div, mod;
17654 rtx scratch, tmp0, tmp1, tmp2;
17655 rtx (*gen_divmod4_1) (rtx, rtx, rtx, rtx);
17656 rtx (*gen_zero_extend) (rtx, rtx);
17657 rtx (*gen_test_ccno_1) (rtx, rtx);
17658
17659 switch (mode)
17660 {
17661 case SImode:
17662 gen_divmod4_1 = signed_p ? gen_divmodsi4_1 : gen_udivmodsi4_1;
17663 gen_test_ccno_1 = gen_testsi_ccno_1;
17664 gen_zero_extend = gen_zero_extendqisi2;
17665 break;
17666 case DImode:
17667 gen_divmod4_1 = signed_p ? gen_divmoddi4_1 : gen_udivmoddi4_1;
17668 gen_test_ccno_1 = gen_testdi_ccno_1;
17669 gen_zero_extend = gen_zero_extendqidi2;
17670 break;
17671 default:
17672 gcc_unreachable ();
17673 }
17674
17675 end_label = gen_label_rtx ();
17676 qimode_label = gen_label_rtx ();
17677
17678 scratch = gen_reg_rtx (mode);
17679
17680 /* Use 8bit unsigned divmod if the dividend and divisor are within
17681 the range [0-255]. */
17682 emit_move_insn (scratch, operands[2]);
17683 scratch = expand_simple_binop (mode, IOR, scratch, operands[3],
17684 scratch, 1, OPTAB_DIRECT);
17685 emit_insn (gen_test_ccno_1 (scratch, GEN_INT (-0x100)));
17686 tmp0 = gen_rtx_REG (CCNOmode, FLAGS_REG);
17687 tmp0 = gen_rtx_EQ (VOIDmode, tmp0, const0_rtx);
17688 tmp0 = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp0,
17689 gen_rtx_LABEL_REF (VOIDmode, qimode_label),
17690 pc_rtx);
17691 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp0));
17692 predict_jump (REG_BR_PROB_BASE * 50 / 100);
17693 JUMP_LABEL (insn) = qimode_label;
17694
17695 /* Generate the original signed/unsigned divmod. */
17696 div = gen_divmod4_1 (operands[0], operands[1],
17697 operands[2], operands[3]);
17698 emit_insn (div);
17699
17700 /* Branch to the end. */
17701 emit_jump_insn (gen_jump (end_label));
17702 emit_barrier ();
17703
17704 /* Generate 8bit unsigned divide. */
17705 emit_label (qimode_label);
17706 /* Don't use operands[0] for result of 8bit divide since not all
17707 registers support QImode ZERO_EXTRACT. */
17708 tmp0 = simplify_gen_subreg (HImode, scratch, mode, 0);
17709 tmp1 = simplify_gen_subreg (HImode, operands[2], mode, 0);
17710 tmp2 = simplify_gen_subreg (QImode, operands[3], mode, 0);
17711 emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, tmp2));
17712
17713 if (signed_p)
17714 {
17715 div = gen_rtx_DIV (SImode, operands[2], operands[3]);
17716 mod = gen_rtx_MOD (SImode, operands[2], operands[3]);
17717 }
17718 else
17719 {
17720 div = gen_rtx_UDIV (SImode, operands[2], operands[3]);
17721 mod = gen_rtx_UMOD (SImode, operands[2], operands[3]);
17722 }
17723
17724 /* Extract remainder from AH. */
17725 tmp1 = gen_rtx_ZERO_EXTRACT (mode, tmp0, GEN_INT (8), GEN_INT (8));
17726 if (REG_P (operands[1]))
17727 insn = emit_move_insn (operands[1], tmp1);
17728 else
17729 {
17730 /* Need a new scratch register since the old one has result
17731 of 8bit divide. */
17732 scratch = gen_reg_rtx (mode);
17733 emit_move_insn (scratch, tmp1);
17734 insn = emit_move_insn (operands[1], scratch);
17735 }
17736 set_unique_reg_note (insn, REG_EQUAL, mod);
17737
17738 /* Zero extend quotient from AL. */
17739 tmp1 = gen_lowpart (QImode, tmp0);
17740 insn = emit_insn (gen_zero_extend (operands[0], tmp1));
17741 set_unique_reg_note (insn, REG_EQUAL, div);
17742
17743 emit_label (end_label);
17744 }
17745
17746 /* Whether it is OK to emit CFI directives when emitting asm code. */
17747
17748 bool
17749 ix86_emit_cfi ()
17750 {
17751 return dwarf2out_do_cfi_asm ();
17752 }
17753
17754 #define LEA_MAX_STALL (3)
17755 #define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1)
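/* The distances accumulated below are counted in half-cycles, so
   LEA_SEARCH_THRESHOLD bounds the search window to LEA_MAX_STALL full
   cycles; distance_non_agu_define and distance_agu_use shift the final
   result right by one to convert back to cycles.  */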
17756
17757 /* Increase the given DISTANCE in half-cycles according to
17758 dependencies between the PREV and NEXT instructions.
17759 Add 1 half-cycle if there is no dependency and
17760 go to the next cycle if there is some dependency. */
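/* "Go to the next cycle" means rounding DISTANCE up to an even number of
   half-cycles and then adding a full cycle (2), as done below when a
   dependency between PREV and NEXT is found.  */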
17761
17762 static unsigned int
17763 increase_distance (rtx prev, rtx next, unsigned int distance)
17764 {
17765 df_ref def, use;
17766
17767 if (!prev || !next)
17768 return distance + (distance & 1) + 2;
17769
17770 if (!DF_INSN_USES (next) || !DF_INSN_DEFS (prev))
17771 return distance + 1;
17772
17773 FOR_EACH_INSN_USE (use, next)
17774 FOR_EACH_INSN_DEF (def, prev)
17775 if (!DF_REF_IS_ARTIFICIAL (def)
17776 && DF_REF_REGNO (use) == DF_REF_REGNO (def))
17777 return distance + (distance & 1) + 2;
17778
17779 return distance + 1;
17780 }
17781
17782 /* Function checks if instruction INSN defines register number
17783 REGNO1 or REGNO2. */
17784
17785 static bool
17786 insn_defines_reg (unsigned int regno1, unsigned int regno2,
17787 rtx insn)
17788 {
17789 df_ref def;
17790
17791 FOR_EACH_INSN_DEF (def, insn)
17792 if (DF_REF_REG_DEF_P (def)
17793 && !DF_REF_IS_ARTIFICIAL (def)
17794 && (regno1 == DF_REF_REGNO (def)
17795 || regno2 == DF_REF_REGNO (def)))
17796 return true;
17797
17798 return false;
17799 }
17800
17801 /* Function checks if instruction INSN uses register number
17802 REGNO as a part of address expression. */
17803
17804 static bool
17805 insn_uses_reg_mem (unsigned int regno, rtx insn)
17806 {
17807 df_ref use;
17808
17809 FOR_EACH_INSN_USE (use, insn)
17810 if (DF_REF_REG_MEM_P (use) && regno == DF_REF_REGNO (use))
17811 return true;
17812
17813 return false;
17814 }
17815
17816 /* Search backward for non-agu definition of register number REGNO1
17817 or register number REGNO2 in basic block starting from instruction
17818 START up to head of basic block or instruction INSN.
17819
17820 Function puts true value into *FOUND var if definition was found
17821 and false otherwise.
17822
17823 Distance in half-cycles between START and found instruction or head
17824 of BB is added to DISTANCE and returned. */
17825
17826 static int
17827 distance_non_agu_define_in_bb (unsigned int regno1, unsigned int regno2,
17828 rtx insn, int distance,
17829 rtx start, bool *found)
17830 {
17831 basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL;
17832 rtx prev = start;
17833 rtx next = NULL;
17834
17835 *found = false;
17836
17837 while (prev
17838 && prev != insn
17839 && distance < LEA_SEARCH_THRESHOLD)
17840 {
17841 if (NONDEBUG_INSN_P (prev) && NONJUMP_INSN_P (prev))
17842 {
17843 distance = increase_distance (prev, next, distance);
17844 if (insn_defines_reg (regno1, regno2, prev))
17845 {
17846 if (recog_memoized (prev) < 0
17847 || get_attr_type (prev) != TYPE_LEA)
17848 {
17849 *found = true;
17850 return distance;
17851 }
17852 }
17853
17854 next = prev;
17855 }
17856 if (prev == BB_HEAD (bb))
17857 break;
17858
17859 prev = PREV_INSN (prev);
17860 }
17861
17862 return distance;
17863 }
17864
17865 /* Search backward for non-agu definition of register number REGNO1
17866 or register number REGNO2 in INSN's basic block until
17867 1. Pass LEA_SEARCH_THRESHOLD instructions, or
17868 2. Reach neighbour BBs boundary, or
17869 3. Reach agu definition.
17870 Returns the distance between the non-agu definition point and INSN.
17871 If no definition point, returns -1. */
17872
17873 static int
17874 distance_non_agu_define (unsigned int regno1, unsigned int regno2,
17875 rtx insn)
17876 {
17877 basic_block bb = BLOCK_FOR_INSN (insn);
17878 int distance = 0;
17879 bool found = false;
17880
17881 if (insn != BB_HEAD (bb))
17882 distance = distance_non_agu_define_in_bb (regno1, regno2, insn,
17883 distance, PREV_INSN (insn),
17884 &found);
17885
17886 if (!found && distance < LEA_SEARCH_THRESHOLD)
17887 {
17888 edge e;
17889 edge_iterator ei;
17890 bool simple_loop = false;
17891
17892 FOR_EACH_EDGE (e, ei, bb->preds)
17893 if (e->src == bb)
17894 {
17895 simple_loop = true;
17896 break;
17897 }
17898
17899 if (simple_loop)
17900 distance = distance_non_agu_define_in_bb (regno1, regno2,
17901 insn, distance,
17902 BB_END (bb), &found);
17903 else
17904 {
17905 int shortest_dist = -1;
17906 bool found_in_bb = false;
17907
17908 FOR_EACH_EDGE (e, ei, bb->preds)
17909 {
17910 int bb_dist
17911 = distance_non_agu_define_in_bb (regno1, regno2,
17912 insn, distance,
17913 BB_END (e->src),
17914 &found_in_bb);
17915 if (found_in_bb)
17916 {
17917 if (shortest_dist < 0)
17918 shortest_dist = bb_dist;
17919 else if (bb_dist > 0)
17920 shortest_dist = MIN (bb_dist, shortest_dist);
17921
17922 found = true;
17923 }
17924 }
17925
17926 distance = shortest_dist;
17927 }
17928 }
17929
17930 /* get_attr_type may modify recog data. We want to make sure
17931 that recog data is valid for instruction INSN, on which
17932 distance_non_agu_define is called. INSN is unchanged here. */
17933 extract_insn_cached (insn);
17934
17935 if (!found)
17936 return -1;
17937
17938 return distance >> 1;
17939 }
17940
17941 /* Return the distance in half-cycles between INSN and the next
17942 insn that uses register number REGNO in a memory address, added
17943 to DISTANCE. Return -1 if REGNO is set before such a use.
17944
17945 Put true value into *FOUND if register usage was found and
17946 false otherwise.
17947 Put true value into *REDEFINED if register redefinition was
17948 found and false otherwise. */
17949
17950 static int
17951 distance_agu_use_in_bb (unsigned int regno,
17952 rtx insn, int distance, rtx start,
17953 bool *found, bool *redefined)
17954 {
17955 basic_block bb = NULL;
17956 rtx next = start;
17957 rtx prev = NULL;
17958
17959 *found = false;
17960 *redefined = false;
17961
17962 if (start != NULL_RTX)
17963 {
17964 bb = BLOCK_FOR_INSN (start);
17965 if (start != BB_HEAD (bb))
17966 /* If insn and start belong to the same bb, set prev to insn,
17967 so the call to increase_distance will increase the distance
17968 between insns by 1. */
17969 prev = insn;
17970 }
17971
17972 while (next
17973 && next != insn
17974 && distance < LEA_SEARCH_THRESHOLD)
17975 {
17976 if (NONDEBUG_INSN_P (next) && NONJUMP_INSN_P (next))
17977 {
17978 distance = increase_distance(prev, next, distance);
17979 if (insn_uses_reg_mem (regno, next))
17980 {
17981 /* Return DISTANCE if OP0 is used in memory
17982 address in NEXT. */
17983 *found = true;
17984 return distance;
17985 }
17986
17987 if (insn_defines_reg (regno, INVALID_REGNUM, next))
17988 {
17989 /* Return -1 if OP0 is set in NEXT. */
17990 *redefined = true;
17991 return -1;
17992 }
17993
17994 prev = next;
17995 }
17996
17997 if (next == BB_END (bb))
17998 break;
17999
18000 next = NEXT_INSN (next);
18001 }
18002
18003 return distance;
18004 }
18005
18006 /* Return the distance between INSN and the next insn that uses
18007 register number REGNO0 in a memory address. Return -1 if no such
18008 use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set. */
18009
18010 static int
18011 distance_agu_use (unsigned int regno0, rtx insn)
18012 {
18013 basic_block bb = BLOCK_FOR_INSN (insn);
18014 int distance = 0;
18015 bool found = false;
18016 bool redefined = false;
18017
18018 if (insn != BB_END (bb))
18019 distance = distance_agu_use_in_bb (regno0, insn, distance,
18020 NEXT_INSN (insn),
18021 &found, &redefined);
18022
18023 if (!found && !redefined && distance < LEA_SEARCH_THRESHOLD)
18024 {
18025 edge e;
18026 edge_iterator ei;
18027 bool simple_loop = false;
18028
18029 FOR_EACH_EDGE (e, ei, bb->succs)
18030 if (e->dest == bb)
18031 {
18032 simple_loop = true;
18033 break;
18034 }
18035
18036 if (simple_loop)
18037 distance = distance_agu_use_in_bb (regno0, insn,
18038 distance, BB_HEAD (bb),
18039 &found, &redefined);
18040 else
18041 {
18042 int shortest_dist = -1;
18043 bool found_in_bb = false;
18044 bool redefined_in_bb = false;
18045
18046 FOR_EACH_EDGE (e, ei, bb->succs)
18047 {
18048 int bb_dist
18049 = distance_agu_use_in_bb (regno0, insn,
18050 distance, BB_HEAD (e->dest),
18051 &found_in_bb, &redefined_in_bb);
18052 if (found_in_bb)
18053 {
18054 if (shortest_dist < 0)
18055 shortest_dist = bb_dist;
18056 else if (bb_dist > 0)
18057 shortest_dist = MIN (bb_dist, shortest_dist);
18058
18059 found = true;
18060 }
18061 }
18062
18063 distance = shortest_dist;
18064 }
18065 }
18066
18067 if (!found || redefined)
18068 return -1;
18069
18070 return distance >> 1;
18071 }
18072
18073 /* Define this macro to tune LEA priority vs ADD; it takes effect when
18074 there is a choice between LEA and ADD.
18075 Negative value: ADD is preferred over LEA
18076 Zero: neutral
18077 Positive value: LEA is preferred over ADD. */
18078 #define IX86_LEA_PRIORITY 0
18079
18080 /* Return true if using the lea INSN has a performance advantage
18081 over a sequence of instructions. The instruction sequence has
18082 SPLIT_COST cycles higher latency than the lea. */
18083
18084 static bool
18085 ix86_lea_outperforms (rtx insn, unsigned int regno0, unsigned int regno1,
18086 unsigned int regno2, int split_cost, bool has_scale)
18087 {
18088 int dist_define, dist_use;
18089
18090 /* For Silvermont, if a 2-source or 3-source LEA is used for a
18091 non-destructive destination, or for the ability to use a
18092 scale, the use of LEA is justified. */
18093 if (TARGET_SILVERMONT || TARGET_INTEL)
18094 {
18095 if (has_scale)
18096 return true;
18097 if (split_cost < 1)
18098 return false;
18099 if (regno0 == regno1 || regno0 == regno2)
18100 return false;
18101 return true;
18102 }
18103
18104 dist_define = distance_non_agu_define (regno1, regno2, insn);
18105 dist_use = distance_agu_use (regno0, insn);
18106
18107 if (dist_define < 0 || dist_define >= LEA_MAX_STALL)
18108 {
18109 /* If there is no non-AGU operand definition, no AGU
18110 operand usage, and the split cost is 0, then both the lea
18111 and non-lea variants have the same priority. Currently
18112 we prefer lea for 64-bit code and non-lea for 32-bit
18113 code. */
18114 if (dist_use < 0 && split_cost == 0)
18115 return TARGET_64BIT || IX86_LEA_PRIORITY;
18116 else
18117 return true;
18118 }
18119
18120 /* With a longer definition distance, lea becomes preferable.
18121 Adjust the distance to take the splitting cost and the
18122 lea priority into account. */
18123 dist_define += split_cost + IX86_LEA_PRIORITY;
18124
18125 /* If there is no use in a memory address, then we just check
18126 that the split cost exceeds the AGU stall. */
18127 if (dist_use < 0)
18128 return dist_define > LEA_MAX_STALL;
18129
18130 /* If this insn has both a backward non-agu dependence and a forward
18131 agu dependence, the one with the shorter distance takes effect. */
18132 return dist_define >= dist_use;
18133 }
18134
18135 /* Return true if it is legal to clobber flags by INSN and
18136 false otherwise. */
18137
18138 static bool
18139 ix86_ok_to_clobber_flags (rtx insn)
18140 {
18141 basic_block bb = BLOCK_FOR_INSN (insn);
18142 df_ref use;
18143 bitmap live;
18144
18145 while (insn)
18146 {
18147 if (NONDEBUG_INSN_P (insn))
18148 {
18149 FOR_EACH_INSN_USE (use, insn)
18150 if (DF_REF_REG_USE_P (use) && DF_REF_REGNO (use) == FLAGS_REG)
18151 return false;
18152
18153 if (insn_defines_reg (FLAGS_REG, INVALID_REGNUM, insn))
18154 return true;
18155 }
18156
18157 if (insn == BB_END (bb))
18158 break;
18159
18160 insn = NEXT_INSN (insn);
18161 }
18162
18163 live = df_get_live_out(bb);
18164 return !REGNO_REG_SET_P (live, FLAGS_REG);
18165 }
18166
18167 /* Return true if we need to split op0 = op1 + op2 into a sequence of
18168 move and add to avoid AGU stalls. */
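/* For example (illustrative only), "lea (%rsi,%rdi), %rax" would be
   replaced by "mov %rsi, %rax; add %rdi, %rax" when the destination is
   distinct from both sources and the LEA would stall the AGU.  */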
18169
18170 bool
18171 ix86_avoid_lea_for_add (rtx insn, rtx operands[])
18172 {
18173 unsigned int regno0, regno1, regno2;
18174
18175 /* Check if we need to optimize. */
18176 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18177 return false;
18178
18179 /* Check it is correct to split here. */
18180 if (!ix86_ok_to_clobber_flags(insn))
18181 return false;
18182
18183 regno0 = true_regnum (operands[0]);
18184 regno1 = true_regnum (operands[1]);
18185 regno2 = true_regnum (operands[2]);
18186
18187 /* We only need to split adds with a non-destructive
18188 destination operand. */
18189 if (regno0 == regno1 || regno0 == regno2)
18190 return false;
18191 else
18192 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, 1, false);
18193 }
18194
18195 /* Return true if we should emit lea instruction instead of mov
18196 instruction. */
18197
18198 bool
18199 ix86_use_lea_for_mov (rtx insn, rtx operands[])
18200 {
18201 unsigned int regno0, regno1;
18202
18203 /* Check if we need to optimize. */
18204 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18205 return false;
18206
18207 /* Use lea for reg to reg moves only. */
18208 if (!REG_P (operands[0]) || !REG_P (operands[1]))
18209 return false;
18210
18211 regno0 = true_regnum (operands[0]);
18212 regno1 = true_regnum (operands[1]);
18213
18214 return ix86_lea_outperforms (insn, regno0, regno1, INVALID_REGNUM, 0, false);
18215 }
18216
18217 /* Return true if we need to split lea into a sequence of
18218 instructions to avoid AGU stalls. */
18219
18220 bool
18221 ix86_avoid_lea_for_addr (rtx insn, rtx operands[])
18222 {
18223 unsigned int regno0, regno1, regno2;
18224 int split_cost;
18225 struct ix86_address parts;
18226 int ok;
18227
18228 /* Check if we need to optimize. */
18229 if (!TARGET_AVOID_LEA_FOR_ADDR || optimize_function_for_size_p (cfun))
18230 return false;
18231
18232 /* The "at least two components" test below might not catch simple
18233 move or zero extension insns if parts.base is non-NULL and parts.disp
18234 is const0_rtx as the only components in the address, e.g. if the
18235 register is %rbp or %r13. As this test is much cheaper and moves or
18236 zero extensions are the common case, do this check first. */
18237 if (REG_P (operands[1])
18238 || (SImode_address_operand (operands[1], VOIDmode)
18239 && REG_P (XEXP (operands[1], 0))))
18240 return false;
18241
18242 /* Check if it is OK to split here. */
18243 if (!ix86_ok_to_clobber_flags (insn))
18244 return false;
18245
18246 ok = ix86_decompose_address (operands[1], &parts);
18247 gcc_assert (ok);
18248
18249 /* There should be at least two components in the address. */
18250 if ((parts.base != NULL_RTX) + (parts.index != NULL_RTX)
18251 + (parts.disp != NULL_RTX) + (parts.scale > 1) < 2)
18252 return false;
18253
18254 /* We should not split into an add if a non-legitimate PIC
18255 operand is used as the displacement. */
18256 if (parts.disp && flag_pic && !LEGITIMATE_PIC_OPERAND_P (parts.disp))
18257 return false;
18258
18259 regno0 = true_regnum (operands[0]);
18260 regno1 = INVALID_REGNUM;
18261 regno2 = INVALID_REGNUM;
18262
18263 if (parts.base)
18264 regno1 = true_regnum (parts.base);
18265 if (parts.index)
18266 regno2 = true_regnum (parts.index);
18267
18268 split_cost = 0;
18269
18270 /* Compute how many cycles we will add to the execution time
18271 if we split the lea into a sequence of instructions. */
18272 if (parts.base || parts.index)
18273 {
18274 /* Have to use a mov instruction if the non-destructive
18275 destination form is used. */
18276 if (regno1 != regno0 && regno2 != regno0)
18277 split_cost += 1;
18278
18279 /* Have to add index to base if both exist. */
18280 if (parts.base && parts.index)
18281 split_cost += 1;
18282
18283 /* Have to use shift and adds if scale is 2 or greater. */
18284 if (parts.scale > 1)
18285 {
18286 if (regno0 != regno1)
18287 split_cost += 1;
18288 else if (regno2 == regno0)
18289 split_cost += 4;
18290 else
18291 split_cost += parts.scale;
18292 }
18293
18294 /* Have to use an add instruction with an immediate if
18295 disp is nonzero. */
18296 if (parts.disp && parts.disp != const0_rtx)
18297 split_cost += 1;
18298
18299 /* Subtract the price of lea. */
18300 split_cost -= 1;
18301 }
18302
18303 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost,
18304 parts.scale > 1);
18305 }
18306
18307 /* Emit the x86 binary operator CODE in mode MODE, where the first operand
18308 matches the destination. The emitted RTX includes a clobber of FLAGS_REG. */
18309
18310 static void
18311 ix86_emit_binop (enum rtx_code code, enum machine_mode mode,
18312 rtx dst, rtx src)
18313 {
18314 rtx op, clob;
18315
18316 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, dst, src));
18317 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
18318
18319 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
18320 }
18321
18322 /* Return true if the definition of REGNO1 is found nearer to INSN than that of REGNO2. */
18323
18324 static bool
18325 find_nearest_reg_def (rtx insn, int regno1, int regno2)
18326 {
18327 rtx prev = insn;
18328 rtx start = BB_HEAD (BLOCK_FOR_INSN (insn));
18329
18330 if (insn == start)
18331 return false;
18332 while (prev && prev != start)
18333 {
18334 if (!INSN_P (prev) || !NONDEBUG_INSN_P (prev))
18335 {
18336 prev = PREV_INSN (prev);
18337 continue;
18338 }
18339 if (insn_defines_reg (regno1, INVALID_REGNUM, prev))
18340 return true;
18341 else if (insn_defines_reg (regno2, INVALID_REGNUM, prev))
18342 return false;
18343 prev = PREV_INSN (prev);
18344 }
18345
18346 /* None of the regs is defined in the bb. */
18347 return false;
18348 }
18349
18350 /* Split lea instructions into a sequence of instructions
18351 which are executed on ALU to avoid AGU stalls.
18352 It is assumed that it is allowed to clobber flags register
18353 at lea position. */
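/* Illustrative example (register names are arbitrary): with a scaled
   index such as

     lea 0x4(%rbx,%rcx,2), %rax

   the split below produces roughly

     mov %rcx, %rax
     shl $1, %rax
     add %rbx, %rax
     add $0x4, %rax

   assuming %rax overlaps neither the base nor the index.  */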
18354
18355 void
18356 ix86_split_lea_for_addr (rtx insn, rtx operands[], enum machine_mode mode)
18357 {
18358 unsigned int regno0, regno1, regno2;
18359 struct ix86_address parts;
18360 rtx target, tmp;
18361 int ok, adds;
18362
18363 ok = ix86_decompose_address (operands[1], &parts);
18364 gcc_assert (ok);
18365
18366 target = gen_lowpart (mode, operands[0]);
18367
18368 regno0 = true_regnum (target);
18369 regno1 = INVALID_REGNUM;
18370 regno2 = INVALID_REGNUM;
18371
18372 if (parts.base)
18373 {
18374 parts.base = gen_lowpart (mode, parts.base);
18375 regno1 = true_regnum (parts.base);
18376 }
18377
18378 if (parts.index)
18379 {
18380 parts.index = gen_lowpart (mode, parts.index);
18381 regno2 = true_regnum (parts.index);
18382 }
18383
18384 if (parts.disp)
18385 parts.disp = gen_lowpart (mode, parts.disp);
18386
18387 if (parts.scale > 1)
18388 {
18389 /* Case r1 = r1 + ... */
18390 if (regno1 == regno0)
18391 {
18392 /* If we have the case r1 = r1 + C * r2 then we
18393 would have to use multiplication, which is very
18394 expensive. Assume the cost model is wrong if we
18395 reach such a case here. */
18396 gcc_assert (regno2 != regno0);
18397
18398 for (adds = parts.scale; adds > 0; adds--)
18399 ix86_emit_binop (PLUS, mode, target, parts.index);
18400 }
18401 else
18402 {
18403 /* r1 = r2 + r3 * C case. Need to move r3 into r1. */
18404 if (regno0 != regno2)
18405 emit_insn (gen_rtx_SET (VOIDmode, target, parts.index));
18406
18407 /* Use shift for scaling. */
18408 ix86_emit_binop (ASHIFT, mode, target,
18409 GEN_INT (exact_log2 (parts.scale)));
18410
18411 if (parts.base)
18412 ix86_emit_binop (PLUS, mode, target, parts.base);
18413
18414 if (parts.disp && parts.disp != const0_rtx)
18415 ix86_emit_binop (PLUS, mode, target, parts.disp);
18416 }
18417 }
18418 else if (!parts.base && !parts.index)
18419 {
18420 gcc_assert (parts.disp);
18421 emit_insn (gen_rtx_SET (VOIDmode, target, parts.disp));
18422 }
18423 else
18424 {
18425 if (!parts.base)
18426 {
18427 if (regno0 != regno2)
18428 emit_insn (gen_rtx_SET (VOIDmode, target, parts.index));
18429 }
18430 else if (!parts.index)
18431 {
18432 if (regno0 != regno1)
18433 emit_insn (gen_rtx_SET (VOIDmode, target, parts.base));
18434 }
18435 else
18436 {
18437 if (regno0 == regno1)
18438 tmp = parts.index;
18439 else if (regno0 == regno2)
18440 tmp = parts.base;
18441 else
18442 {
18443 rtx tmp1;
18444
18445 /* Find better operand for SET instruction, depending
18446 on which definition is farther from the insn. */
18447 if (find_nearest_reg_def (insn, regno1, regno2))
18448 tmp = parts.index, tmp1 = parts.base;
18449 else
18450 tmp = parts.base, tmp1 = parts.index;
18451
18452 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
18453
18454 if (parts.disp && parts.disp != const0_rtx)
18455 ix86_emit_binop (PLUS, mode, target, parts.disp);
18456
18457 ix86_emit_binop (PLUS, mode, target, tmp1);
18458 return;
18459 }
18460
18461 ix86_emit_binop (PLUS, mode, target, tmp);
18462 }
18463
18464 if (parts.disp && parts.disp != const0_rtx)
18465 ix86_emit_binop (PLUS, mode, target, parts.disp);
18466 }
18467 }
18468
18469 /* Return true if it is OK to optimize an ADD operation to a LEA
18470 operation to avoid flag register consumption. For most processors,
18471 ADD is faster than LEA. For processors like BONNELL, if the
18472 destination register of the LEA holds an actual address which will be
18473 used soon, LEA is better; otherwise ADD is better. */
18474
18475 bool
18476 ix86_lea_for_add_ok (rtx insn, rtx operands[])
18477 {
18478 unsigned int regno0 = true_regnum (operands[0]);
18479 unsigned int regno1 = true_regnum (operands[1]);
18480 unsigned int regno2 = true_regnum (operands[2]);
18481
18482 /* If a = b + c, (a!=b && a!=c), must use lea form. */
18483 if (regno0 != regno1 && regno0 != regno2)
18484 return true;
18485
18486 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18487 return false;
18488
18489 return ix86_lea_outperforms (insn, regno0, regno1, regno2, 0, false);
18490 }
18491
18492 /* Return true if destination reg of SET_BODY is shift count of
18493 USE_BODY. */
18494
18495 static bool
18496 ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
18497 {
18498 rtx set_dest;
18499 rtx shift_rtx;
18500 int i;
18501
18502 /* Retrieve destination of SET_BODY. */
18503 switch (GET_CODE (set_body))
18504 {
18505 case SET:
18506 set_dest = SET_DEST (set_body);
18507 if (!set_dest || !REG_P (set_dest))
18508 return false;
18509 break;
18510 case PARALLEL:
18511 for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
18512 if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
18513 use_body))
18514 return true;
18515 default:
18516 return false;
18517 break;
18518 }
18519
18520 /* Retrieve shift count of USE_BODY. */
18521 switch (GET_CODE (use_body))
18522 {
18523 case SET:
18524 shift_rtx = XEXP (use_body, 1);
18525 break;
18526 case PARALLEL:
18527 for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
18528 if (ix86_dep_by_shift_count_body (set_body,
18529 XVECEXP (use_body, 0, i)))
18530 return true;
18531 default:
18532 return false;
18533 break;
18534 }
18535
18536 if (shift_rtx
18537 && (GET_CODE (shift_rtx) == ASHIFT
18538 || GET_CODE (shift_rtx) == LSHIFTRT
18539 || GET_CODE (shift_rtx) == ASHIFTRT
18540 || GET_CODE (shift_rtx) == ROTATE
18541 || GET_CODE (shift_rtx) == ROTATERT))
18542 {
18543 rtx shift_count = XEXP (shift_rtx, 1);
18544
18545 /* Return true if shift count is dest of SET_BODY. */
18546 if (REG_P (shift_count))
18547 {
18548 /* Add this check since the function can be invoked before register
18549 allocation in the pre-reload scheduler. */
18550 if (reload_completed
18551 && true_regnum (set_dest) == true_regnum (shift_count))
18552 return true;
18553 else if (REGNO (set_dest) == REGNO (shift_count))
18554 return true;
18555 }
18556 }
18557
18558 return false;
18559 }
18560
18561 /* Return true if destination reg of SET_INSN is shift count of
18562 USE_INSN. */
18563
18564 bool
18565 ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
18566 {
18567 return ix86_dep_by_shift_count_body (PATTERN (set_insn),
18568 PATTERN (use_insn));
18569 }
18570
18571 /* Return TRUE or FALSE depending on whether the unary operator meets the
18572 appropriate constraints. */
18573
18574 bool
18575 ix86_unary_operator_ok (enum rtx_code,
18576 enum machine_mode,
18577 rtx operands[2])
18578 {
18579 /* If one of operands is memory, source and destination must match. */
18580 if ((MEM_P (operands[0])
18581 || MEM_P (operands[1]))
18582 && ! rtx_equal_p (operands[0], operands[1]))
18583 return false;
18584 return true;
18585 }
18586
18587 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
18588 are ok, keeping in mind the possible movddup alternative. */
18589
18590 bool
18591 ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
18592 {
18593 if (MEM_P (operands[0]))
18594 return rtx_equal_p (operands[0], operands[1 + high]);
18595 if (MEM_P (operands[1]) && MEM_P (operands[2]))
18596 return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
18597 return true;
18598 }
18599
18600 /* Post-reload splitter for converting an SF or DFmode value in an
18601 SSE register into an unsigned SImode. */
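/* The conversion works by comparing the input against 2^31: values below
   it are truncated directly, while larger values have 2^31 subtracted
   before truncation and the sign bit xored back in afterwards.
   For example, 3000000000.0 -> 3000000000.0 - 2^31 = 852516352.0
   -> 852516352 -> xor 0x80000000 -> 3000000000.  */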
18602
18603 void
18604 ix86_split_convert_uns_si_sse (rtx operands[])
18605 {
18606 enum machine_mode vecmode;
18607 rtx value, large, zero_or_two31, input, two31, x;
18608
18609 large = operands[1];
18610 zero_or_two31 = operands[2];
18611 input = operands[3];
18612 two31 = operands[4];
18613 vecmode = GET_MODE (large);
18614 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
18615
18616 /* Load up the value into the low element. We must ensure that the other
18617 elements are valid floats -- zero is the easiest such value. */
18618 if (MEM_P (input))
18619 {
18620 if (vecmode == V4SFmode)
18621 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
18622 else
18623 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
18624 }
18625 else
18626 {
18627 input = gen_rtx_REG (vecmode, REGNO (input));
18628 emit_move_insn (value, CONST0_RTX (vecmode));
18629 if (vecmode == V4SFmode)
18630 emit_insn (gen_sse_movss (value, value, input));
18631 else
18632 emit_insn (gen_sse2_movsd (value, value, input));
18633 }
18634
18635 emit_move_insn (large, two31);
18636 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
18637
18638 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
18639 emit_insn (gen_rtx_SET (VOIDmode, large, x));
18640
18641 x = gen_rtx_AND (vecmode, zero_or_two31, large);
18642 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
18643
18644 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
18645 emit_insn (gen_rtx_SET (VOIDmode, value, x));
18646
18647 large = gen_rtx_REG (V4SImode, REGNO (large));
18648 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
18649
18650 x = gen_rtx_REG (V4SImode, REGNO (value));
18651 if (vecmode == V4SFmode)
18652 emit_insn (gen_fix_truncv4sfv4si2 (x, value));
18653 else
18654 emit_insn (gen_sse2_cvttpd2dq (x, value));
18655 value = x;
18656
18657 emit_insn (gen_xorv4si3 (value, value, large));
18658 }
18659
18660 /* Convert an unsigned DImode value into a DFmode, using only SSE.
18661 Expects the 64-bit DImode to be supplied in a pair of integral
18662 registers. Requires SSE2; will use SSE3 if available. For x86_32,
18663 -mfpmath=sse, !optimize_size only. */
18664
18665 void
18666 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
18667 {
18668 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
18669 rtx int_xmm, fp_xmm;
18670 rtx biases, exponents;
18671 rtx x;
18672
18673 int_xmm = gen_reg_rtx (V4SImode);
18674 if (TARGET_INTER_UNIT_MOVES_TO_VEC)
18675 emit_insn (gen_movdi_to_sse (int_xmm, input));
18676 else if (TARGET_SSE_SPLIT_REGS)
18677 {
18678 emit_clobber (int_xmm);
18679 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
18680 }
18681 else
18682 {
18683 x = gen_reg_rtx (V2DImode);
18684 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
18685 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
18686 }
18687
18688 x = gen_rtx_CONST_VECTOR (V4SImode,
18689 gen_rtvec (4, GEN_INT (0x43300000UL),
18690 GEN_INT (0x45300000UL),
18691 const0_rtx, const0_rtx));
18692 exponents = validize_mem (force_const_mem (V4SImode, x));
18693
18694 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
18695 emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents));
18696
18697 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
18698 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
18699 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
18700 (0x1.0p84 + double(fp_value_hi_xmm)).
18701 Note these exponents differ by 32. */
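/* Illustrative worked example: for the input 0x0000000100000002 the
   low word yields 0x1.0p52 + 2 and the high word yields
   0x1.0p84 + 1 * 0x1.0p32.  After the bias subtraction below the two
   lanes hold 2.0 and 4294967296.0, and their sum is the wanted
   4294967298.0.  */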
18702
18703 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
18704
18705 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
18706 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
18707 real_ldexp (&bias_lo_rvt, &dconst1, 52);
18708 real_ldexp (&bias_hi_rvt, &dconst1, 84);
18709 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
18710 x = const_double_from_real_value (bias_hi_rvt, DFmode);
18711 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
18712 biases = validize_mem (force_const_mem (V2DFmode, biases));
18713 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
18714
18715 /* Add the upper and lower DFmode values together. */
18716 if (TARGET_SSE3)
18717 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
18718 else
18719 {
18720 x = copy_to_mode_reg (V2DFmode, fp_xmm);
18721 emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm));
18722 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
18723 }
18724
18725 ix86_expand_vector_extract (false, target, fp_xmm, 0);
18726 }
18727
18728 /* Not used, but eases macroization of patterns. */
18729 void
18730 ix86_expand_convert_uns_sixf_sse (rtx, rtx)
18731 {
18732 gcc_unreachable ();
18733 }
18734
18735 /* Convert an unsigned SImode value into a DFmode. Only currently used
18736 for SSE, but applicable anywhere. */
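/* The idea: adding -2^31 moves the value into the signed range (with
   wrap-around), the signed SImode -> DFmode conversion is then exact,
   and adding 2^31.0 back restores the unsigned value.  For example,
   0xffffffff -> 0x7fffffff -> 2147483647.0 -> 4294967295.0.  */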
18737
18738 void
18739 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
18740 {
18741 REAL_VALUE_TYPE TWO31r;
18742 rtx x, fp;
18743
18744 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
18745 NULL, 1, OPTAB_DIRECT);
18746
18747 fp = gen_reg_rtx (DFmode);
18748 emit_insn (gen_floatsidf2 (fp, x));
18749
18750 real_ldexp (&TWO31r, &dconst1, 31);
18751 x = const_double_from_real_value (TWO31r, DFmode);
18752
18753 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
18754 if (x != target)
18755 emit_move_insn (target, x);
18756 }
18757
18758 /* Convert a signed DImode value into a DFmode. Only used for SSE in
18759 32-bit mode; otherwise we have a direct convert instruction. */
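/* This computes (double) (signed) high_word * 0x1.0p32
   + (double) (unsigned) low_word; both partial results are exact in
   DFmode, so only the final addition rounds.  */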
18760
18761 void
18762 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
18763 {
18764 REAL_VALUE_TYPE TWO32r;
18765 rtx fp_lo, fp_hi, x;
18766
18767 fp_lo = gen_reg_rtx (DFmode);
18768 fp_hi = gen_reg_rtx (DFmode);
18769
18770 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
18771
18772 real_ldexp (&TWO32r, &dconst1, 32);
18773 x = const_double_from_real_value (TWO32r, DFmode);
18774 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
18775
18776 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
18777
18778 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
18779 0, OPTAB_DIRECT);
18780 if (x != target)
18781 emit_move_insn (target, x);
18782 }
18783
18784 /* Convert an unsigned SImode value into a SFmode, using only SSE.
18785 For x86_32, -mfpmath=sse, !optimize_size only. */
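/* The input is split into 16-bit halves: each half converts to SFmode
   exactly, the scaled high half (hi * 2^16) is still exact, and the single
   final addition rounds once, so the result is the correctly rounded
   SFmode value of the 32-bit input.  */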
18786 void
18787 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
18788 {
18789 REAL_VALUE_TYPE ONE16r;
18790 rtx fp_hi, fp_lo, int_hi, int_lo, x;
18791
18792 real_ldexp (&ONE16r, &dconst1, 16);
18793 x = const_double_from_real_value (ONE16r, SFmode);
18794 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
18795 NULL, 0, OPTAB_DIRECT);
18796 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
18797 NULL, 0, OPTAB_DIRECT);
18798 fp_hi = gen_reg_rtx (SFmode);
18799 fp_lo = gen_reg_rtx (SFmode);
18800 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
18801 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
18802 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
18803 0, OPTAB_DIRECT);
18804 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
18805 0, OPTAB_DIRECT);
18806 if (!rtx_equal_p (target, fp_hi))
18807 emit_move_insn (target, fp_hi);
18808 }
18809
18810 /* floatunsv{4,8}siv{4,8}sf2 expander. Expand code to convert
18811 a vector of unsigned ints VAL to vector of floats TARGET. */
18812
18813 void
18814 ix86_expand_vector_convert_uns_vsivsf (rtx target, rtx val)
18815 {
18816 rtx tmp[8];
18817 REAL_VALUE_TYPE TWO16r;
18818 enum machine_mode intmode = GET_MODE (val);
18819 enum machine_mode fltmode = GET_MODE (target);
18820 rtx (*cvt) (rtx, rtx);
18821
18822 if (intmode == V4SImode)
18823 cvt = gen_floatv4siv4sf2;
18824 else
18825 cvt = gen_floatv8siv8sf2;
18826 tmp[0] = ix86_build_const_vector (intmode, 1, GEN_INT (0xffff));
18827 tmp[0] = force_reg (intmode, tmp[0]);
18828 tmp[1] = expand_simple_binop (intmode, AND, val, tmp[0], NULL_RTX, 1,
18829 OPTAB_DIRECT);
18830 tmp[2] = expand_simple_binop (intmode, LSHIFTRT, val, GEN_INT (16),
18831 NULL_RTX, 1, OPTAB_DIRECT);
18832 tmp[3] = gen_reg_rtx (fltmode);
18833 emit_insn (cvt (tmp[3], tmp[1]));
18834 tmp[4] = gen_reg_rtx (fltmode);
18835 emit_insn (cvt (tmp[4], tmp[2]));
18836 real_ldexp (&TWO16r, &dconst1, 16);
18837 tmp[5] = const_double_from_real_value (TWO16r, SFmode);
18838 tmp[5] = force_reg (fltmode, ix86_build_const_vector (fltmode, 1, tmp[5]));
18839 tmp[6] = expand_simple_binop (fltmode, MULT, tmp[4], tmp[5], NULL_RTX, 1,
18840 OPTAB_DIRECT);
18841 tmp[7] = expand_simple_binop (fltmode, PLUS, tmp[3], tmp[6], target, 1,
18842 OPTAB_DIRECT);
18843 if (tmp[7] != target)
18844 emit_move_insn (target, tmp[7]);
18845 }
18846
18847 /* Adjust a V*SFmode/V*DFmode value VAL so that *sfix_trunc* resp. fix_trunc*
18848 pattern can be used on it instead of *ufix_trunc* resp. fixuns_trunc*.
18849 This is done by doing just signed conversion if < 0x1p31, and otherwise by
18850 subtracting 0x1p31 first and xoring in 0x80000000 from *XORP afterwards. */
18851
18852 rtx
18853 ix86_expand_adjust_ufix_to_sfix_si (rtx val, rtx *xorp)
18854 {
18855 REAL_VALUE_TYPE TWO31r;
18856 rtx two31r, tmp[4];
18857 enum machine_mode mode = GET_MODE (val);
18858 enum machine_mode scalarmode = GET_MODE_INNER (mode);
18859 enum machine_mode intmode = GET_MODE_SIZE (mode) == 32 ? V8SImode : V4SImode;
18860 rtx (*cmp) (rtx, rtx, rtx, rtx);
18861 int i;
18862
18863 for (i = 0; i < 3; i++)
18864 tmp[i] = gen_reg_rtx (mode);
18865 real_ldexp (&TWO31r, &dconst1, 31);
18866 two31r = const_double_from_real_value (TWO31r, scalarmode);
18867 two31r = ix86_build_const_vector (mode, 1, two31r);
18868 two31r = force_reg (mode, two31r);
18869 switch (mode)
18870 {
18871 case V8SFmode: cmp = gen_avx_maskcmpv8sf3; break;
18872 case V4SFmode: cmp = gen_sse_maskcmpv4sf3; break;
18873 case V4DFmode: cmp = gen_avx_maskcmpv4df3; break;
18874 case V2DFmode: cmp = gen_sse2_maskcmpv2df3; break;
18875 default: gcc_unreachable ();
18876 }
18877 tmp[3] = gen_rtx_LE (mode, two31r, val);
18878 emit_insn (cmp (tmp[0], two31r, val, tmp[3]));
18879 tmp[1] = expand_simple_binop (mode, AND, tmp[0], two31r, tmp[1],
18880 0, OPTAB_DIRECT);
18881 if (intmode == V4SImode || TARGET_AVX2)
18882 *xorp = expand_simple_binop (intmode, ASHIFT,
18883 gen_lowpart (intmode, tmp[0]),
18884 GEN_INT (31), NULL_RTX, 0,
18885 OPTAB_DIRECT);
18886 else
18887 {
18888 rtx two31 = GEN_INT ((unsigned HOST_WIDE_INT) 1 << 31);
18889 two31 = ix86_build_const_vector (intmode, 1, two31);
18890 *xorp = expand_simple_binop (intmode, AND,
18891 gen_lowpart (intmode, tmp[0]),
18892 two31, NULL_RTX, 0,
18893 OPTAB_DIRECT);
18894 }
18895 return expand_simple_binop (mode, MINUS, val, tmp[1], tmp[2],
18896 0, OPTAB_DIRECT);
18897 }
18898
18899 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
18900 then replicate the value for all elements of the vector
18901 register. */
18902
18903 rtx
18904 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
18905 {
18906 int i, n_elt;
18907 rtvec v;
18908 enum machine_mode scalar_mode;
18909
18910 switch (mode)
18911 {
18912 case V64QImode:
18913 case V32QImode:
18914 case V16QImode:
18915 case V32HImode:
18916 case V16HImode:
18917 case V8HImode:
18918 case V16SImode:
18919 case V8SImode:
18920 case V4SImode:
18921 case V8DImode:
18922 case V4DImode:
18923 case V2DImode:
18924 gcc_assert (vect);
18925 case V16SFmode:
18926 case V8SFmode:
18927 case V4SFmode:
18928 case V8DFmode:
18929 case V4DFmode:
18930 case V2DFmode:
18931 n_elt = GET_MODE_NUNITS (mode);
18932 v = rtvec_alloc (n_elt);
18933 scalar_mode = GET_MODE_INNER (mode);
18934
18935 RTVEC_ELT (v, 0) = value;
18936
18937 for (i = 1; i < n_elt; ++i)
18938 RTVEC_ELT (v, i) = vect ? value : CONST0_RTX (scalar_mode);
18939
18940 return gen_rtx_CONST_VECTOR (mode, v);
18941
18942 default:
18943 gcc_unreachable ();
18944 }
18945 }
18946
18947 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
18948 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
18949 for an SSE register. If VECT is true, then replicate the mask for
18950 all elements of the vector register. If INVERT is true, then create
18951 a mask excluding the sign bit. */
18952
18953 rtx
18954 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
18955 {
18956 enum machine_mode vec_mode, imode;
18957 HOST_WIDE_INT hi, lo;
18958 int shift = 63;
18959 rtx v;
18960 rtx mask;
18961
18962 /* Find the sign bit, sign extended to 2*HWI. */
18963 switch (mode)
18964 {
18965 case V16SImode:
18966 case V16SFmode:
18967 case V8SImode:
18968 case V4SImode:
18969 case V8SFmode:
18970 case V4SFmode:
18971 vec_mode = mode;
18972 mode = GET_MODE_INNER (mode);
18973 imode = SImode;
18974 lo = 0x80000000, hi = lo < 0;
18975 break;
18976
18977 case V8DImode:
18978 case V4DImode:
18979 case V2DImode:
18980 case V8DFmode:
18981 case V4DFmode:
18982 case V2DFmode:
18983 vec_mode = mode;
18984 mode = GET_MODE_INNER (mode);
18985 imode = DImode;
18986 if (HOST_BITS_PER_WIDE_INT >= 64)
18987 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
18988 else
18989 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
18990 break;
18991
18992 case TImode:
18993 case TFmode:
18994 vec_mode = VOIDmode;
18995 if (HOST_BITS_PER_WIDE_INT >= 64)
18996 {
18997 imode = TImode;
18998 lo = 0, hi = (HOST_WIDE_INT)1 << shift;
18999 }
19000 else
19001 {
19002 rtvec vec;
19003
19004 imode = DImode;
19005 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
19006
19007 if (invert)
19008 {
19009 lo = ~lo, hi = ~hi;
19010 v = constm1_rtx;
19011 }
19012 else
19013 v = const0_rtx;
19014
19015 mask = immed_double_const (lo, hi, imode);
19016
19017 vec = gen_rtvec (2, v, mask);
19018 v = gen_rtx_CONST_VECTOR (V2DImode, vec);
19019 v = copy_to_mode_reg (mode, gen_lowpart (mode, v));
19020
19021 return v;
19022 }
19023 break;
19024
19025 default:
19026 gcc_unreachable ();
19027 }
19028
19029 if (invert)
19030 lo = ~lo, hi = ~hi;
19031
19032 /* Force this value into the low part of a fp vector constant. */
19033 mask = immed_double_const (lo, hi, imode);
19034 mask = gen_lowpart (mode, mask);
19035
19036 if (vec_mode == VOIDmode)
19037 return force_reg (mode, mask);
19038
19039 v = ix86_build_const_vector (vec_mode, vect, mask);
19040 return force_reg (vec_mode, v);
19041 }
19042
19043 /* Generate code for floating point ABS or NEG. */
19044
19045 void
19046 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
19047 rtx operands[])
19048 {
19049 rtx mask, set, dst, src;
19050 bool use_sse = false;
19051 bool vector_mode = VECTOR_MODE_P (mode);
19052 enum machine_mode vmode = mode;
19053
19054 if (vector_mode)
19055 use_sse = true;
19056 else if (mode == TFmode)
19057 use_sse = true;
19058 else if (TARGET_SSE_MATH)
19059 {
19060 use_sse = SSE_FLOAT_MODE_P (mode);
19061 if (mode == SFmode)
19062 vmode = V4SFmode;
19063 else if (mode == DFmode)
19064 vmode = V2DFmode;
19065 }
19066
19067 /* NEG and ABS performed with SSE use bitwise mask operations.
19068 Create the appropriate mask now. */
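/* That is, NEG flips the sign bit by XORing with a mask that has only the
   sign bit set, while ABS clears it by ANDing with the inverted mask,
   which is why "code == ABS" selects the inverted mask below.  */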
19069 if (use_sse)
19070 mask = ix86_build_signbit_mask (vmode, vector_mode, code == ABS);
19071 else
19072 mask = NULL_RTX;
19073
19074 dst = operands[0];
19075 src = operands[1];
19076
19077 set = gen_rtx_fmt_e (code, mode, src);
19078 set = gen_rtx_SET (VOIDmode, dst, set);
19079
19080 if (mask)
19081 {
19082 rtx use, clob;
19083 rtvec par;
19084
19085 use = gen_rtx_USE (VOIDmode, mask);
19086 if (vector_mode)
19087 par = gen_rtvec (2, set, use);
19088 else
19089 {
19090 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
19091 par = gen_rtvec (3, set, use, clob);
19092 }
19093 emit_insn (gen_rtx_PARALLEL (VOIDmode, par));
19094 }
19095 else
19096 emit_insn (set);
19097 }
19098
19099 /* Expand a copysign operation. Special case operand 0 being a constant. */
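/* copysign (x, y) is implemented with bit masks as roughly
   (x with its sign bit cleared) | (y & sign_bit_mask), where the first
   term is either a pre-computed constant (the _const variant) or
   x & ~sign_bit_mask (the _var variant); see the two split routines
   below.  */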
19100
19101 void
19102 ix86_expand_copysign (rtx operands[])
19103 {
19104 enum machine_mode mode, vmode;
19105 rtx dest, op0, op1, mask, nmask;
19106
19107 dest = operands[0];
19108 op0 = operands[1];
19109 op1 = operands[2];
19110
19111 mode = GET_MODE (dest);
19112
19113 if (mode == SFmode)
19114 vmode = V4SFmode;
19115 else if (mode == DFmode)
19116 vmode = V2DFmode;
19117 else
19118 vmode = mode;
19119
19120 if (GET_CODE (op0) == CONST_DOUBLE)
19121 {
19122 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
19123
19124 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
19125 op0 = simplify_unary_operation (ABS, mode, op0, mode);
19126
19127 if (mode == SFmode || mode == DFmode)
19128 {
19129 if (op0 == CONST0_RTX (mode))
19130 op0 = CONST0_RTX (vmode);
19131 else
19132 {
19133 rtx v = ix86_build_const_vector (vmode, false, op0);
19134
19135 op0 = force_reg (vmode, v);
19136 }
19137 }
19138 else if (op0 != CONST0_RTX (mode))
19139 op0 = force_reg (mode, op0);
19140
19141 mask = ix86_build_signbit_mask (vmode, 0, 0);
19142
19143 if (mode == SFmode)
19144 copysign_insn = gen_copysignsf3_const;
19145 else if (mode == DFmode)
19146 copysign_insn = gen_copysigndf3_const;
19147 else
19148 copysign_insn = gen_copysigntf3_const;
19149
19150 emit_insn (copysign_insn (dest, op0, op1, mask));
19151 }
19152 else
19153 {
19154 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
19155
19156 nmask = ix86_build_signbit_mask (vmode, 0, 1);
19157 mask = ix86_build_signbit_mask (vmode, 0, 0);
19158
19159 if (mode == SFmode)
19160 copysign_insn = gen_copysignsf3_var;
19161 else if (mode == DFmode)
19162 copysign_insn = gen_copysigndf3_var;
19163 else
19164 copysign_insn = gen_copysigntf3_var;
19165
19166 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
19167 }
19168 }
19169
19170 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
19171 be a constant, and so has already been expanded into a vector constant. */
19172
19173 void
19174 ix86_split_copysign_const (rtx operands[])
19175 {
19176 enum machine_mode mode, vmode;
19177 rtx dest, op0, mask, x;
19178
19179 dest = operands[0];
19180 op0 = operands[1];
19181 mask = operands[3];
19182
19183 mode = GET_MODE (dest);
19184 vmode = GET_MODE (mask);
19185
19186 dest = simplify_gen_subreg (vmode, dest, mode, 0);
19187 x = gen_rtx_AND (vmode, dest, mask);
19188 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19189
19190 if (op0 != CONST0_RTX (vmode))
19191 {
19192 x = gen_rtx_IOR (vmode, dest, op0);
19193 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19194 }
19195 }
19196
19197 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
19198 so we have to do two masks. */
19199
19200 void
19201 ix86_split_copysign_var (rtx operands[])
19202 {
19203 enum machine_mode mode, vmode;
19204 rtx dest, scratch, op0, op1, mask, nmask, x;
19205
19206 dest = operands[0];
19207 scratch = operands[1];
19208 op0 = operands[2];
19209 op1 = operands[3];
19210 nmask = operands[4];
19211 mask = operands[5];
19212
19213 mode = GET_MODE (dest);
19214 vmode = GET_MODE (mask);
19215
19216 if (rtx_equal_p (op0, op1))
19217 {
19218 /* Shouldn't happen often (it's useless, obviously), but when it does
19219 we'd generate incorrect code if we continue below. */
19220 emit_move_insn (dest, op0);
19221 return;
19222 }
19223
19224 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
19225 {
19226 gcc_assert (REGNO (op1) == REGNO (scratch));
19227
19228 x = gen_rtx_AND (vmode, scratch, mask);
19229 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
19230
19231 dest = mask;
19232 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
19233 x = gen_rtx_NOT (vmode, dest);
19234 x = gen_rtx_AND (vmode, x, op0);
19235 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19236 }
19237 else
19238 {
19239 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
19240 {
19241 x = gen_rtx_AND (vmode, scratch, mask);
19242 }
19243 else /* alternative 2,4 */
19244 {
19245 gcc_assert (REGNO (mask) == REGNO (scratch));
19246 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
19247 x = gen_rtx_AND (vmode, scratch, op1);
19248 }
19249 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
19250
19251 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
19252 {
19253 dest = simplify_gen_subreg (vmode, op0, mode, 0);
19254 x = gen_rtx_AND (vmode, dest, nmask);
19255 }
19256 else /* alternative 3,4 */
19257 {
19258 gcc_assert (REGNO (nmask) == REGNO (dest));
19259 dest = nmask;
19260 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
19261 x = gen_rtx_AND (vmode, dest, op0);
19262 }
19263 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19264 }
19265
19266 x = gen_rtx_IOR (vmode, dest, scratch);
19267 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19268 }
19269
19270 /* Return TRUE or FALSE depending on whether the first SET in INSN
19271 has source and destination with matching CC modes, and that the
19272 CC mode is at least as constrained as REQ_MODE. */
19273
19274 bool
19275 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
19276 {
19277 rtx set;
19278 enum machine_mode set_mode;
19279
19280 set = PATTERN (insn);
19281 if (GET_CODE (set) == PARALLEL)
19282 set = XVECEXP (set, 0, 0);
19283 gcc_assert (GET_CODE (set) == SET);
19284 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
19285
19286 set_mode = GET_MODE (SET_DEST (set));
19287 switch (set_mode)
19288 {
19289 case CCNOmode:
19290 if (req_mode != CCNOmode
19291 && (req_mode != CCmode
19292 || XEXP (SET_SRC (set), 1) != const0_rtx))
19293 return false;
19294 break;
19295 case CCmode:
19296 if (req_mode == CCGCmode)
19297 return false;
19298 /* FALLTHRU */
19299 case CCGCmode:
19300 if (req_mode == CCGOCmode || req_mode == CCNOmode)
19301 return false;
19302 /* FALLTHRU */
19303 case CCGOCmode:
19304 if (req_mode == CCZmode)
19305 return false;
19306 /* FALLTHRU */
19307 case CCZmode:
19308 break;
19309
19310 case CCAmode:
19311 case CCCmode:
19312 case CCOmode:
19313 case CCSmode:
19314 if (set_mode != req_mode)
19315 return false;
19316 break;
19317
19318 default:
19319 gcc_unreachable ();
19320 }
19321
19322 return GET_MODE (SET_SRC (set)) == set_mode;
19323 }
19324
19325 /* Generate insn patterns to do an integer compare of OPERANDS. */
19326
19327 static rtx
19328 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
19329 {
19330 enum machine_mode cmpmode;
19331 rtx tmp, flags;
19332
19333 cmpmode = SELECT_CC_MODE (code, op0, op1);
19334 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
19335
19336 /* This is very simple, but making the interface the same as in the
19337 FP case makes the rest of the code easier. */
19338 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
19339 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
19340
19341 /* Return the test that should be put into the flags user, i.e.
19342 the bcc, scc, or cmov instruction. */
19343 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
19344 }
19345
19346 /* Figure out whether to use ordered or unordered fp comparisons.
19347 Return the appropriate mode to use. */
19348
19349 enum machine_mode
19350 ix86_fp_compare_mode (enum rtx_code)
19351 {
19352 /* ??? In order to make all comparisons reversible, we do all comparisons
19353 non-trapping when compiling for IEEE. Once gcc is able to distinguish
19354 between all forms of trapping and nontrapping comparisons, we can make
19355 inequality comparisons trapping again, since that results in better code
19356 when using FCOM based compares. */
19357 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
19358 }
19359
19360 enum machine_mode
19361 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
19362 {
19363 enum machine_mode mode = GET_MODE (op0);
19364
19365 if (SCALAR_FLOAT_MODE_P (mode))
19366 {
19367 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
19368 return ix86_fp_compare_mode (code);
19369 }
19370
19371 switch (code)
19372 {
19373 /* Only zero flag is needed. */
19374 case EQ: /* ZF=0 */
19375 case NE: /* ZF!=0 */
19376 return CCZmode;
19377 /* Codes needing carry flag. */
19378 case GEU: /* CF=0 */
19379 case LTU: /* CF=1 */
19380 /* Detect overflow checks. They need just the carry flag. */
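/* E.g. an unsigned "a + b < a" overflow test becomes a comparison of
   (plus a b) against a, which only needs the carry flag (CCCmode).  */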
19381 if (GET_CODE (op0) == PLUS
19382 && rtx_equal_p (op1, XEXP (op0, 0)))
19383 return CCCmode;
19384 else
19385 return CCmode;
19386 case GTU: /* CF=0 & ZF=0 */
19387 case LEU: /* CF=1 | ZF=1 */
19388 return CCmode;
19389 /* Codes possibly doable only with sign flag when
19390 comparing against zero. */
19391 case GE: /* SF=OF or SF=0 */
19392 case LT: /* SF<>OF or SF=1 */
19393 if (op1 == const0_rtx)
19394 return CCGOCmode;
19395 else
19396 /* For other cases Carry flag is not required. */
19397 return CCGCmode;
19398 /* Codes doable only with the sign flag when comparing
19399 against zero, but we lack a jump instruction for it,
19400 so we need to use relational tests against overflow,
19401 which thus needs to be zero. */
19402 case GT: /* ZF=0 & SF=OF */
19403 case LE: /* ZF=1 | SF<>OF */
19404 if (op1 == const0_rtx)
19405 return CCNOmode;
19406 else
19407 return CCGCmode;
19408 /* The strcmp pattern does (use flags), and combine may ask us for the
19409 proper mode. */
19410 case USE:
19411 return CCmode;
19412 default:
19413 gcc_unreachable ();
19414 }
19415 }
19416
19417 /* Return the fixed registers used for condition codes. */
19418
19419 static bool
19420 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
19421 {
19422 *p1 = FLAGS_REG;
19423 *p2 = FPSR_REG;
19424 return true;
19425 }
19426
19427 /* If two condition code modes are compatible, return a condition code
19428 mode which is compatible with both. Otherwise, return
19429 VOIDmode. */
19430
19431 static enum machine_mode
19432 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
19433 {
19434 if (m1 == m2)
19435 return m1;
19436
19437 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
19438 return VOIDmode;
19439
19440 if ((m1 == CCGCmode && m2 == CCGOCmode)
19441 || (m1 == CCGOCmode && m2 == CCGCmode))
19442 return CCGCmode;
19443
19444 if (m1 == CCZmode && (m2 == CCGCmode || m2 == CCGOCmode))
19445 return m2;
19446 else if (m2 == CCZmode && (m1 == CCGCmode || m1 == CCGOCmode))
19447 return m1;
19448
19449 switch (m1)
19450 {
19451 default:
19452 gcc_unreachable ();
19453
19454 case CCmode:
19455 case CCGCmode:
19456 case CCGOCmode:
19457 case CCNOmode:
19458 case CCAmode:
19459 case CCCmode:
19460 case CCOmode:
19461 case CCSmode:
19462 case CCZmode:
19463 switch (m2)
19464 {
19465 default:
19466 return VOIDmode;
19467
19468 case CCmode:
19469 case CCGCmode:
19470 case CCGOCmode:
19471 case CCNOmode:
19472 case CCAmode:
19473 case CCCmode:
19474 case CCOmode:
19475 case CCSmode:
19476 case CCZmode:
19477 return CCmode;
19478 }
19479
19480 case CCFPmode:
19481 case CCFPUmode:
19482 /* These are only compatible with themselves, which we already
19483 checked above. */
19484 return VOIDmode;
19485 }
19486 }
19487
19488
19489 /* Return a comparison we can do and that it is equivalent to
19490 swap_condition (code) apart possibly from orderedness.
19491 But, never change orderedness if TARGET_IEEE_FP, returning
19492 UNKNOWN in that case if necessary. */
19493
19494 static enum rtx_code
19495 ix86_fp_swap_condition (enum rtx_code code)
19496 {
19497 switch (code)
19498 {
19499 case GT: /* GTU - CF=0 & ZF=0 */
19500 return TARGET_IEEE_FP ? UNKNOWN : UNLT;
19501 case GE: /* GEU - CF=0 */
19502 return TARGET_IEEE_FP ? UNKNOWN : UNLE;
19503 case UNLT: /* LTU - CF=1 */
19504 return TARGET_IEEE_FP ? UNKNOWN : GT;
19505 case UNLE: /* LEU - CF=1 | ZF=1 */
19506 return TARGET_IEEE_FP ? UNKNOWN : GE;
19507 default:
19508 return swap_condition (code);
19509 }
19510 }
19511
19512 /* Return the cost of comparison CODE using the best strategy for performance.
19513 All of the following functions use the number of instructions as the cost metric.
19514 In the future this should be tweaked to compute bytes for optimize_size and
19515 take into account the performance of various instructions on various CPUs. */
19516
19517 static int
19518 ix86_fp_comparison_cost (enum rtx_code code)
19519 {
19520 int arith_cost;
19521
19522 /* The cost of code using bit-twiddling on %ah. */
19523 switch (code)
19524 {
19525 case UNLE:
19526 case UNLT:
19527 case LTGT:
19528 case GT:
19529 case GE:
19530 case UNORDERED:
19531 case ORDERED:
19532 case UNEQ:
19533 arith_cost = 4;
19534 break;
19535 case LT:
19536 case NE:
19537 case EQ:
19538 case UNGE:
19539 arith_cost = TARGET_IEEE_FP ? 5 : 4;
19540 break;
19541 case LE:
19542 case UNGT:
19543 arith_cost = TARGET_IEEE_FP ? 6 : 4;
19544 break;
19545 default:
19546 gcc_unreachable ();
19547 }
19548
19549 switch (ix86_fp_comparison_strategy (code))
19550 {
19551 case IX86_FPCMP_COMI:
19552 return arith_cost > 4 ? 3 : 2;
19553 case IX86_FPCMP_SAHF:
19554 return arith_cost > 4 ? 4 : 3;
19555 default:
19556 return arith_cost;
19557 }
19558 }
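
/* A worked example (hypothetical inputs, added note): for code == UNGT with
   TARGET_IEEE_FP, arith_cost above is 6; if the chosen strategy is
   IX86_FPCMP_COMI the function returns 3, with IX86_FPCMP_SAHF it returns 4,
   and with IX86_FPCMP_ARITH the full bit-twiddling cost of 6 is used.  */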
19559
19560 /* Return the strategy to use for a floating-point comparison.  We assume
19561    fcomi is always preferable where available, since that is also true for
19562    size (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test).  */
19563
19564 enum ix86_fpcmp_strategy
19565 ix86_fp_comparison_strategy (enum rtx_code)
19566 {
19567 /* Do fcomi/sahf based test when profitable. */
19568
19569 if (TARGET_CMOVE)
19570 return IX86_FPCMP_COMI;
19571
19572 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
19573 return IX86_FPCMP_SAHF;
19574
19575 return IX86_FPCMP_ARITH;
19576 }
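
/* For reference, an illustrative sketch of the three strategies (added note;
   the actual instruction selection happens in ix86_expand_fp_compare and the
   machine description):

     IX86_FPCMP_COMI:   fcomi/fucomi                  ; sets ZF/PF/CF directly
     IX86_FPCMP_SAHF:   fcom/fucom; fnstsw %ax; sahf  ; C3/C2/C0 -> ZF/PF/CF
     IX86_FPCMP_ARITH:  fcom/fucom; fnstsw %ax; test/and/cmp on %ah
                                                      ; explicit bit twiddling  */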
19577
19578 /* Swap, force into registers, or otherwise massage the two operands
19579 to a fp comparison. The operands are updated in place; the new
19580 comparison code is returned. */
19581
19582 static enum rtx_code
19583 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
19584 {
19585 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
19586 rtx op0 = *pop0, op1 = *pop1;
19587 enum machine_mode op_mode = GET_MODE (op0);
19588 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
19589
19590 /* All of the unordered compare instructions only work on registers.
19591 The same is true of the fcomi compare instructions. The XFmode
19592 compare instructions require registers except when comparing
19593 against zero or when converting operand 1 from fixed point to
19594 floating point. */
19595
19596 if (!is_sse
19597 && (fpcmp_mode == CCFPUmode
19598 || (op_mode == XFmode
19599 && ! (standard_80387_constant_p (op0) == 1
19600 || standard_80387_constant_p (op1) == 1)
19601 && GET_CODE (op1) != FLOAT)
19602 || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI))
19603 {
19604 op0 = force_reg (op_mode, op0);
19605 op1 = force_reg (op_mode, op1);
19606 }
19607 else
19608 {
19609 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
19610 things around if they appear profitable, otherwise force op0
19611 into a register. */
19612
19613 if (standard_80387_constant_p (op0) == 0
19614 || (MEM_P (op0)
19615 && ! (standard_80387_constant_p (op1) == 0
19616 || MEM_P (op1))))
19617 {
19618 enum rtx_code new_code = ix86_fp_swap_condition (code);
19619 if (new_code != UNKNOWN)
19620 {
19621 rtx tmp;
19622 tmp = op0, op0 = op1, op1 = tmp;
19623 code = new_code;
19624 }
19625 }
19626
19627 if (!REG_P (op0))
19628 op0 = force_reg (op_mode, op0);
19629
19630 if (CONSTANT_P (op1))
19631 {
19632 int tmp = standard_80387_constant_p (op1);
19633 if (tmp == 0)
19634 op1 = validize_mem (force_const_mem (op_mode, op1));
19635 else if (tmp == 1)
19636 {
19637 if (TARGET_CMOVE)
19638 op1 = force_reg (op_mode, op1);
19639 }
19640 else
19641 op1 = force_reg (op_mode, op1);
19642 }
19643 }
19644
19645 /* Try to rearrange the comparison to make it cheaper. */
19646 if (ix86_fp_comparison_cost (code)
19647 > ix86_fp_comparison_cost (swap_condition (code))
19648 && (REG_P (op1) || can_create_pseudo_p ()))
19649 {
19650 rtx tmp;
19651 tmp = op0, op0 = op1, op1 = tmp;
19652 code = swap_condition (code);
19653 if (!REG_P (op0))
19654 op0 = force_reg (op_mode, op0);
19655 }
19656
19657 *pop0 = op0;
19658 *pop1 = op1;
19659 return code;
19660 }
19661
19662 /* Convert comparison codes we use to represent FP comparison to integer
19663 code that will result in proper branch. Return UNKNOWN if no such code
19664 is available. */
19665
19666 enum rtx_code
19667 ix86_fp_compare_code_to_integer (enum rtx_code code)
19668 {
19669 switch (code)
19670 {
19671 case GT:
19672 return GTU;
19673 case GE:
19674 return GEU;
19675 case ORDERED:
19676 case UNORDERED:
19677 return code;
19678 break;
19679 case UNEQ:
19680 return EQ;
19681 break;
19682 case UNLT:
19683 return LTU;
19684 break;
19685 case UNLE:
19686 return LEU;
19687 break;
19688 case LTGT:
19689 return NE;
19690 break;
19691 default:
19692 return UNKNOWN;
19693 }
19694 }
19695
19696 /* Generate insn patterns to do a floating point compare of OPERANDS. */
19697
19698 static rtx
19699 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch)
19700 {
19701 enum machine_mode fpcmp_mode, intcmp_mode;
19702 rtx tmp, tmp2;
19703
19704 fpcmp_mode = ix86_fp_compare_mode (code);
19705 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
19706
19707 /* Do fcomi/sahf based test when profitable. */
19708 switch (ix86_fp_comparison_strategy (code))
19709 {
19710 case IX86_FPCMP_COMI:
19711 intcmp_mode = fpcmp_mode;
19712 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
19713 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
19714 tmp);
19715 emit_insn (tmp);
19716 break;
19717
19718 case IX86_FPCMP_SAHF:
19719 intcmp_mode = fpcmp_mode;
19720 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
19721 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
19722 tmp);
19723
19724 if (!scratch)
19725 scratch = gen_reg_rtx (HImode);
19726 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
19727 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
19728 break;
19729
19730 case IX86_FPCMP_ARITH:
19731       /* Reg-stack pops clobber the FP status word -- gotta get fnstsw first.  */
19732 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
19733 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
19734 if (!scratch)
19735 scratch = gen_reg_rtx (HImode);
19736 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
19737
19738       /* In the unordered case, we have to check C2 for NaNs, which
19739 	 doesn't combine into anything convenient.
19740 	 So do some bit twiddling on the value we've got in AH to come
19741 	 up with an appropriate set of condition codes.  */
19742
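      /* For reference (added note, not in the original comments): after
	 fnstsw, %ah holds the x87 condition bits C0 (0x01), C2 (0x04) and
	 C3 (0x40).  An fcom/fucom of a with b leaves:
	    a > b      C3=0 C2=0 C0=0
	    a < b      C3=0 C2=0 C0=1
	    a == b     C3=1 C2=0 C0=0
	    unordered  C3=1 C2=1 C0=1
	 so, e.g., testing %ah against 0x45 examines C3|C2|C0 at once.  */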
19743 intcmp_mode = CCNOmode;
19744 switch (code)
19745 {
19746 case GT:
19747 case UNGT:
19748 if (code == GT || !TARGET_IEEE_FP)
19749 {
19750 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
19751 code = EQ;
19752 }
19753 else
19754 {
19755 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19756 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
19757 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
19758 intcmp_mode = CCmode;
19759 code = GEU;
19760 }
19761 break;
19762 case LT:
19763 case UNLT:
19764 if (code == LT && TARGET_IEEE_FP)
19765 {
19766 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19767 emit_insn (gen_cmpqi_ext_3 (scratch, const1_rtx));
19768 intcmp_mode = CCmode;
19769 code = EQ;
19770 }
19771 else
19772 {
19773 emit_insn (gen_testqi_ext_ccno_0 (scratch, const1_rtx));
19774 code = NE;
19775 }
19776 break;
19777 case GE:
19778 case UNGE:
19779 if (code == GE || !TARGET_IEEE_FP)
19780 {
19781 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
19782 code = EQ;
19783 }
19784 else
19785 {
19786 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19787 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, const1_rtx));
19788 code = NE;
19789 }
19790 break;
19791 case LE:
19792 case UNLE:
19793 if (code == LE && TARGET_IEEE_FP)
19794 {
19795 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19796 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
19797 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
19798 intcmp_mode = CCmode;
19799 code = LTU;
19800 }
19801 else
19802 {
19803 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
19804 code = NE;
19805 }
19806 break;
19807 case EQ:
19808 case UNEQ:
19809 if (code == EQ && TARGET_IEEE_FP)
19810 {
19811 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19812 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
19813 intcmp_mode = CCmode;
19814 code = EQ;
19815 }
19816 else
19817 {
19818 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
19819 code = NE;
19820 }
19821 break;
19822 case NE:
19823 case LTGT:
19824 if (code == NE && TARGET_IEEE_FP)
19825 {
19826 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19827 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
19828 GEN_INT (0x40)));
19829 code = NE;
19830 }
19831 else
19832 {
19833 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
19834 code = EQ;
19835 }
19836 break;
19837
19838 case UNORDERED:
19839 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
19840 code = NE;
19841 break;
19842 case ORDERED:
19843 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
19844 code = EQ;
19845 break;
19846
19847 default:
19848 gcc_unreachable ();
19849 }
19850 break;
19851
19852 default:
19853 gcc_unreachable();
19854 }
19855
19856 /* Return the test that should be put into the flags user, i.e.
19857 the bcc, scc, or cmov instruction. */
19858 return gen_rtx_fmt_ee (code, VOIDmode,
19859 gen_rtx_REG (intcmp_mode, FLAGS_REG),
19860 const0_rtx);
19861 }
19862
19863 static rtx
19864 ix86_expand_compare (enum rtx_code code, rtx op0, rtx op1)
19865 {
19866 rtx ret;
19867
19868 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
19869 ret = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
19870
19871 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
19872 {
19873 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
19874 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
19875 }
19876 else
19877 ret = ix86_expand_int_compare (code, op0, op1);
19878
19879 return ret;
19880 }
19881
19882 void
19883 ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label)
19884 {
19885 enum machine_mode mode = GET_MODE (op0);
19886 rtx tmp;
19887
19888 switch (mode)
19889 {
19890 case SFmode:
19891 case DFmode:
19892 case XFmode:
19893 case QImode:
19894 case HImode:
19895 case SImode:
19896 simple:
19897 tmp = ix86_expand_compare (code, op0, op1);
19898 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
19899 gen_rtx_LABEL_REF (VOIDmode, label),
19900 pc_rtx);
19901 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
19902 return;
19903
19904 case DImode:
19905 if (TARGET_64BIT)
19906 goto simple;
19907 case TImode:
19908       /* Expand a double-word branch into multiple compare+branch.  */
19909 {
19910 rtx lo[2], hi[2], label2;
19911 enum rtx_code code1, code2, code3;
19912 enum machine_mode submode;
19913
19914 if (CONSTANT_P (op0) && !CONSTANT_P (op1))
19915 {
19916 tmp = op0, op0 = op1, op1 = tmp;
19917 code = swap_condition (code);
19918 }
19919
19920 split_double_mode (mode, &op0, 1, lo+0, hi+0);
19921 split_double_mode (mode, &op1, 1, lo+1, hi+1);
19922
19923 submode = mode == DImode ? SImode : DImode;
19924
19925 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
19926 avoid two branches. This costs one extra insn, so disable when
19927 optimizing for size. */
19928
19929 if ((code == EQ || code == NE)
19930 && (!optimize_insn_for_size_p ()
19931 || hi[1] == const0_rtx || lo[1] == const0_rtx))
19932 {
19933 rtx xor0, xor1;
19934
19935 xor1 = hi[0];
19936 if (hi[1] != const0_rtx)
19937 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
19938 NULL_RTX, 0, OPTAB_WIDEN);
19939
19940 xor0 = lo[0];
19941 if (lo[1] != const0_rtx)
19942 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
19943 NULL_RTX, 0, OPTAB_WIDEN);
19944
19945 tmp = expand_binop (submode, ior_optab, xor1, xor0,
19946 NULL_RTX, 0, OPTAB_WIDEN);
19947
19948 ix86_expand_branch (code, tmp, const0_rtx, label);
19949 return;
19950 }
19951
19952 	  /* Otherwise, if we are doing a less-than or greater-or-equal
19953 	     comparison, op1 is a constant, and its low word is zero, then we
19954 	     can just examine the high word.  Similarly for a low word of -1
19955 	     and less-or-equal or greater-than.  */
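	  /* A concrete illustration with hypothetical values: for
	     (unsigned) x < 0x1234567800000000 on a 32-bit target, the low
	     word of the constant is zero, so the whole test reduces to the
	     single-word compare hi(x) < 0x12345678.  */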
19956
19957 if (CONST_INT_P (hi[1]))
19958 switch (code)
19959 {
19960 case LT: case LTU: case GE: case GEU:
19961 if (lo[1] == const0_rtx)
19962 {
19963 ix86_expand_branch (code, hi[0], hi[1], label);
19964 return;
19965 }
19966 break;
19967 case LE: case LEU: case GT: case GTU:
19968 if (lo[1] == constm1_rtx)
19969 {
19970 ix86_expand_branch (code, hi[0], hi[1], label);
19971 return;
19972 }
19973 break;
19974 default:
19975 break;
19976 }
19977
19978 /* Otherwise, we need two or three jumps. */
19979
19980 label2 = gen_label_rtx ();
19981
19982 code1 = code;
19983 code2 = swap_condition (code);
19984 code3 = unsigned_condition (code);
19985
19986 switch (code)
19987 {
19988 case LT: case GT: case LTU: case GTU:
19989 break;
19990
19991 case LE: code1 = LT; code2 = GT; break;
19992 case GE: code1 = GT; code2 = LT; break;
19993 case LEU: code1 = LTU; code2 = GTU; break;
19994 case GEU: code1 = GTU; code2 = LTU; break;
19995
19996 case EQ: code1 = UNKNOWN; code2 = NE; break;
19997 case NE: code2 = UNKNOWN; break;
19998
19999 default:
20000 gcc_unreachable ();
20001 }
20002
20003 /*
20004 * a < b =>
20005 * if (hi(a) < hi(b)) goto true;
20006 * if (hi(a) > hi(b)) goto false;
20007 * if (lo(a) < lo(b)) goto true;
20008 * false:
20009 */
20010
20011 if (code1 != UNKNOWN)
20012 ix86_expand_branch (code1, hi[0], hi[1], label);
20013 if (code2 != UNKNOWN)
20014 ix86_expand_branch (code2, hi[0], hi[1], label2);
20015
20016 ix86_expand_branch (code3, lo[0], lo[1], label);
20017
20018 if (code2 != UNKNOWN)
20019 emit_label (label2);
20020 return;
20021 }
20022
20023 default:
20024 gcc_assert (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC);
20025 goto simple;
20026 }
20027 }
20028
20029 /* Split branch based on floating point condition. */
20030 void
20031 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
20032 rtx target1, rtx target2, rtx tmp)
20033 {
20034 rtx condition;
20035 rtx i;
20036
20037 if (target2 != pc_rtx)
20038 {
20039 rtx tmp = target2;
20040 code = reverse_condition_maybe_unordered (code);
20041 target2 = target1;
20042 target1 = tmp;
20043 }
20044
20045 condition = ix86_expand_fp_compare (code, op1, op2,
20046 tmp);
20047
20048 i = emit_jump_insn (gen_rtx_SET
20049 (VOIDmode, pc_rtx,
20050 gen_rtx_IF_THEN_ELSE (VOIDmode,
20051 condition, target1, target2)));
20052 if (split_branch_probability >= 0)
20053 add_int_reg_note (i, REG_BR_PROB, split_branch_probability);
20054 }
20055
20056 void
20057 ix86_expand_setcc (rtx dest, enum rtx_code code, rtx op0, rtx op1)
20058 {
20059 rtx ret;
20060
20061 gcc_assert (GET_MODE (dest) == QImode);
20062
20063 ret = ix86_expand_compare (code, op0, op1);
20064 PUT_MODE (ret, QImode);
20065 emit_insn (gen_rtx_SET (VOIDmode, dest, ret));
20066 }
20067
20068 /* Expand a comparison setting or clearing the carry flag.  Return true
20069    when successful, and set *POP to the comparison operation.  */
20070 static bool
20071 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
20072 {
20073 enum machine_mode mode =
20074 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
20075
20076   /* Do not handle double-word compares; they go through a special path.  */
20077 if (mode == (TARGET_64BIT ? TImode : DImode))
20078 return false;
20079
20080 if (SCALAR_FLOAT_MODE_P (mode))
20081 {
20082 rtx compare_op, compare_seq;
20083
20084 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
20085
20086       /* Shortcut: the following common codes never translate
20087 	 into carry-flag compares.  */
20088 if (code == EQ || code == NE || code == UNEQ || code == LTGT
20089 || code == ORDERED || code == UNORDERED)
20090 return false;
20091
20092       /* These comparisons require the zero flag; swap operands so they don't.  */
20093 if ((code == GT || code == UNLE || code == LE || code == UNGT)
20094 && !TARGET_IEEE_FP)
20095 {
20096 rtx tmp = op0;
20097 op0 = op1;
20098 op1 = tmp;
20099 code = swap_condition (code);
20100 }
20101
20102       /* Try to expand the comparison and verify that we end up with a
20103 	 carry-flag-based comparison.  This fails only when we decide to
20104 	 expand the comparison using arithmetic, which is not a common
20105 	 scenario.  */
20106 start_sequence ();
20107 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
20108 compare_seq = get_insns ();
20109 end_sequence ();
20110
20111 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
20112 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
20113 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
20114 else
20115 code = GET_CODE (compare_op);
20116
20117 if (code != LTU && code != GEU)
20118 return false;
20119
20120 emit_insn (compare_seq);
20121 *pop = compare_op;
20122 return true;
20123 }
20124
20125 if (!INTEGRAL_MODE_P (mode))
20126 return false;
20127
20128 switch (code)
20129 {
20130 case LTU:
20131 case GEU:
20132 break;
20133
20134 /* Convert a==0 into (unsigned)a<1. */
20135 case EQ:
20136 case NE:
20137 if (op1 != const0_rtx)
20138 return false;
20139 op1 = const1_rtx;
20140 code = (code == EQ ? LTU : GEU);
20141 break;
20142
20143     /* Convert a>b into b<a or a>=b+1.  */
20144 case GTU:
20145 case LEU:
20146 if (CONST_INT_P (op1))
20147 {
20148 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
20149 	  /* Bail out on overflow.  We could still swap the operands, but
20150 	     that would force loading the constant into a register.  */
20151 if (op1 == const0_rtx
20152 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
20153 return false;
20154 code = (code == GTU ? GEU : LTU);
20155 }
20156 else
20157 {
20158 rtx tmp = op1;
20159 op1 = op0;
20160 op0 = tmp;
20161 code = (code == GTU ? LTU : GEU);
20162 }
20163 break;
20164
20165 /* Convert a>=0 into (unsigned)a<0x80000000. */
20166 case LT:
20167 case GE:
20168 if (mode == DImode || op1 != const0_rtx)
20169 return false;
20170 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
20171 code = (code == LT ? GEU : LTU);
20172 break;
20173 case LE:
20174 case GT:
20175 if (mode == DImode || op1 != constm1_rtx)
20176 return false;
20177 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
20178 code = (code == LE ? GEU : LTU);
20179 break;
20180
20181 default:
20182 return false;
20183 }
20184   /* Swapping operands may cause a constant to appear as the first operand.  */
20185 if (!nonimmediate_operand (op0, VOIDmode))
20186 {
20187 if (!can_create_pseudo_p ())
20188 return false;
20189 op0 = force_reg (mode, op0);
20190 }
20191 *pop = ix86_expand_compare (code, op0, op1);
20192 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
20193 return true;
20194 }
20195
20196 bool
20197 ix86_expand_int_movcc (rtx operands[])
20198 {
20199 enum rtx_code code = GET_CODE (operands[1]), compare_code;
20200 rtx compare_seq, compare_op;
20201 enum machine_mode mode = GET_MODE (operands[0]);
20202 bool sign_bit_compare_p = false;
20203 rtx op0 = XEXP (operands[1], 0);
20204 rtx op1 = XEXP (operands[1], 1);
20205
20206 if (GET_MODE (op0) == TImode
20207 || (GET_MODE (op0) == DImode
20208 && !TARGET_64BIT))
20209 return false;
20210
20211 start_sequence ();
20212 compare_op = ix86_expand_compare (code, op0, op1);
20213 compare_seq = get_insns ();
20214 end_sequence ();
20215
20216 compare_code = GET_CODE (compare_op);
20217
20218 if ((op1 == const0_rtx && (code == GE || code == LT))
20219 || (op1 == constm1_rtx && (code == GT || code == LE)))
20220 sign_bit_compare_p = true;
20221
20222 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
20223 HImode insns, we'd be swallowed in word prefix ops. */
20224
20225 if ((mode != HImode || TARGET_FAST_PREFIX)
20226 && (mode != (TARGET_64BIT ? TImode : DImode))
20227 && CONST_INT_P (operands[2])
20228 && CONST_INT_P (operands[3]))
20229 {
20230 rtx out = operands[0];
20231 HOST_WIDE_INT ct = INTVAL (operands[2]);
20232 HOST_WIDE_INT cf = INTVAL (operands[3]);
20233 HOST_WIDE_INT diff;
20234
20235 diff = ct - cf;
20236       /* Sign bit compares are better done using shifts than by using
20237 	 sbb.  */
20238 if (sign_bit_compare_p
20239 || ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
20240 {
20241 /* Detect overlap between destination and compare sources. */
20242 rtx tmp = out;
20243
20244 if (!sign_bit_compare_p)
20245 {
20246 rtx flags;
20247 bool fpcmp = false;
20248
20249 compare_code = GET_CODE (compare_op);
20250
20251 flags = XEXP (compare_op, 0);
20252
20253 if (GET_MODE (flags) == CCFPmode
20254 || GET_MODE (flags) == CCFPUmode)
20255 {
20256 fpcmp = true;
20257 compare_code
20258 = ix86_fp_compare_code_to_integer (compare_code);
20259 }
20260
20261 	      /* To simplify the rest of the code, restrict to the GEU case.  */
20262 if (compare_code == LTU)
20263 {
20264 HOST_WIDE_INT tmp = ct;
20265 ct = cf;
20266 cf = tmp;
20267 compare_code = reverse_condition (compare_code);
20268 code = reverse_condition (code);
20269 }
20270 else
20271 {
20272 if (fpcmp)
20273 PUT_CODE (compare_op,
20274 reverse_condition_maybe_unordered
20275 (GET_CODE (compare_op)));
20276 else
20277 PUT_CODE (compare_op,
20278 reverse_condition (GET_CODE (compare_op)));
20279 }
20280 diff = ct - cf;
20281
20282 if (reg_overlap_mentioned_p (out, op0)
20283 || reg_overlap_mentioned_p (out, op1))
20284 tmp = gen_reg_rtx (mode);
20285
20286 if (mode == DImode)
20287 emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op));
20288 else
20289 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp),
20290 flags, compare_op));
20291 }
20292 else
20293 {
20294 if (code == GT || code == GE)
20295 code = reverse_condition (code);
20296 else
20297 {
20298 HOST_WIDE_INT tmp = ct;
20299 ct = cf;
20300 cf = tmp;
20301 diff = ct - cf;
20302 }
20303 tmp = emit_store_flag (tmp, code, op0, op1, VOIDmode, 0, -1);
20304 }
20305
20306 if (diff == 1)
20307 {
20308 /*
20309 * cmpl op0,op1
20310 * sbbl dest,dest
20311 * [addl dest, ct]
20312 *
20313 * Size 5 - 8.
20314 */
20315 if (ct)
20316 tmp = expand_simple_binop (mode, PLUS,
20317 tmp, GEN_INT (ct),
20318 copy_rtx (tmp), 1, OPTAB_DIRECT);
20319 }
20320 else if (cf == -1)
20321 {
20322 /*
20323 * cmpl op0,op1
20324 * sbbl dest,dest
20325 * orl $ct, dest
20326 *
20327 * Size 8.
20328 */
20329 tmp = expand_simple_binop (mode, IOR,
20330 tmp, GEN_INT (ct),
20331 copy_rtx (tmp), 1, OPTAB_DIRECT);
20332 }
20333 else if (diff == -1 && ct)
20334 {
20335 /*
20336 * cmpl op0,op1
20337 * sbbl dest,dest
20338 * notl dest
20339 * [addl dest, cf]
20340 *
20341 * Size 8 - 11.
20342 */
20343 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
20344 if (cf)
20345 tmp = expand_simple_binop (mode, PLUS,
20346 copy_rtx (tmp), GEN_INT (cf),
20347 copy_rtx (tmp), 1, OPTAB_DIRECT);
20348 }
20349 else
20350 {
20351 /*
20352 * cmpl op0,op1
20353 * sbbl dest,dest
20354 * [notl dest]
20355 * andl cf - ct, dest
20356 * [addl dest, ct]
20357 *
20358 * Size 8 - 11.
20359 */
20360
20361 if (cf == 0)
20362 {
20363 cf = ct;
20364 ct = 0;
20365 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
20366 }
20367
20368 tmp = expand_simple_binop (mode, AND,
20369 copy_rtx (tmp),
20370 gen_int_mode (cf - ct, mode),
20371 copy_rtx (tmp), 1, OPTAB_DIRECT);
20372 if (ct)
20373 tmp = expand_simple_binop (mode, PLUS,
20374 copy_rtx (tmp), GEN_INT (ct),
20375 copy_rtx (tmp), 1, OPTAB_DIRECT);
20376 }
20377
20378 if (!rtx_equal_p (tmp, out))
20379 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
20380
20381 return true;
20382 }
20383
20384 if (diff < 0)
20385 {
20386 enum machine_mode cmp_mode = GET_MODE (op0);
20387
20388 HOST_WIDE_INT tmp;
20389 tmp = ct, ct = cf, cf = tmp;
20390 diff = -diff;
20391
20392 if (SCALAR_FLOAT_MODE_P (cmp_mode))
20393 {
20394 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
20395
20396 	      /* We may be reversing an unordered compare to a normal compare;
20397 		 that is not valid in general (we may convert a non-trapping
20398 		 condition into a trapping one), but on i386 we currently emit
20399 		 all comparisons unordered.  */
20400 compare_code = reverse_condition_maybe_unordered (compare_code);
20401 code = reverse_condition_maybe_unordered (code);
20402 }
20403 else
20404 {
20405 compare_code = reverse_condition (compare_code);
20406 code = reverse_condition (code);
20407 }
20408 }
20409
20410 compare_code = UNKNOWN;
20411 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT
20412 && CONST_INT_P (op1))
20413 {
20414 if (op1 == const0_rtx
20415 && (code == LT || code == GE))
20416 compare_code = code;
20417 else if (op1 == constm1_rtx)
20418 {
20419 if (code == LE)
20420 compare_code = LT;
20421 else if (code == GT)
20422 compare_code = GE;
20423 }
20424 }
20425
20426 /* Optimize dest = (op0 < 0) ? -1 : cf. */
20427 if (compare_code != UNKNOWN
20428 && GET_MODE (op0) == GET_MODE (out)
20429 && (cf == -1 || ct == -1))
20430 {
20431 	      /* If the lea code below could be used, only optimize
20432 		 if it results in a two-insn sequence.  */
20433
20434 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
20435 || diff == 3 || diff == 5 || diff == 9)
20436 || (compare_code == LT && ct == -1)
20437 || (compare_code == GE && cf == -1))
20438 {
20439 /*
20440 * notl op1 (if necessary)
20441 * sarl $31, op1
20442 * orl cf, op1
20443 */
20444 if (ct != -1)
20445 {
20446 cf = ct;
20447 ct = -1;
20448 code = reverse_condition (code);
20449 }
20450
20451 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
20452
20453 out = expand_simple_binop (mode, IOR,
20454 out, GEN_INT (cf),
20455 out, 1, OPTAB_DIRECT);
20456 if (out != operands[0])
20457 emit_move_insn (operands[0], out);
20458
20459 return true;
20460 }
20461 }
20462
20463
20464 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
20465 || diff == 3 || diff == 5 || diff == 9)
20466 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
20467 && (mode != DImode
20468 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
20469 {
20470 /*
20471 * xorl dest,dest
20472 * cmpl op1,op2
20473 * setcc dest
20474 * lea cf(dest*(ct-cf)),dest
20475 *
20476 * Size 14.
20477 *
20478 * This also catches the degenerate setcc-only case.
20479 */
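	  /* A worked example with hypothetical constants: for
	     dest = (a < b) ? 7 : 3 we have ct = 7, cf = 3 and diff = 4, so
	     after the setcc the result is rebuilt as cf + dest*diff, i.e. a
	     single lea of the form "lea 3(,%reg,4), %reg".  */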
20480
20481 rtx tmp;
20482 int nops;
20483
20484 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
20485
20486 nops = 0;
20487 	  /* On x86_64 the lea instruction operates on Pmode, so we need
20488 	     to get the arithmetic done in the proper mode to match.  */
20489 if (diff == 1)
20490 tmp = copy_rtx (out);
20491 else
20492 {
20493 rtx out1;
20494 out1 = copy_rtx (out);
20495 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
20496 nops++;
20497 if (diff & 1)
20498 {
20499 tmp = gen_rtx_PLUS (mode, tmp, out1);
20500 nops++;
20501 }
20502 }
20503 if (cf != 0)
20504 {
20505 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
20506 nops++;
20507 }
20508 if (!rtx_equal_p (tmp, out))
20509 {
20510 if (nops == 1)
20511 out = force_operand (tmp, copy_rtx (out));
20512 else
20513 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
20514 }
20515 if (!rtx_equal_p (out, operands[0]))
20516 emit_move_insn (operands[0], copy_rtx (out));
20517
20518 return true;
20519 }
20520
20521 /*
20522 * General case: Jumpful:
20523 * xorl dest,dest cmpl op1, op2
20524 * cmpl op1, op2 movl ct, dest
20525 * setcc dest jcc 1f
20526 * decl dest movl cf, dest
20527 * andl (cf-ct),dest 1:
20528 * addl ct,dest
20529 *
20530 * Size 20. Size 14.
20531 *
20532 	 * This is reasonably steep, but branch mispredict costs are
20533 	 * high on modern CPUs, so consider failing only if optimizing
20534 * for space.
20535 */
20536
20537 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
20538 && BRANCH_COST (optimize_insn_for_speed_p (),
20539 false) >= 2)
20540 {
20541 if (cf == 0)
20542 {
20543 enum machine_mode cmp_mode = GET_MODE (op0);
20544
20545 cf = ct;
20546 ct = 0;
20547
20548 if (SCALAR_FLOAT_MODE_P (cmp_mode))
20549 {
20550 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
20551
20552 	      /* We may be reversing an unordered compare to a normal compare;
20553 		 that is not valid in general (we may convert a non-trapping
20554 		 condition into a trapping one), but on i386 we currently
20555 		 emit all comparisons unordered.  */
20556 code = reverse_condition_maybe_unordered (code);
20557 }
20558 else
20559 {
20560 code = reverse_condition (code);
20561 if (compare_code != UNKNOWN)
20562 compare_code = reverse_condition (compare_code);
20563 }
20564 }
20565
20566 if (compare_code != UNKNOWN)
20567 {
20568 /* notl op1 (if needed)
20569 sarl $31, op1
20570 andl (cf-ct), op1
20571 addl ct, op1
20572
20573 For x < 0 (resp. x <= -1) there will be no notl,
20574 so if possible swap the constants to get rid of the
20575 complement.
20576 True/false will be -1/0 while code below (store flag
20577 followed by decrement) is 0/-1, so the constants need
20578 to be exchanged once more. */
20579
20580 if (compare_code == GE || !cf)
20581 {
20582 code = reverse_condition (code);
20583 compare_code = LT;
20584 }
20585 else
20586 {
20587 HOST_WIDE_INT tmp = cf;
20588 cf = ct;
20589 ct = tmp;
20590 }
20591
20592 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
20593 }
20594 else
20595 {
20596 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
20597
20598 out = expand_simple_binop (mode, PLUS, copy_rtx (out),
20599 constm1_rtx,
20600 copy_rtx (out), 1, OPTAB_DIRECT);
20601 }
20602
20603 out = expand_simple_binop (mode, AND, copy_rtx (out),
20604 gen_int_mode (cf - ct, mode),
20605 copy_rtx (out), 1, OPTAB_DIRECT);
20606 if (ct)
20607 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
20608 copy_rtx (out), 1, OPTAB_DIRECT);
20609 if (!rtx_equal_p (out, operands[0]))
20610 emit_move_insn (operands[0], copy_rtx (out));
20611
20612 return true;
20613 }
20614 }
20615
20616 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
20617 {
20618 /* Try a few things more with specific constants and a variable. */
20619
20620 optab op;
20621 rtx var, orig_out, out, tmp;
20622
20623 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
20624 return false;
20625
20626 /* If one of the two operands is an interesting constant, load a
20627 constant with the above and mask it in with a logical operation. */
20628
20629 if (CONST_INT_P (operands[2]))
20630 {
20631 var = operands[3];
20632 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
20633 operands[3] = constm1_rtx, op = and_optab;
20634 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
20635 operands[3] = const0_rtx, op = ior_optab;
20636 else
20637 return false;
20638 }
20639 else if (CONST_INT_P (operands[3]))
20640 {
20641 var = operands[2];
20642 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
20643 operands[2] = constm1_rtx, op = and_optab;
20644 	  else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
20645 operands[2] = const0_rtx, op = ior_optab;
20646 else
20647 return false;
20648 }
20649 else
20650 return false;
20651
20652 orig_out = operands[0];
20653 tmp = gen_reg_rtx (mode);
20654 operands[0] = tmp;
20655
20656 /* Recurse to get the constant loaded. */
20657 if (ix86_expand_int_movcc (operands) == 0)
20658 return false;
20659
20660 /* Mask in the interesting variable. */
20661 out = expand_binop (mode, op, var, tmp, orig_out, 0,
20662 OPTAB_WIDEN);
20663 if (!rtx_equal_p (out, orig_out))
20664 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
20665
20666 return true;
20667 }
20668
20669 /*
20670 * For comparison with above,
20671 *
20672 * movl cf,dest
20673 * movl ct,tmp
20674 * cmpl op1,op2
20675 * cmovcc tmp,dest
20676 *
20677 * Size 15.
20678 */
20679
20680 if (! nonimmediate_operand (operands[2], mode))
20681 operands[2] = force_reg (mode, operands[2]);
20682 if (! nonimmediate_operand (operands[3], mode))
20683 operands[3] = force_reg (mode, operands[3]);
20684
20685 if (! register_operand (operands[2], VOIDmode)
20686 && (mode == QImode
20687 || ! register_operand (operands[3], VOIDmode)))
20688 operands[2] = force_reg (mode, operands[2]);
20689
20690 if (mode == QImode
20691 && ! register_operand (operands[3], VOIDmode))
20692 operands[3] = force_reg (mode, operands[3]);
20693
20694 emit_insn (compare_seq);
20695 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
20696 gen_rtx_IF_THEN_ELSE (mode,
20697 compare_op, operands[2],
20698 operands[3])));
20699 return true;
20700 }
20701
20702 /* Swap, force into registers, or otherwise massage the two operands
20703 to an sse comparison with a mask result. Thus we differ a bit from
20704 ix86_prepare_fp_compare_args which expects to produce a flags result.
20705
20706 The DEST operand exists to help determine whether to commute commutative
20707 operators. The POP0/POP1 operands are updated in place. The new
20708 comparison code is returned, or UNKNOWN if not implementable. */
20709
20710 static enum rtx_code
20711 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
20712 rtx *pop0, rtx *pop1)
20713 {
20714 rtx tmp;
20715
20716 switch (code)
20717 {
20718 case LTGT:
20719 case UNEQ:
20720 /* AVX supports all the needed comparisons. */
20721 if (TARGET_AVX)
20722 break;
20723 /* We have no LTGT as an operator. We could implement it with
20724 NE & ORDERED, but this requires an extra temporary. It's
20725 not clear that it's worth it. */
20726 return UNKNOWN;
20727
20728 case LT:
20729 case LE:
20730 case UNGT:
20731 case UNGE:
20732 /* These are supported directly. */
20733 break;
20734
20735 case EQ:
20736 case NE:
20737 case UNORDERED:
20738 case ORDERED:
20739 /* AVX has 3 operand comparisons, no need to swap anything. */
20740 if (TARGET_AVX)
20741 break;
20742 /* For commutative operators, try to canonicalize the destination
20743 operand to be first in the comparison - this helps reload to
20744 avoid extra moves. */
20745 if (!dest || !rtx_equal_p (dest, *pop1))
20746 break;
20747 /* FALLTHRU */
20748
20749 case GE:
20750 case GT:
20751 case UNLE:
20752 case UNLT:
20753 /* These are not supported directly before AVX, and furthermore
20754 ix86_expand_sse_fp_minmax only optimizes LT/UNGE. Swap the
20755 comparison operands to transform into something that is
20756 supported. */
20757 tmp = *pop0;
20758 *pop0 = *pop1;
20759 *pop1 = tmp;
20760 code = swap_condition (code);
20761 break;
20762
20763 default:
20764 gcc_unreachable ();
20765 }
20766
20767 return code;
20768 }
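
/* An illustrative consequence (added note): before AVX the SSE compare
   predicates only cover EQ/LT/LE/UNORD/NEQ/NLT/NLE/ORD, so e.g. (a > b) is
   rewritten here as (b < a) by swapping the operands, while LTGT has no
   single-instruction equivalent and UNKNOWN is returned.  */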
20769
20770 /* Detect conditional moves that exactly match min/max operational
20771 semantics. Note that this is IEEE safe, as long as we don't
20772 interchange the operands.
20773
20774 Returns FALSE if this conditional move doesn't match a MIN/MAX,
20775 and TRUE if the operation is successful and instructions are emitted. */
20776
20777 static bool
20778 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
20779 rtx cmp_op1, rtx if_true, rtx if_false)
20780 {
20781 enum machine_mode mode;
20782 bool is_min;
20783 rtx tmp;
20784
20785 if (code == LT)
20786 ;
20787 else if (code == UNGE)
20788 {
20789 tmp = if_true;
20790 if_true = if_false;
20791 if_false = tmp;
20792 }
20793 else
20794 return false;
20795
20796 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
20797 is_min = true;
20798 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
20799 is_min = false;
20800 else
20801 return false;
20802
20803 mode = GET_MODE (dest);
20804
20805 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
20806 but MODE may be a vector mode and thus not appropriate. */
20807 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
20808 {
20809 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
20810 rtvec v;
20811
20812 if_true = force_reg (mode, if_true);
20813 v = gen_rtvec (2, if_true, if_false);
20814 tmp = gen_rtx_UNSPEC (mode, v, u);
20815 }
20816 else
20817 {
20818 code = is_min ? SMIN : SMAX;
20819 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
20820 }
20821
20822 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
20823 return true;
20824 }
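
/* An illustrative example (added note): dest = (a < b) ? a : b, with
   cmp_op0 == if_true and cmp_op1 == if_false, is recognized as a minimum.
   Under strict FP semantics the UNSPEC_IEEE_MIN/MAX form is emitted so later
   passes cannot commute the operands; SSE min/max are not commutative when a
   NaN or differently-signed zeros are involved, since they return the second
   source operand in those cases.  */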
20825
20826 /* Expand an sse vector comparison. Return the register with the result. */
20827
20828 static rtx
20829 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
20830 rtx op_true, rtx op_false)
20831 {
20832 enum machine_mode mode = GET_MODE (dest);
20833 enum machine_mode cmp_ops_mode = GET_MODE (cmp_op0);
20834
20835   /* In the general case, the result of the comparison can differ from the operands' type.  */
20836 enum machine_mode cmp_mode;
20837
20838 /* In AVX512F the result of comparison is an integer mask. */
20839 bool maskcmp = false;
20840 rtx x;
20841
20842 if (GET_MODE_SIZE (cmp_ops_mode) == 64)
20843 {
20844 cmp_mode = mode_for_size (GET_MODE_NUNITS (cmp_ops_mode), MODE_INT, 0);
20845 gcc_assert (cmp_mode != BLKmode);
20846
20847 maskcmp = true;
20848 }
20849 else
20850 cmp_mode = cmp_ops_mode;
20851
20852
20853 cmp_op0 = force_reg (cmp_ops_mode, cmp_op0);
20854 if (!nonimmediate_operand (cmp_op1, cmp_ops_mode))
20855 cmp_op1 = force_reg (cmp_ops_mode, cmp_op1);
20856
20857 if (optimize
20858 || reg_overlap_mentioned_p (dest, op_true)
20859 || reg_overlap_mentioned_p (dest, op_false))
20860 dest = gen_reg_rtx (maskcmp ? cmp_mode : mode);
20861
20862 /* Compare patterns for int modes are unspec in AVX512F only. */
20863 if (maskcmp && (code == GT || code == EQ))
20864 {
20865 rtx (*gen)(rtx, rtx, rtx);
20866
20867 switch (cmp_ops_mode)
20868 {
20869 case V16SImode:
20870 gen = code == GT ? gen_avx512f_gtv16si3 : gen_avx512f_eqv16si3_1;
20871 break;
20872 case V8DImode:
20873 gen = code == GT ? gen_avx512f_gtv8di3 : gen_avx512f_eqv8di3_1;
20874 break;
20875 default:
20876 gen = NULL;
20877 }
20878
20879 if (gen)
20880 {
20881 emit_insn (gen (dest, cmp_op0, cmp_op1));
20882 return dest;
20883 }
20884 }
20885 x = gen_rtx_fmt_ee (code, cmp_mode, cmp_op0, cmp_op1);
20886
20887 if (cmp_mode != mode && !maskcmp)
20888 {
20889 x = force_reg (cmp_ops_mode, x);
20890 convert_move (dest, x, false);
20891 }
20892 else
20893 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
20894
20895 return dest;
20896 }
20897
20898 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
20899 operations. This is used for both scalar and vector conditional moves. */
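/* Added note: when no blend instruction applies, the fallback below is the
   classic mask select dest = (cmp & op_true) | (~cmp & op_false), which
   assumes CMP is an all-ones/all-zeros mask in each element.  */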
20900
20901 static void
20902 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
20903 {
20904 enum machine_mode mode = GET_MODE (dest);
20905 enum machine_mode cmpmode = GET_MODE (cmp);
20906
20907 /* In AVX512F the result of comparison is an integer mask. */
20908 bool maskcmp = (mode != cmpmode && TARGET_AVX512F);
20909
20910 rtx t2, t3, x;
20911
20912 if (vector_all_ones_operand (op_true, mode)
20913 && rtx_equal_p (op_false, CONST0_RTX (mode))
20914 && !maskcmp)
20915 {
20916 emit_insn (gen_rtx_SET (VOIDmode, dest, cmp));
20917 }
20918 else if (op_false == CONST0_RTX (mode)
20919 && !maskcmp)
20920 {
20921 op_true = force_reg (mode, op_true);
20922 x = gen_rtx_AND (mode, cmp, op_true);
20923 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
20924 }
20925 else if (op_true == CONST0_RTX (mode)
20926 && !maskcmp)
20927 {
20928 op_false = force_reg (mode, op_false);
20929 x = gen_rtx_NOT (mode, cmp);
20930 x = gen_rtx_AND (mode, x, op_false);
20931 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
20932 }
20933 else if (INTEGRAL_MODE_P (mode) && op_true == CONSTM1_RTX (mode)
20934 && !maskcmp)
20935 {
20936 op_false = force_reg (mode, op_false);
20937 x = gen_rtx_IOR (mode, cmp, op_false);
20938 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
20939 }
20940 else if (TARGET_XOP
20941 && !maskcmp)
20942 {
20943 op_true = force_reg (mode, op_true);
20944
20945 if (!nonimmediate_operand (op_false, mode))
20946 op_false = force_reg (mode, op_false);
20947
20948 emit_insn (gen_rtx_SET (mode, dest,
20949 gen_rtx_IF_THEN_ELSE (mode, cmp,
20950 op_true,
20951 op_false)));
20952 }
20953 else
20954 {
20955 rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;
20956 rtx d = dest;
20957
20958 if (!nonimmediate_operand (op_true, mode))
20959 op_true = force_reg (mode, op_true);
20960
20961 op_false = force_reg (mode, op_false);
20962
20963 switch (mode)
20964 {
20965 case V4SFmode:
20966 if (TARGET_SSE4_1)
20967 gen = gen_sse4_1_blendvps;
20968 break;
20969 case V2DFmode:
20970 if (TARGET_SSE4_1)
20971 gen = gen_sse4_1_blendvpd;
20972 break;
20973 case V16QImode:
20974 case V8HImode:
20975 case V4SImode:
20976 case V2DImode:
20977 if (TARGET_SSE4_1)
20978 {
20979 gen = gen_sse4_1_pblendvb;
20980 if (mode != V16QImode)
20981 d = gen_reg_rtx (V16QImode);
20982 op_false = gen_lowpart (V16QImode, op_false);
20983 op_true = gen_lowpart (V16QImode, op_true);
20984 cmp = gen_lowpart (V16QImode, cmp);
20985 }
20986 break;
20987 case V8SFmode:
20988 if (TARGET_AVX)
20989 gen = gen_avx_blendvps256;
20990 break;
20991 case V4DFmode:
20992 if (TARGET_AVX)
20993 gen = gen_avx_blendvpd256;
20994 break;
20995 case V32QImode:
20996 case V16HImode:
20997 case V8SImode:
20998 case V4DImode:
20999 if (TARGET_AVX2)
21000 {
21001 gen = gen_avx2_pblendvb;
21002 if (mode != V32QImode)
21003 d = gen_reg_rtx (V32QImode);
21004 op_false = gen_lowpart (V32QImode, op_false);
21005 op_true = gen_lowpart (V32QImode, op_true);
21006 cmp = gen_lowpart (V32QImode, cmp);
21007 }
21008 break;
21009
21010 case V16SImode:
21011 gen = gen_avx512f_blendmv16si;
21012 break;
21013 case V8DImode:
21014 gen = gen_avx512f_blendmv8di;
21015 break;
21016 case V8DFmode:
21017 gen = gen_avx512f_blendmv8df;
21018 break;
21019 case V16SFmode:
21020 gen = gen_avx512f_blendmv16sf;
21021 break;
21022
21023 default:
21024 break;
21025 }
21026
21027 if (gen != NULL)
21028 {
21029 emit_insn (gen (d, op_false, op_true, cmp));
21030 if (d != dest)
21031 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
21032 }
21033 else
21034 {
21035 op_true = force_reg (mode, op_true);
21036
21037 t2 = gen_reg_rtx (mode);
21038 if (optimize)
21039 t3 = gen_reg_rtx (mode);
21040 else
21041 t3 = dest;
21042
21043 x = gen_rtx_AND (mode, op_true, cmp);
21044 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
21045
21046 x = gen_rtx_NOT (mode, cmp);
21047 x = gen_rtx_AND (mode, x, op_false);
21048 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
21049
21050 x = gen_rtx_IOR (mode, t3, t2);
21051 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
21052 }
21053 }
21054 }
21055
21056 /* Expand a floating-point conditional move. Return true if successful. */
21057
21058 bool
21059 ix86_expand_fp_movcc (rtx operands[])
21060 {
21061 enum machine_mode mode = GET_MODE (operands[0]);
21062 enum rtx_code code = GET_CODE (operands[1]);
21063 rtx tmp, compare_op;
21064 rtx op0 = XEXP (operands[1], 0);
21065 rtx op1 = XEXP (operands[1], 1);
21066
21067 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
21068 {
21069 enum machine_mode cmode;
21070
21071       /* Since we have no cmove for SSE registers, don't force bad register
21072 	 allocation just to gain access to one.  Deny movcc when the
21073 	 comparison mode doesn't match the move mode.  */
21074 cmode = GET_MODE (op0);
21075 if (cmode == VOIDmode)
21076 cmode = GET_MODE (op1);
21077 if (cmode != mode)
21078 return false;
21079
21080 code = ix86_prepare_sse_fp_compare_args (operands[0], code, &op0, &op1);
21081 if (code == UNKNOWN)
21082 return false;
21083
21084 if (ix86_expand_sse_fp_minmax (operands[0], code, op0, op1,
21085 operands[2], operands[3]))
21086 return true;
21087
21088 tmp = ix86_expand_sse_cmp (operands[0], code, op0, op1,
21089 operands[2], operands[3]);
21090 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
21091 return true;
21092 }
21093
21094 if (GET_MODE (op0) == TImode
21095 || (GET_MODE (op0) == DImode
21096 && !TARGET_64BIT))
21097 return false;
21098
21099 /* The floating point conditional move instructions don't directly
21100 support conditions resulting from a signed integer comparison. */
21101
21102 compare_op = ix86_expand_compare (code, op0, op1);
21103 if (!fcmov_comparison_operator (compare_op, VOIDmode))
21104 {
21105 tmp = gen_reg_rtx (QImode);
21106 ix86_expand_setcc (tmp, code, op0, op1);
21107
21108 compare_op = ix86_expand_compare (NE, tmp, const0_rtx);
21109 }
21110
21111 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
21112 gen_rtx_IF_THEN_ELSE (mode, compare_op,
21113 operands[2], operands[3])));
21114
21115 return true;
21116 }
21117
21118 /* Expand a floating-point vector conditional move; a vcond operation
21119 rather than a movcc operation. */
21120
21121 bool
21122 ix86_expand_fp_vcond (rtx operands[])
21123 {
21124 enum rtx_code code = GET_CODE (operands[3]);
21125 rtx cmp;
21126
21127 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
21128 &operands[4], &operands[5]);
21129 if (code == UNKNOWN)
21130 {
21131 rtx temp;
21132 switch (GET_CODE (operands[3]))
21133 {
21134 case LTGT:
21135 temp = ix86_expand_sse_cmp (operands[0], ORDERED, operands[4],
21136 operands[5], operands[0], operands[0]);
21137 cmp = ix86_expand_sse_cmp (operands[0], NE, operands[4],
21138 operands[5], operands[1], operands[2]);
21139 code = AND;
21140 break;
21141 case UNEQ:
21142 temp = ix86_expand_sse_cmp (operands[0], UNORDERED, operands[4],
21143 operands[5], operands[0], operands[0]);
21144 cmp = ix86_expand_sse_cmp (operands[0], EQ, operands[4],
21145 operands[5], operands[1], operands[2]);
21146 code = IOR;
21147 break;
21148 default:
21149 gcc_unreachable ();
21150 }
21151 cmp = expand_simple_binop (GET_MODE (cmp), code, temp, cmp, cmp, 1,
21152 OPTAB_DIRECT);
21153 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
21154 return true;
21155 }
21156
21157 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
21158 operands[5], operands[1], operands[2]))
21159 return true;
21160
21161 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
21162 operands[1], operands[2]);
21163 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
21164 return true;
21165 }
21166
21167 /* Expand a signed/unsigned integral vector conditional move. */
21168
21169 bool
21170 ix86_expand_int_vcond (rtx operands[])
21171 {
21172 enum machine_mode data_mode = GET_MODE (operands[0]);
21173 enum machine_mode mode = GET_MODE (operands[4]);
21174 enum rtx_code code = GET_CODE (operands[3]);
21175 bool negate = false;
21176 rtx x, cop0, cop1;
21177
21178 cop0 = operands[4];
21179 cop1 = operands[5];
21180
21181 /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31
21182 and x < 0 ? 1 : 0 into (unsigned) x >> 31. */
21183 if ((code == LT || code == GE)
21184 && data_mode == mode
21185 && cop1 == CONST0_RTX (mode)
21186 && operands[1 + (code == LT)] == CONST0_RTX (data_mode)
21187 && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) > 1
21188 && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) <= 8
21189 && (GET_MODE_SIZE (data_mode) == 16
21190 || (TARGET_AVX2 && GET_MODE_SIZE (data_mode) == 32)))
21191 {
21192 rtx negop = operands[2 - (code == LT)];
21193 int shift = GET_MODE_BITSIZE (GET_MODE_INNER (data_mode)) - 1;
21194 if (negop == CONST1_RTX (data_mode))
21195 {
21196 rtx res = expand_simple_binop (mode, LSHIFTRT, cop0, GEN_INT (shift),
21197 operands[0], 1, OPTAB_DIRECT);
21198 if (res != operands[0])
21199 emit_move_insn (operands[0], res);
21200 return true;
21201 }
21202 else if (GET_MODE_INNER (data_mode) != DImode
21203 && vector_all_ones_operand (negop, data_mode))
21204 {
21205 rtx res = expand_simple_binop (mode, ASHIFTRT, cop0, GEN_INT (shift),
21206 operands[0], 0, OPTAB_DIRECT);
21207 if (res != operands[0])
21208 emit_move_insn (operands[0], res);
21209 return true;
21210 }
21211 }
21212
21213 if (!nonimmediate_operand (cop1, mode))
21214 cop1 = force_reg (mode, cop1);
21215 if (!general_operand (operands[1], data_mode))
21216 operands[1] = force_reg (data_mode, operands[1]);
21217 if (!general_operand (operands[2], data_mode))
21218 operands[2] = force_reg (data_mode, operands[2]);
21219
21220 /* XOP supports all of the comparisons on all 128-bit vector int types. */
21221 if (TARGET_XOP
21222 && (mode == V16QImode || mode == V8HImode
21223 || mode == V4SImode || mode == V2DImode))
21224 ;
21225 else
21226 {
21227 /* Canonicalize the comparison to EQ, GT, GTU. */
21228 switch (code)
21229 {
21230 case EQ:
21231 case GT:
21232 case GTU:
21233 break;
21234
21235 case NE:
21236 case LE:
21237 case LEU:
21238 code = reverse_condition (code);
21239 negate = true;
21240 break;
21241
21242 case GE:
21243 case GEU:
21244 code = reverse_condition (code);
21245 negate = true;
21246 /* FALLTHRU */
21247
21248 case LT:
21249 case LTU:
21250 code = swap_condition (code);
21251 x = cop0, cop0 = cop1, cop1 = x;
21252 break;
21253
21254 default:
21255 gcc_unreachable ();
21256 }
21257
21258 /* Only SSE4.1/SSE4.2 supports V2DImode. */
21259 if (mode == V2DImode)
21260 {
21261 switch (code)
21262 {
21263 case EQ:
21264 /* SSE4.1 supports EQ. */
21265 if (!TARGET_SSE4_1)
21266 return false;
21267 break;
21268
21269 case GT:
21270 case GTU:
21271 /* SSE4.2 supports GT/GTU. */
21272 if (!TARGET_SSE4_2)
21273 return false;
21274 break;
21275
21276 default:
21277 gcc_unreachable ();
21278 }
21279 }
21280
21281 	  /* Unsigned parallel compare is not supported by the hardware.
21282 	     Play some tricks to turn this into a signed comparison
21283 	     the hardware can do.  */
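	  /* Illustrative example (added note): for V4SImode,
	     (unsigned) a > b is computed as
	     (signed) (a - 0x80000000) > (b - 0x80000000); subtracting the
	     sign-bit bias converts unsigned order into signed order, which
	     is what the signbit mask and gen_sub3 calls below implement.  */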
21284 if (code == GTU)
21285 {
21286 cop0 = force_reg (mode, cop0);
21287
21288 switch (mode)
21289 {
21290 case V16SImode:
21291 case V8DImode:
21292 case V8SImode:
21293 case V4DImode:
21294 case V4SImode:
21295 case V2DImode:
21296 {
21297 rtx t1, t2, mask;
21298 rtx (*gen_sub3) (rtx, rtx, rtx);
21299
21300 switch (mode)
21301 {
21302 case V16SImode: gen_sub3 = gen_subv16si3; break;
21303 case V8DImode: gen_sub3 = gen_subv8di3; break;
21304 case V8SImode: gen_sub3 = gen_subv8si3; break;
21305 case V4DImode: gen_sub3 = gen_subv4di3; break;
21306 case V4SImode: gen_sub3 = gen_subv4si3; break;
21307 case V2DImode: gen_sub3 = gen_subv2di3; break;
21308 default:
21309 gcc_unreachable ();
21310 }
21311 /* Subtract (-(INT MAX) - 1) from both operands to make
21312 them signed. */
21313 mask = ix86_build_signbit_mask (mode, true, false);
21314 t1 = gen_reg_rtx (mode);
21315 emit_insn (gen_sub3 (t1, cop0, mask));
21316
21317 t2 = gen_reg_rtx (mode);
21318 emit_insn (gen_sub3 (t2, cop1, mask));
21319
21320 cop0 = t1;
21321 cop1 = t2;
21322 code = GT;
21323 }
21324 break;
21325
21326 case V32QImode:
21327 case V16HImode:
21328 case V16QImode:
21329 case V8HImode:
21330 /* Perform a parallel unsigned saturating subtraction. */
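		/* Added note: (unsigned) a > b is equivalent to
		   (a -sat b) != 0 for the saturating subtraction below, so
		   the compare becomes EQ against zero with the NEGATE flag
		   flipped.  */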
21331 x = gen_reg_rtx (mode);
21332 emit_insn (gen_rtx_SET (VOIDmode, x,
21333 gen_rtx_US_MINUS (mode, cop0, cop1)));
21334
21335 cop0 = x;
21336 cop1 = CONST0_RTX (mode);
21337 code = EQ;
21338 negate = !negate;
21339 break;
21340
21341 default:
21342 gcc_unreachable ();
21343 }
21344 }
21345 }
21346
21347 /* Allow the comparison to be done in one mode, but the movcc to
21348 happen in another mode. */
21349 if (data_mode == mode)
21350 {
21351 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
21352 operands[1+negate], operands[2-negate]);
21353 }
21354 else
21355 {
21356 gcc_assert (GET_MODE_SIZE (data_mode) == GET_MODE_SIZE (mode));
21357 x = ix86_expand_sse_cmp (gen_reg_rtx (mode), code, cop0, cop1,
21358 operands[1+negate], operands[2-negate]);
21359 if (GET_MODE (x) == mode)
21360 x = gen_lowpart (data_mode, x);
21361 }
21362
21363 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
21364 operands[2-negate]);
21365 return true;
21366 }
21367
21368 static bool
21369 ix86_expand_vec_perm_vpermi2 (rtx target, rtx op0, rtx mask, rtx op1)
21370 {
21371 enum machine_mode mode = GET_MODE (op0);
21372 switch (mode)
21373 {
21374 case V16SImode:
21375 emit_insn (gen_avx512f_vpermi2varv16si3 (target, op0,
21376 force_reg (V16SImode, mask),
21377 op1));
21378 return true;
21379 case V16SFmode:
21380 emit_insn (gen_avx512f_vpermi2varv16sf3 (target, op0,
21381 force_reg (V16SImode, mask),
21382 op1));
21383 return true;
21384 case V8DImode:
21385 emit_insn (gen_avx512f_vpermi2varv8di3 (target, op0,
21386 force_reg (V8DImode, mask), op1));
21387 return true;
21388 case V8DFmode:
21389 emit_insn (gen_avx512f_vpermi2varv8df3 (target, op0,
21390 force_reg (V8DImode, mask), op1));
21391 return true;
21392 default:
21393 return false;
21394 }
21395 }
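
/* Added note (hedged): the vpermi2var patterns used above implement the
   AVX-512 two-table permute, where each mask element indexes into the
   concatenation of OP0 and OP1, so a single instruction handles the full
   two-operand variable shuffle for the 512-bit modes.  */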
21396
21397 /* Expand a variable vector permutation. */
21398
21399 void
21400 ix86_expand_vec_perm (rtx operands[])
21401 {
21402 rtx target = operands[0];
21403 rtx op0 = operands[1];
21404 rtx op1 = operands[2];
21405 rtx mask = operands[3];
21406 rtx t1, t2, t3, t4, t5, t6, t7, t8, vt, vt2, vec[32];
21407 enum machine_mode mode = GET_MODE (op0);
21408 enum machine_mode maskmode = GET_MODE (mask);
21409 int w, e, i;
21410 bool one_operand_shuffle = rtx_equal_p (op0, op1);
21411
21412 /* Number of elements in the vector. */
21413 w = GET_MODE_NUNITS (mode);
21414 e = GET_MODE_UNIT_SIZE (mode);
21415 gcc_assert (w <= 64);
21416
21417 if (ix86_expand_vec_perm_vpermi2 (target, op0, mask, op1))
21418 return;
21419
21420 if (TARGET_AVX2)
21421 {
21422 if (mode == V4DImode || mode == V4DFmode || mode == V16HImode)
21423 {
21424 	  /* Unfortunately, the VPERMQ and VPERMPD instructions only support
21425 	     a constant shuffle operand.  With a tiny bit of effort we can
21426 	     use VPERMD instead.  A re-interpretation stall for V4DFmode is
21427 	     unfortunate but there's no avoiding it.
21428 	     Similarly, for V16HImode we don't have instructions for variable
21429 	     shuffling, while for V32QImode we can, after preparing suitable
21430 	     masks, use vpshufb; vpshufb; vpermq; vpor.  */
21431
21432 if (mode == V16HImode)
21433 {
21434 maskmode = mode = V32QImode;
21435 w = 32;
21436 e = 1;
21437 }
21438 else
21439 {
21440 maskmode = mode = V8SImode;
21441 w = 8;
21442 e = 4;
21443 }
21444 t1 = gen_reg_rtx (maskmode);
21445
21446 /* Replicate the low bits of the V4DImode mask into V8SImode:
21447 mask = { A B C D }
21448 t1 = { A A B B C C D D }. */
21449 for (i = 0; i < w / 2; ++i)
21450 vec[i*2 + 1] = vec[i*2] = GEN_INT (i * 2);
21451 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
21452 vt = force_reg (maskmode, vt);
21453 mask = gen_lowpart (maskmode, mask);
21454 if (maskmode == V8SImode)
21455 emit_insn (gen_avx2_permvarv8si (t1, mask, vt));
21456 else
21457 emit_insn (gen_avx2_pshufbv32qi3 (t1, mask, vt));
21458
21459 	  /* Multiply the shuffle indices by two.  */
21460 t1 = expand_simple_binop (maskmode, PLUS, t1, t1, t1, 1,
21461 OPTAB_DIRECT);
21462
21463 	  /* Add one to the odd shuffle indices:
21464 t1 = { A*2, A*2+1, B*2, B*2+1, ... }. */
21465 for (i = 0; i < w / 2; ++i)
21466 {
21467 vec[i * 2] = const0_rtx;
21468 vec[i * 2 + 1] = const1_rtx;
21469 }
21470 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
21471 vt = validize_mem (force_const_mem (maskmode, vt));
21472 t1 = expand_simple_binop (maskmode, PLUS, t1, vt, t1, 1,
21473 OPTAB_DIRECT);
21474
21475 /* Continue as if V8SImode (resp. V32QImode) was used initially. */
21476 operands[3] = mask = t1;
21477 target = gen_reg_rtx (mode);
21478 op0 = gen_lowpart (mode, op0);
21479 op1 = gen_lowpart (mode, op1);
21480 }
21481
21482 switch (mode)
21483 {
21484 case V8SImode:
21485 /* The VPERMD and VPERMPS instructions already properly ignore
21486 the high bits of the shuffle elements. No need for us to
21487 perform an AND ourselves. */
21488 if (one_operand_shuffle)
21489 {
21490 emit_insn (gen_avx2_permvarv8si (target, op0, mask));
21491 if (target != operands[0])
21492 emit_move_insn (operands[0],
21493 gen_lowpart (GET_MODE (operands[0]), target));
21494 }
21495 else
21496 {
21497 t1 = gen_reg_rtx (V8SImode);
21498 t2 = gen_reg_rtx (V8SImode);
21499 emit_insn (gen_avx2_permvarv8si (t1, op0, mask));
21500 emit_insn (gen_avx2_permvarv8si (t2, op1, mask));
21501 goto merge_two;
21502 }
21503 return;
21504
21505 case V8SFmode:
21506 mask = gen_lowpart (V8SImode, mask);
21507 if (one_operand_shuffle)
21508 emit_insn (gen_avx2_permvarv8sf (target, op0, mask));
21509 else
21510 {
21511 t1 = gen_reg_rtx (V8SFmode);
21512 t2 = gen_reg_rtx (V8SFmode);
21513 emit_insn (gen_avx2_permvarv8sf (t1, op0, mask));
21514 emit_insn (gen_avx2_permvarv8sf (t2, op1, mask));
21515 goto merge_two;
21516 }
21517 return;
21518
21519 case V4SImode:
21520 /* By combining the two 128-bit input vectors into one 256-bit
21521 input vector, we can use VPERMD and VPERMPS for the full
21522 two-operand shuffle. */
21523 t1 = gen_reg_rtx (V8SImode);
21524 t2 = gen_reg_rtx (V8SImode);
21525 emit_insn (gen_avx_vec_concatv8si (t1, op0, op1));
21526 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
21527 emit_insn (gen_avx2_permvarv8si (t1, t1, t2));
21528 emit_insn (gen_avx_vextractf128v8si (target, t1, const0_rtx));
21529 return;
21530
21531 case V4SFmode:
21532 t1 = gen_reg_rtx (V8SFmode);
21533 t2 = gen_reg_rtx (V8SImode);
21534 mask = gen_lowpart (V4SImode, mask);
21535 emit_insn (gen_avx_vec_concatv8sf (t1, op0, op1));
21536 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
21537 emit_insn (gen_avx2_permvarv8sf (t1, t1, t2));
21538 emit_insn (gen_avx_vextractf128v8sf (target, t1, const0_rtx));
21539 return;
21540
21541 case V32QImode:
21542 t1 = gen_reg_rtx (V32QImode);
21543 t2 = gen_reg_rtx (V32QImode);
21544 t3 = gen_reg_rtx (V32QImode);
21545 vt2 = GEN_INT (-128);
21546 for (i = 0; i < 32; i++)
21547 vec[i] = vt2;
21548 vt = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
21549 vt = force_reg (V32QImode, vt);
21550 for (i = 0; i < 32; i++)
21551 vec[i] = i < 16 ? vt2 : const0_rtx;
21552 vt2 = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
21553 vt2 = force_reg (V32QImode, vt2);
21554 /* From mask create two adjusted masks, which contain the same
21555 bits as mask in the low 7 bits of each vector element.
21556 The first mask will have the most significant bit clear
21557 if it requests element from the same 128-bit lane
21558 and MSB set if it requests element from the other 128-bit lane.
21559 The second mask will have the opposite values of the MSB,
21560 and additionally will have its 128-bit lanes swapped.
21561 E.g. { 07 12 1e 09 ... | 17 19 05 1f ... } mask vector will have
21562 t1 { 07 92 9e 09 ... | 17 19 85 1f ... } and
21563 t3 { 97 99 05 9f ... | 87 12 1e 89 ... } where each ...
21564 stands for the other 12 bytes. */
21565 /* The bit that tells whether an element comes from the same lane or
21566 the other lane is bit 4, so shift it up by 3 to the MSB position. */
21567 t5 = gen_reg_rtx (V4DImode);
21568 emit_insn (gen_ashlv4di3 (t5, gen_lowpart (V4DImode, mask),
21569 GEN_INT (3)));
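 /* E.g. an index byte of 0x17 has bit 4 set, so after the shift its MSB
    is set.  Bits below the MSB may be polluted by the neighbouring byte
    of the V4DImode shift, but only the MSB survives the masking below. */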
21570 /* Clear MSB bits from the mask just in case it had them set. */
21571 emit_insn (gen_avx2_andnotv32qi3 (t2, vt, mask));
21572 /* After this t1 will have MSB set for elements from other lane. */
21573 emit_insn (gen_xorv32qi3 (t1, gen_lowpart (V32QImode, t5), vt2));
21574 /* Clear bits other than MSB. */
21575 emit_insn (gen_andv32qi3 (t1, t1, vt));
21576 /* Or in the lower bits from mask into t3. */
21577 emit_insn (gen_iorv32qi3 (t3, t1, t2));
21578 /* And invert MSB bits in t1, so MSB is set for elements from the same
21579 lane. */
21580 emit_insn (gen_xorv32qi3 (t1, t1, vt));
21581 /* Swap 128-bit lanes in t3. */
21582 t6 = gen_reg_rtx (V4DImode);
21583 emit_insn (gen_avx2_permv4di_1 (t6, gen_lowpart (V4DImode, t3),
21584 const2_rtx, GEN_INT (3),
21585 const0_rtx, const1_rtx));
21586 /* And or in the lower bits from mask into t1. */
21587 emit_insn (gen_iorv32qi3 (t1, t1, t2));
21588 if (one_operand_shuffle)
21589 {
21590 /* Each of these shuffles will put 0s in places where
21591 element from the other 128-bit lane is needed, otherwise
21592 will shuffle in the requested value. */
21593 emit_insn (gen_avx2_pshufbv32qi3 (t3, op0,
21594 gen_lowpart (V32QImode, t6)));
21595 emit_insn (gen_avx2_pshufbv32qi3 (t1, op0, t1));
21596 /* For t3 the 128-bit lanes are swapped again. */
21597 t7 = gen_reg_rtx (V4DImode);
21598 emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t3),
21599 const2_rtx, GEN_INT (3),
21600 const0_rtx, const1_rtx));
21601 /* And oring both together leads to the result. */
21602 emit_insn (gen_iorv32qi3 (target, t1,
21603 gen_lowpart (V32QImode, t7)));
21604 if (target != operands[0])
21605 emit_move_insn (operands[0],
21606 gen_lowpart (GET_MODE (operands[0]), target));
21607 return;
21608 }
21609
21610 t4 = gen_reg_rtx (V32QImode);
21611 /* Similarly to the above one_operand_shuffle code, just repeated
21612 twice, once for each operand. The merge_two: code below will
21613 merge the two results together. */
21614 emit_insn (gen_avx2_pshufbv32qi3 (t4, op0,
21615 gen_lowpart (V32QImode, t6)));
21616 emit_insn (gen_avx2_pshufbv32qi3 (t3, op1,
21617 gen_lowpart (V32QImode, t6)));
21618 emit_insn (gen_avx2_pshufbv32qi3 (t2, op0, t1));
21619 emit_insn (gen_avx2_pshufbv32qi3 (t1, op1, t1));
21620 t7 = gen_reg_rtx (V4DImode);
21621 emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t4),
21622 const2_rtx, GEN_INT (3),
21623 const0_rtx, const1_rtx));
21624 t8 = gen_reg_rtx (V4DImode);
21625 emit_insn (gen_avx2_permv4di_1 (t8, gen_lowpart (V4DImode, t3),
21626 const2_rtx, GEN_INT (3),
21627 const0_rtx, const1_rtx));
21628 emit_insn (gen_iorv32qi3 (t4, t2, gen_lowpart (V32QImode, t7)));
21629 emit_insn (gen_iorv32qi3 (t3, t1, gen_lowpart (V32QImode, t8)));
21630 t1 = t4;
21631 t2 = t3;
21632 goto merge_two;
21633
21634 default:
21635 gcc_assert (GET_MODE_SIZE (mode) <= 16);
21636 break;
21637 }
21638 }
21639
21640 if (TARGET_XOP)
21641 {
21642 /* The XOP VPPERM insn supports three inputs. By ignoring the
21643 one_operand_shuffle special case, we avoid creating another
21644 set of constant vectors in memory. */
21645 one_operand_shuffle = false;
21646
21647 /* mask = mask & {2*w-1, ...} */
21648 vt = GEN_INT (2*w - 1);
21649 }
21650 else
21651 {
21652 /* mask = mask & {w-1, ...} */
21653 vt = GEN_INT (w - 1);
21654 }
21655
21656 for (i = 0; i < w; i++)
21657 vec[i] = vt;
21658 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
21659 mask = expand_simple_binop (maskmode, AND, mask, vt,
21660 NULL_RTX, 0, OPTAB_DIRECT);
21661
21662 /* For non-QImode operations, convert the word permutation control
21663 into a byte permutation control. */
21664 if (mode != V16QImode)
21665 {
21666 mask = expand_simple_binop (maskmode, ASHIFT, mask,
21667 GEN_INT (exact_log2 (e)),
21668 NULL_RTX, 0, OPTAB_DIRECT);
21669
21670 /* Convert mask to vector of chars. */
21671 mask = force_reg (V16QImode, gen_lowpart (V16QImode, mask));
21672
21673 /* Replicate each of the input bytes into byte positions:
21674 (v2di) --> {0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8}
21675 (v4si) --> {0,0,0,0, 4,4,4,4, 8,8,8,8, 12,12,12,12}
21676 (v8hi) --> {0,0, 2,2, 4,4, 6,6, ...}. */
21677 for (i = 0; i < 16; ++i)
21678 vec[i] = GEN_INT (i/e * e);
21679 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
21680 vt = validize_mem (force_const_mem (V16QImode, vt));
21681 if (TARGET_XOP)
21682 emit_insn (gen_xop_pperm (mask, mask, mask, vt));
21683 else
21684 emit_insn (gen_ssse3_pshufbv16qi3 (mask, mask, vt));
21685
21686 /* Convert it into the byte positions by doing
21687 mask = mask + {0,1,..,e-1, 0,1,..,e-1, ...} */
21688 for (i = 0; i < 16; ++i)
21689 vec[i] = GEN_INT (i % e);
21690 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
21691 vt = validize_mem (force_const_mem (V16QImode, vt));
21692 emit_insn (gen_addv16qi3 (mask, mask, vt));
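 /* Worked illustration of the conversion above: for V4SImode (e == 4)
    and a word mask of { 2 0 3 1 }, the shift gives { 8 0 12 4 }, the
    byte-replication shuffle gives
    { 8 8 8 8  0 0 0 0  12 12 12 12  4 4 4 4 }, and the final addition
    yields { 8 9 10 11  0 1 2 3  12 13 14 15  4 5 6 7 }, i.e. the byte
    indices of SImode elements 2, 0, 3 and 1. */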
21693 }
21694
21695 /* The actual shuffle operations all operate on V16QImode. */
21696 op0 = gen_lowpart (V16QImode, op0);
21697 op1 = gen_lowpart (V16QImode, op1);
21698
21699 if (TARGET_XOP)
21700 {
21701 if (GET_MODE (target) != V16QImode)
21702 target = gen_reg_rtx (V16QImode);
21703 emit_insn (gen_xop_pperm (target, op0, op1, mask));
21704 if (target != operands[0])
21705 emit_move_insn (operands[0],
21706 gen_lowpart (GET_MODE (operands[0]), target));
21707 }
21708 else if (one_operand_shuffle)
21709 {
21710 if (GET_MODE (target) != V16QImode)
21711 target = gen_reg_rtx (V16QImode);
21712 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, mask));
21713 if (target != operands[0])
21714 emit_move_insn (operands[0],
21715 gen_lowpart (GET_MODE (operands[0]), target));
21716 }
21717 else
21718 {
21719 rtx xops[6];
21720 bool ok;
21721
21722 /* Shuffle the two input vectors independently. */
21723 t1 = gen_reg_rtx (V16QImode);
21724 t2 = gen_reg_rtx (V16QImode);
21725 emit_insn (gen_ssse3_pshufbv16qi3 (t1, op0, mask));
21726 emit_insn (gen_ssse3_pshufbv16qi3 (t2, op1, mask));
21727
21728 merge_two:
21729 /* Then merge them together. The key is whether any given control
21730 element contained a bit set that indicates the second word. */
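 /* Illustration: with w == 4, a control element of 5 has the bit with
    value w set, so the EQ test against { w w w w } built below selects
    the element shuffled from the second input, while a control element
    of 1 selects the first input (the selection itself being done by
    ix86_expand_int_vcond below). */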
21731 mask = operands[3];
21732 vt = GEN_INT (w);
21733 if (maskmode == V2DImode && !TARGET_SSE4_1)
21734 {
21735 /* Without SSE4.1, we don't have V2DImode EQ. Perform one
21736 more shuffle to convert the V2DI input mask into a V4SI
21737 input mask. At which point the masking that expand_int_vcond
21738 does will work as desired. */
21739 rtx t3 = gen_reg_rtx (V4SImode);
21740 emit_insn (gen_sse2_pshufd_1 (t3, gen_lowpart (V4SImode, mask),
21741 const0_rtx, const0_rtx,
21742 const2_rtx, const2_rtx));
21743 mask = t3;
21744 maskmode = V4SImode;
21745 e = w = 4;
21746 }
21747
21748 for (i = 0; i < w; i++)
21749 vec[i] = vt;
21750 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
21751 vt = force_reg (maskmode, vt);
21752 mask = expand_simple_binop (maskmode, AND, mask, vt,
21753 NULL_RTX, 0, OPTAB_DIRECT);
21754
21755 if (GET_MODE (target) != mode)
21756 target = gen_reg_rtx (mode);
21757 xops[0] = target;
21758 xops[1] = gen_lowpart (mode, t2);
21759 xops[2] = gen_lowpart (mode, t1);
21760 xops[3] = gen_rtx_EQ (maskmode, mask, vt);
21761 xops[4] = mask;
21762 xops[5] = vt;
21763 ok = ix86_expand_int_vcond (xops);
21764 gcc_assert (ok);
21765 if (target != operands[0])
21766 emit_move_insn (operands[0],
21767 gen_lowpart (GET_MODE (operands[0]), target));
21768 }
21769 }
21770
21771 /* Unpack SRC into the next wider integer vector type DEST. UNSIGNED_P is
21772 true if we should do zero extension, else sign extension. HIGH_P is
21773 true if we want the N/2 high elements, else the low elements. */
21774
21775 void
21776 ix86_expand_sse_unpack (rtx dest, rtx src, bool unsigned_p, bool high_p)
21777 {
21778 enum machine_mode imode = GET_MODE (src);
21779 rtx tmp;
21780
21781 if (TARGET_SSE4_1)
21782 {
21783 rtx (*unpack)(rtx, rtx);
21784 rtx (*extract)(rtx, rtx) = NULL;
21785 enum machine_mode halfmode = BLKmode;
21786
21787 switch (imode)
21788 {
21789 case V32QImode:
21790 if (unsigned_p)
21791 unpack = gen_avx2_zero_extendv16qiv16hi2;
21792 else
21793 unpack = gen_avx2_sign_extendv16qiv16hi2;
21794 halfmode = V16QImode;
21795 extract
21796 = high_p ? gen_vec_extract_hi_v32qi : gen_vec_extract_lo_v32qi;
21797 break;
21798 case V32HImode:
21799 if (unsigned_p)
21800 unpack = gen_avx512f_zero_extendv16hiv16si2;
21801 else
21802 unpack = gen_avx512f_sign_extendv16hiv16si2;
21803 halfmode = V16HImode;
21804 extract
21805 = high_p ? gen_vec_extract_hi_v32hi : gen_vec_extract_lo_v32hi;
21806 break;
21807 case V16HImode:
21808 if (unsigned_p)
21809 unpack = gen_avx2_zero_extendv8hiv8si2;
21810 else
21811 unpack = gen_avx2_sign_extendv8hiv8si2;
21812 halfmode = V8HImode;
21813 extract
21814 = high_p ? gen_vec_extract_hi_v16hi : gen_vec_extract_lo_v16hi;
21815 break;
21816 case V16SImode:
21817 if (unsigned_p)
21818 unpack = gen_avx512f_zero_extendv8siv8di2;
21819 else
21820 unpack = gen_avx512f_sign_extendv8siv8di2;
21821 halfmode = V8SImode;
21822 extract
21823 = high_p ? gen_vec_extract_hi_v16si : gen_vec_extract_lo_v16si;
21824 break;
21825 case V8SImode:
21826 if (unsigned_p)
21827 unpack = gen_avx2_zero_extendv4siv4di2;
21828 else
21829 unpack = gen_avx2_sign_extendv4siv4di2;
21830 halfmode = V4SImode;
21831 extract
21832 = high_p ? gen_vec_extract_hi_v8si : gen_vec_extract_lo_v8si;
21833 break;
21834 case V16QImode:
21835 if (unsigned_p)
21836 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
21837 else
21838 unpack = gen_sse4_1_sign_extendv8qiv8hi2;
21839 break;
21840 case V8HImode:
21841 if (unsigned_p)
21842 unpack = gen_sse4_1_zero_extendv4hiv4si2;
21843 else
21844 unpack = gen_sse4_1_sign_extendv4hiv4si2;
21845 break;
21846 case V4SImode:
21847 if (unsigned_p)
21848 unpack = gen_sse4_1_zero_extendv2siv2di2;
21849 else
21850 unpack = gen_sse4_1_sign_extendv2siv2di2;
21851 break;
21852 default:
21853 gcc_unreachable ();
21854 }
21855
21856 if (GET_MODE_SIZE (imode) >= 32)
21857 {
21858 tmp = gen_reg_rtx (halfmode);
21859 emit_insn (extract (tmp, src));
21860 }
21861 else if (high_p)
21862 {
21863 /* Shift higher 8 bytes to lower 8 bytes. */
21864 tmp = gen_reg_rtx (V1TImode);
21865 emit_insn (gen_sse2_lshrv1ti3 (tmp, gen_lowpart (V1TImode, src),
21866 GEN_INT (64)));
21867 tmp = gen_lowpart (imode, tmp);
21868 }
21869 else
21870 tmp = src;
21871
21872 emit_insn (unpack (dest, tmp));
21873 }
21874 else
21875 {
21876 rtx (*unpack)(rtx, rtx, rtx);
21877
21878 switch (imode)
21879 {
21880 case V16QImode:
21881 if (high_p)
21882 unpack = gen_vec_interleave_highv16qi;
21883 else
21884 unpack = gen_vec_interleave_lowv16qi;
21885 break;
21886 case V8HImode:
21887 if (high_p)
21888 unpack = gen_vec_interleave_highv8hi;
21889 else
21890 unpack = gen_vec_interleave_lowv8hi;
21891 break;
21892 case V4SImode:
21893 if (high_p)
21894 unpack = gen_vec_interleave_highv4si;
21895 else
21896 unpack = gen_vec_interleave_lowv4si;
21897 break;
21898 default:
21899 gcc_unreachable ();
21900 }
21901
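 /* Illustration: without SSE4.1 the extension is done by interleaving
    SRC with either zero (zero extension) or with a vector of its sign
    masks computed as 0 > SRC (sign extension).  E.g. interleaving the
    low halves of { a0 .. a7 } and { s0 .. s7 } produces
    { a0 s0 a1 s1 ... }, which viewed as wider elements is the extension
    of a0 .. a3. */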
21902 if (unsigned_p)
21903 tmp = force_reg (imode, CONST0_RTX (imode));
21904 else
21905 tmp = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
21906 src, pc_rtx, pc_rtx);
21907
21908 rtx tmp2 = gen_reg_rtx (imode);
21909 emit_insn (unpack (tmp2, src, tmp));
21910 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), tmp2));
21911 }
21912 }
21913
21914 /* Expand conditional increment or decrement using adc/sbb instructions.
21915 The default case using setcc followed by the conditional move can be
21916 done by generic code. */
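 /* For instance (illustrative): a pattern such as x = y + (a < b) with an
    unsigned comparison is expanded as a compare that leaves the condition
    in the carry flag, followed by a single adc of zero; the decrement and
    reversed-condition variants use sbb or a -1 addend instead. */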
21917 bool
21918 ix86_expand_int_addcc (rtx operands[])
21919 {
21920 enum rtx_code code = GET_CODE (operands[1]);
21921 rtx flags;
21922 rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
21923 rtx compare_op;
21924 rtx val = const0_rtx;
21925 bool fpcmp = false;
21926 enum machine_mode mode;
21927 rtx op0 = XEXP (operands[1], 0);
21928 rtx op1 = XEXP (operands[1], 1);
21929
21930 if (operands[3] != const1_rtx
21931 && operands[3] != constm1_rtx)
21932 return false;
21933 if (!ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
21934 return false;
21935 code = GET_CODE (compare_op);
21936
21937 flags = XEXP (compare_op, 0);
21938
21939 if (GET_MODE (flags) == CCFPmode
21940 || GET_MODE (flags) == CCFPUmode)
21941 {
21942 fpcmp = true;
21943 code = ix86_fp_compare_code_to_integer (code);
21944 }
21945
21946 if (code != LTU)
21947 {
21948 val = constm1_rtx;
21949 if (fpcmp)
21950 PUT_CODE (compare_op,
21951 reverse_condition_maybe_unordered
21952 (GET_CODE (compare_op)));
21953 else
21954 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
21955 }
21956
21957 mode = GET_MODE (operands[0]);
21958
21959 /* Construct either adc or sbb insn. */
21960 if ((code == LTU) == (operands[3] == constm1_rtx))
21961 {
21962 switch (mode)
21963 {
21964 case QImode:
21965 insn = gen_subqi3_carry;
21966 break;
21967 case HImode:
21968 insn = gen_subhi3_carry;
21969 break;
21970 case SImode:
21971 insn = gen_subsi3_carry;
21972 break;
21973 case DImode:
21974 insn = gen_subdi3_carry;
21975 break;
21976 default:
21977 gcc_unreachable ();
21978 }
21979 }
21980 else
21981 {
21982 switch (mode)
21983 {
21984 case QImode:
21985 insn = gen_addqi3_carry;
21986 break;
21987 case HImode:
21988 insn = gen_addhi3_carry;
21989 break;
21990 case SImode:
21991 insn = gen_addsi3_carry;
21992 break;
21993 case DImode:
21994 insn = gen_adddi3_carry;
21995 break;
21996 default:
21997 gcc_unreachable ();
21998 }
21999 }
22000 emit_insn (insn (operands[0], operands[2], val, flags, compare_op));
22001
22002 return true;
22003 }
22004
22005
22006 /* Split OPERAND into half-mode parts. Similar to split_double_mode,
22007 but works for floating point parameters and non-offsettable memories.
22008 For pushes, it returns just stack offsets; the values will be saved
22009 in the right order. At most four parts are generated. */
22010
22011 static int
22012 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
22013 {
22014 int size;
22015
22016 if (!TARGET_64BIT)
22017 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
22018 else
22019 size = (GET_MODE_SIZE (mode) + 4) / 8;
22020
22021 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
22022 gcc_assert (size >= 2 && size <= 4);
22023
22024 /* Optimize constant pool reference to immediates. This is used by fp
22025 moves, that force all constants to memory to allow combining. */
22026 if (MEM_P (operand) && MEM_READONLY_P (operand))
22027 {
22028 rtx tmp = maybe_get_pool_constant (operand);
22029 if (tmp)
22030 operand = tmp;
22031 }
22032
22033 if (MEM_P (operand) && !offsettable_memref_p (operand))
22034 {
22035 /* The only non-offsetable memories we handle are pushes. */
22036 int ok = push_operand (operand, VOIDmode);
22037
22038 gcc_assert (ok);
22039
22040 operand = copy_rtx (operand);
22041 PUT_MODE (operand, word_mode);
22042 parts[0] = parts[1] = parts[2] = parts[3] = operand;
22043 return size;
22044 }
22045
22046 if (GET_CODE (operand) == CONST_VECTOR)
22047 {
22048 enum machine_mode imode = int_mode_for_mode (mode);
22049 /* Caution: if we looked through a constant pool memory above,
22050 the operand may actually have a different mode now. That's
22051 ok, since we want to pun this all the way back to an integer. */
22052 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
22053 gcc_assert (operand != NULL);
22054 mode = imode;
22055 }
22056
22057 if (!TARGET_64BIT)
22058 {
22059 if (mode == DImode)
22060 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
22061 else
22062 {
22063 int i;
22064
22065 if (REG_P (operand))
22066 {
22067 gcc_assert (reload_completed);
22068 for (i = 0; i < size; i++)
22069 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
22070 }
22071 else if (offsettable_memref_p (operand))
22072 {
22073 operand = adjust_address (operand, SImode, 0);
22074 parts[0] = operand;
22075 for (i = 1; i < size; i++)
22076 parts[i] = adjust_address (operand, SImode, 4 * i);
22077 }
22078 else if (GET_CODE (operand) == CONST_DOUBLE)
22079 {
22080 REAL_VALUE_TYPE r;
22081 long l[4];
22082
22083 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
22084 switch (mode)
22085 {
22086 case TFmode:
22087 real_to_target (l, &r, mode);
22088 parts[3] = gen_int_mode (l[3], SImode);
22089 parts[2] = gen_int_mode (l[2], SImode);
22090 break;
22091 case XFmode:
22092 /* We can't use REAL_VALUE_TO_TARGET_LONG_DOUBLE since
22093 long double may not be 80-bit. */
22094 real_to_target (l, &r, mode);
22095 parts[2] = gen_int_mode (l[2], SImode);
22096 break;
22097 case DFmode:
22098 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
22099 break;
22100 default:
22101 gcc_unreachable ();
22102 }
22103 parts[1] = gen_int_mode (l[1], SImode);
22104 parts[0] = gen_int_mode (l[0], SImode);
22105 }
22106 else
22107 gcc_unreachable ();
22108 }
22109 }
22110 else
22111 {
22112 if (mode == TImode)
22113 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
22114 if (mode == XFmode || mode == TFmode)
22115 {
22116 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
22117 if (REG_P (operand))
22118 {
22119 gcc_assert (reload_completed);
22120 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
22121 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
22122 }
22123 else if (offsettable_memref_p (operand))
22124 {
22125 operand = adjust_address (operand, DImode, 0);
22126 parts[0] = operand;
22127 parts[1] = adjust_address (operand, upper_mode, 8);
22128 }
22129 else if (GET_CODE (operand) == CONST_DOUBLE)
22130 {
22131 REAL_VALUE_TYPE r;
22132 long l[4];
22133
22134 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
22135 real_to_target (l, &r, mode);
22136
22137 /* Do not use shift by 32 to avoid warning on 32bit systems. */
22138 if (HOST_BITS_PER_WIDE_INT >= 64)
22139 parts[0]
22140 = gen_int_mode
22141 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
22142 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
22143 DImode);
22144 else
22145 parts[0] = immed_double_const (l[0], l[1], DImode);
22146
22147 if (upper_mode == SImode)
22148 parts[1] = gen_int_mode (l[2], SImode);
22149 else if (HOST_BITS_PER_WIDE_INT >= 64)
22150 parts[1]
22151 = gen_int_mode
22152 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
22153 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
22154 DImode);
22155 else
22156 parts[1] = immed_double_const (l[2], l[3], DImode);
22157 }
22158 else
22159 gcc_unreachable ();
22160 }
22161 }
22162
22163 return size;
22164 }
22165
22166 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
22167 The destination parts are stored into operands 2-5 and the source
22168 parts into operands 6-9, in the correct order, before the moves
22169 are emitted. */
22170
22171 void
22172 ix86_split_long_move (rtx operands[])
22173 {
22174 rtx part[2][4];
22175 int nparts, i, j;
22176 int push = 0;
22177 int collisions = 0;
22178 enum machine_mode mode = GET_MODE (operands[0]);
22179 bool collisionparts[4];
22180
22181 /* The DFmode expanders may ask us to move a double.
22182 For a 64-bit target this is a single move. By hiding the fact
22183 here we simplify the i386.md splitters. */
22184 if (TARGET_64BIT && GET_MODE_SIZE (GET_MODE (operands[0])) == 8)
22185 {
22186 /* Optimize constant pool reference to immediates. This is used by
22187 fp moves, that force all constants to memory to allow combining. */
22188
22189 if (MEM_P (operands[1])
22190 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
22191 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
22192 operands[1] = get_pool_constant (XEXP (operands[1], 0));
22193 if (push_operand (operands[0], VOIDmode))
22194 {
22195 operands[0] = copy_rtx (operands[0]);
22196 PUT_MODE (operands[0], word_mode);
22197 }
22198 else
22199 operands[0] = gen_lowpart (DImode, operands[0]);
22200 operands[1] = gen_lowpart (DImode, operands[1]);
22201 emit_move_insn (operands[0], operands[1]);
22202 return;
22203 }
22204
22205 /* The only non-offsettable memory we handle is push. */
22206 if (push_operand (operands[0], VOIDmode))
22207 push = 1;
22208 else
22209 gcc_assert (!MEM_P (operands[0])
22210 || offsettable_memref_p (operands[0]));
22211
22212 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
22213 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
22214
22215 /* When emitting push, take care for source operands on the stack. */
22216 if (push && MEM_P (operands[1])
22217 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
22218 {
22219 rtx src_base = XEXP (part[1][nparts - 1], 0);
22220
22221 /* Compensate for the stack decrement by 4. */
22222 if (!TARGET_64BIT && nparts == 3
22223 && mode == XFmode && TARGET_128BIT_LONG_DOUBLE)
22224 src_base = plus_constant (Pmode, src_base, 4);
22225
22226 /* src_base refers to the stack pointer and is
22227 automatically decreased by emitted push. */
22228 for (i = 0; i < nparts; i++)
22229 part[1][i] = change_address (part[1][i],
22230 GET_MODE (part[1][i]), src_base);
22231 }
22232
22233 /* We need to do the copy in the right order in case an address register
22234 of the source overlaps the destination. */
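 /* For instance, if the register holding the low destination part is also
    the base register of the source address, it must not be overwritten
    until the remaining parts have been read (illustrative note). */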
22235 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
22236 {
22237 rtx tmp;
22238
22239 for (i = 0; i < nparts; i++)
22240 {
22241 collisionparts[i]
22242 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
22243 if (collisionparts[i])
22244 collisions++;
22245 }
22246
22247 /* Collision in the middle part can be handled by reordering. */
22248 if (collisions == 1 && nparts == 3 && collisionparts [1])
22249 {
22250 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
22251 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
22252 }
22253 else if (collisions == 1
22254 && nparts == 4
22255 && (collisionparts [1] || collisionparts [2]))
22256 {
22257 if (collisionparts [1])
22258 {
22259 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
22260 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
22261 }
22262 else
22263 {
22264 tmp = part[0][2]; part[0][2] = part[0][3]; part[0][3] = tmp;
22265 tmp = part[1][2]; part[1][2] = part[1][3]; part[1][3] = tmp;
22266 }
22267 }
22268
22269 /* If there are more collisions, we can't handle it by reordering.
22270 Do an lea to the last part and use only one colliding move. */
22271 else if (collisions > 1)
22272 {
22273 rtx base;
22274
22275 collisions = 1;
22276
22277 base = part[0][nparts - 1];
22278
22279 /* Handle the case when the last part isn't valid for lea.
22280 Happens in 64-bit mode storing the 12-byte XFmode. */
22281 if (GET_MODE (base) != Pmode)
22282 base = gen_rtx_REG (Pmode, REGNO (base));
22283
22284 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
22285 part[1][0] = replace_equiv_address (part[1][0], base);
22286 for (i = 1; i < nparts; i++)
22287 {
22288 tmp = plus_constant (Pmode, base, UNITS_PER_WORD * i);
22289 part[1][i] = replace_equiv_address (part[1][i], tmp);
22290 }
22291 }
22292 }
22293
22294 if (push)
22295 {
22296 if (!TARGET_64BIT)
22297 {
22298 if (nparts == 3)
22299 {
22300 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
22301 emit_insn (ix86_gen_add3 (stack_pointer_rtx,
22302 stack_pointer_rtx, GEN_INT (-4)));
22303 emit_move_insn (part[0][2], part[1][2]);
22304 }
22305 else if (nparts == 4)
22306 {
22307 emit_move_insn (part[0][3], part[1][3]);
22308 emit_move_insn (part[0][2], part[1][2]);
22309 }
22310 }
22311 else
22312 {
22313 /* In 64-bit mode we don't have a 32-bit push available. If this is a
22314 register, that is OK - we will just use the larger counterpart. We also
22315 retype memory - this comes from an attempt to avoid the REX prefix on
22316 moving of the second half of a TFmode value. */
22317 if (GET_MODE (part[1][1]) == SImode)
22318 {
22319 switch (GET_CODE (part[1][1]))
22320 {
22321 case MEM:
22322 part[1][1] = adjust_address (part[1][1], DImode, 0);
22323 break;
22324
22325 case REG:
22326 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
22327 break;
22328
22329 default:
22330 gcc_unreachable ();
22331 }
22332
22333 if (GET_MODE (part[1][0]) == SImode)
22334 part[1][0] = part[1][1];
22335 }
22336 }
22337 emit_move_insn (part[0][1], part[1][1]);
22338 emit_move_insn (part[0][0], part[1][0]);
22339 return;
22340 }
22341
22342 /* Choose correct order to not overwrite the source before it is copied. */
22343 if ((REG_P (part[0][0])
22344 && REG_P (part[1][1])
22345 && (REGNO (part[0][0]) == REGNO (part[1][1])
22346 || (nparts == 3
22347 && REGNO (part[0][0]) == REGNO (part[1][2]))
22348 || (nparts == 4
22349 && REGNO (part[0][0]) == REGNO (part[1][3]))))
22350 || (collisions > 0
22351 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
22352 {
22353 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
22354 {
22355 operands[2 + i] = part[0][j];
22356 operands[6 + i] = part[1][j];
22357 }
22358 }
22359 else
22360 {
22361 for (i = 0; i < nparts; i++)
22362 {
22363 operands[2 + i] = part[0][i];
22364 operands[6 + i] = part[1][i];
22365 }
22366 }
22367
22368 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
22369 if (optimize_insn_for_size_p ())
22370 {
22371 for (j = 0; j < nparts - 1; j++)
22372 if (CONST_INT_P (operands[6 + j])
22373 && operands[6 + j] != const0_rtx
22374 && REG_P (operands[2 + j]))
22375 for (i = j; i < nparts - 1; i++)
22376 if (CONST_INT_P (operands[7 + i])
22377 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
22378 operands[7 + i] = operands[2 + j];
22379 }
22380
22381 for (i = 0; i < nparts; i++)
22382 emit_move_insn (operands[2 + i], operands[6 + i]);
22383
22384 return;
22385 }
22386
22387 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
22388 left shift by a constant, either using a single shift or
22389 a sequence of add instructions. */
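/* For example, where COUNT additions are no more expensive than a constant
   shift (per the cost check below), x << 2 is emitted as two consecutive
   "add x, x" instructions; otherwise a single shift is used. */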
22390
22391 static void
22392 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
22393 {
22394 rtx (*insn)(rtx, rtx, rtx);
22395
22396 if (count == 1
22397 || (count * ix86_cost->add <= ix86_cost->shift_const
22398 && !optimize_insn_for_size_p ()))
22399 {
22400 insn = mode == DImode ? gen_addsi3 : gen_adddi3;
22401 while (count-- > 0)
22402 emit_insn (insn (operand, operand, operand));
22403 }
22404 else
22405 {
22406 insn = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
22407 emit_insn (insn (operand, operand, GEN_INT (count)));
22408 }
22409 }
22410
22411 void
22412 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
22413 {
22414 rtx (*gen_ashl3)(rtx, rtx, rtx);
22415 rtx (*gen_shld)(rtx, rtx, rtx);
22416 int half_width = GET_MODE_BITSIZE (mode) >> 1;
22417
22418 rtx low[2], high[2];
22419 int count;
22420
22421 if (CONST_INT_P (operands[2]))
22422 {
22423 split_double_mode (mode, operands, 2, low, high);
22424 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
22425
22426 if (count >= half_width)
22427 {
22428 emit_move_insn (high[0], low[1]);
22429 emit_move_insn (low[0], const0_rtx);
22430
22431 if (count > half_width)
22432 ix86_expand_ashl_const (high[0], count - half_width, mode);
22433 }
22434 else
22435 {
22436 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
22437
22438 if (!rtx_equal_p (operands[0], operands[1]))
22439 emit_move_insn (operands[0], operands[1]);
22440
22441 emit_insn (gen_shld (high[0], low[0], GEN_INT (count)));
22442 ix86_expand_ashl_const (low[0], count, mode);
22443 }
22444 return;
22445 }
22446
22447 split_double_mode (mode, operands, 1, low, high);
22448
22449 gen_ashl3 = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
22450
22451 if (operands[1] == const1_rtx)
22452 {
22453 /* Assuming we've chosen QImode-capable registers, 1 << N
22454 can be done with two 32/64-bit shifts, no branches, no cmoves. */
22455 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
22456 {
22457 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
22458
22459 ix86_expand_clear (low[0]);
22460 ix86_expand_clear (high[0]);
22461 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (half_width)));
22462
22463 d = gen_lowpart (QImode, low[0]);
22464 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
22465 s = gen_rtx_EQ (QImode, flags, const0_rtx);
22466 emit_insn (gen_rtx_SET (VOIDmode, d, s));
22467
22468 d = gen_lowpart (QImode, high[0]);
22469 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
22470 s = gen_rtx_NE (QImode, flags, const0_rtx);
22471 emit_insn (gen_rtx_SET (VOIDmode, d, s));
22472 }
22473
22474 /* Otherwise, we can get the same results by manually performing
22475 a bit extract operation on bit 5/6, and then performing the two
22476 shifts. The two methods of getting 0/1 into low/high are exactly
22477 the same size. Avoiding the shift in the bit extract case helps
22478 pentium4 a bit; no one else seems to care much either way. */
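 /* Illustration: for a DImode 1 << N on a 32-bit target, N = 35 has
    bit 5 set, so HIGH becomes 1 and LOW becomes 0; the final word-size
    shifts below then shift by 35 & 31 == 3 (the hardware masks the
    variable count), leaving HIGH:LOW equal to 1 << 35. */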
22479 else
22480 {
22481 enum machine_mode half_mode;
22482 rtx (*gen_lshr3)(rtx, rtx, rtx);
22483 rtx (*gen_and3)(rtx, rtx, rtx);
22484 rtx (*gen_xor3)(rtx, rtx, rtx);
22485 HOST_WIDE_INT bits;
22486 rtx x;
22487
22488 if (mode == DImode)
22489 {
22490 half_mode = SImode;
22491 gen_lshr3 = gen_lshrsi3;
22492 gen_and3 = gen_andsi3;
22493 gen_xor3 = gen_xorsi3;
22494 bits = 5;
22495 }
22496 else
22497 {
22498 half_mode = DImode;
22499 gen_lshr3 = gen_lshrdi3;
22500 gen_and3 = gen_anddi3;
22501 gen_xor3 = gen_xordi3;
22502 bits = 6;
22503 }
22504
22505 if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
22506 x = gen_rtx_ZERO_EXTEND (half_mode, operands[2]);
22507 else
22508 x = gen_lowpart (half_mode, operands[2]);
22509 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
22510
22511 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (bits)));
22512 emit_insn (gen_and3 (high[0], high[0], const1_rtx));
22513 emit_move_insn (low[0], high[0]);
22514 emit_insn (gen_xor3 (low[0], low[0], const1_rtx));
22515 }
22516
22517 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
22518 emit_insn (gen_ashl3 (high[0], high[0], operands[2]));
22519 return;
22520 }
22521
22522 if (operands[1] == constm1_rtx)
22523 {
22524 /* For -1 << N, we can avoid the shld instruction, because we
22525 know that we're shifting 0...31/63 ones into a -1. */
22526 emit_move_insn (low[0], constm1_rtx);
22527 if (optimize_insn_for_size_p ())
22528 emit_move_insn (high[0], low[0]);
22529 else
22530 emit_move_insn (high[0], constm1_rtx);
22531 }
22532 else
22533 {
22534 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
22535
22536 if (!rtx_equal_p (operands[0], operands[1]))
22537 emit_move_insn (operands[0], operands[1]);
22538
22539 split_double_mode (mode, operands, 1, low, high);
22540 emit_insn (gen_shld (high[0], low[0], operands[2]));
22541 }
22542
22543 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
22544
22545 if (TARGET_CMOVE && scratch)
22546 {
22547 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
22548 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
22549
22550 ix86_expand_clear (scratch);
22551 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
22552 }
22553 else
22554 {
22555 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
22556 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
22557
22558 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
22559 }
22560 }
22561
22562 void
22563 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
22564 {
22565 rtx (*gen_ashr3)(rtx, rtx, rtx)
22566 = mode == DImode ? gen_ashrsi3 : gen_ashrdi3;
22567 rtx (*gen_shrd)(rtx, rtx, rtx);
22568 int half_width = GET_MODE_BITSIZE (mode) >> 1;
22569
22570 rtx low[2], high[2];
22571 int count;
22572
22573 if (CONST_INT_P (operands[2]))
22574 {
22575 split_double_mode (mode, operands, 2, low, high);
22576 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
22577
22578 if (count == GET_MODE_BITSIZE (mode) - 1)
22579 {
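 /* An arithmetic shift by the full width minus one only propagates the
    sign: both halves of the result become copies of the original sign
    bit, so a single word-size arithmetic shift of the high half
    suffices. */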
22580 emit_move_insn (high[0], high[1]);
22581 emit_insn (gen_ashr3 (high[0], high[0],
22582 GEN_INT (half_width - 1)));
22583 emit_move_insn (low[0], high[0]);
22584
22585 }
22586 else if (count >= half_width)
22587 {
22588 emit_move_insn (low[0], high[1]);
22589 emit_move_insn (high[0], low[0]);
22590 emit_insn (gen_ashr3 (high[0], high[0],
22591 GEN_INT (half_width - 1)));
22592
22593 if (count > half_width)
22594 emit_insn (gen_ashr3 (low[0], low[0],
22595 GEN_INT (count - half_width)));
22596 }
22597 else
22598 {
22599 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
22600
22601 if (!rtx_equal_p (operands[0], operands[1]))
22602 emit_move_insn (operands[0], operands[1]);
22603
22604 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
22605 emit_insn (gen_ashr3 (high[0], high[0], GEN_INT (count)));
22606 }
22607 }
22608 else
22609 {
22610 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
22611
22612 if (!rtx_equal_p (operands[0], operands[1]))
22613 emit_move_insn (operands[0], operands[1]);
22614
22615 split_double_mode (mode, operands, 1, low, high);
22616
22617 emit_insn (gen_shrd (low[0], high[0], operands[2]));
22618 emit_insn (gen_ashr3 (high[0], high[0], operands[2]));
22619
22620 if (TARGET_CMOVE && scratch)
22621 {
22622 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
22623 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
22624
22625 emit_move_insn (scratch, high[0]);
22626 emit_insn (gen_ashr3 (scratch, scratch,
22627 GEN_INT (half_width - 1)));
22628 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
22629 scratch));
22630 }
22631 else
22632 {
22633 rtx (*gen_x86_shift_adj_3)(rtx, rtx, rtx)
22634 = mode == DImode ? gen_x86_shiftsi_adj_3 : gen_x86_shiftdi_adj_3;
22635
22636 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
22637 }
22638 }
22639 }
22640
22641 void
22642 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
22643 {
22644 rtx (*gen_lshr3)(rtx, rtx, rtx)
22645 = mode == DImode ? gen_lshrsi3 : gen_lshrdi3;
22646 rtx (*gen_shrd)(rtx, rtx, rtx);
22647 int half_width = GET_MODE_BITSIZE (mode) >> 1;
22648
22649 rtx low[2], high[2];
22650 int count;
22651
22652 if (CONST_INT_P (operands[2]))
22653 {
22654 split_double_mode (mode, operands, 2, low, high);
22655 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
22656
22657 if (count >= half_width)
22658 {
22659 emit_move_insn (low[0], high[1]);
22660 ix86_expand_clear (high[0]);
22661
22662 if (count > half_width)
22663 emit_insn (gen_lshr3 (low[0], low[0],
22664 GEN_INT (count - half_width)));
22665 }
22666 else
22667 {
22668 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
22669
22670 if (!rtx_equal_p (operands[0], operands[1]))
22671 emit_move_insn (operands[0], operands[1]);
22672
22673 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
22674 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (count)));
22675 }
22676 }
22677 else
22678 {
22679 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
22680
22681 if (!rtx_equal_p (operands[0], operands[1]))
22682 emit_move_insn (operands[0], operands[1]);
22683
22684 split_double_mode (mode, operands, 1, low, high);
22685
22686 emit_insn (gen_shrd (low[0], high[0], operands[2]));
22687 emit_insn (gen_lshr3 (high[0], high[0], operands[2]));
22688
22689 if (TARGET_CMOVE && scratch)
22690 {
22691 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
22692 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
22693
22694 ix86_expand_clear (scratch);
22695 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
22696 scratch));
22697 }
22698 else
22699 {
22700 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
22701 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
22702
22703 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
22704 }
22705 }
22706 }
22707
22708 /* Predict just emitted jump instruction to be taken with probability PROB. */
22709 static void
22710 predict_jump (int prob)
22711 {
22712 rtx insn = get_last_insn ();
22713 gcc_assert (JUMP_P (insn));
22714 add_int_reg_note (insn, REG_BR_PROB, prob);
22715 }
22716
22717 /* Helper function for the string operations below. Test whether the VALUE
22718 bits of VARIABLE are clear; if so, the emitted code jumps to the returned label. */
22719 static rtx
22720 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
22721 {
22722 rtx label = gen_label_rtx ();
22723 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
22724 if (GET_MODE (variable) == DImode)
22725 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
22726 else
22727 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
22728 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
22729 1, label);
22730 if (epilogue)
22731 predict_jump (REG_BR_PROB_BASE * 50 / 100);
22732 else
22733 predict_jump (REG_BR_PROB_BASE * 90 / 100);
22734 return label;
22735 }
22736
22737 /* Decrease COUNTREG by VALUE. */
22738 static void
22739 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
22740 {
22741 rtx (*gen_add)(rtx, rtx, rtx)
22742 = GET_MODE (countreg) == DImode ? gen_adddi3 : gen_addsi3;
22743
22744 emit_insn (gen_add (countreg, countreg, GEN_INT (-value)));
22745 }
22746
22747 /* Zero extend the possibly SImode EXP to a Pmode register. */
22748 rtx
22749 ix86_zero_extend_to_Pmode (rtx exp)
22750 {
22751 return force_reg (Pmode, convert_to_mode (Pmode, exp, 1));
22752 }
22753
22754 /* Divide COUNTREG by SCALE. */
22755 static rtx
22756 scale_counter (rtx countreg, int scale)
22757 {
22758 rtx sc;
22759
22760 if (scale == 1)
22761 return countreg;
22762 if (CONST_INT_P (countreg))
22763 return GEN_INT (INTVAL (countreg) / scale);
22764 gcc_assert (REG_P (countreg));
22765
22766 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
22767 GEN_INT (exact_log2 (scale)),
22768 NULL, 1, OPTAB_DIRECT);
22769 return sc;
22770 }
22771
22772 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
22773 DImode for constant loop counts. */
22774
22775 static enum machine_mode
22776 counter_mode (rtx count_exp)
22777 {
22778 if (GET_MODE (count_exp) != VOIDmode)
22779 return GET_MODE (count_exp);
22780 if (!CONST_INT_P (count_exp))
22781 return Pmode;
22782 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
22783 return DImode;
22784 return SImode;
22785 }
22786
22787 /* Copy the address to a Pmode register. This is used for x32 to
22788 truncate DImode TLS address to a SImode register. */
22789
22790 static rtx
22791 ix86_copy_addr_to_reg (rtx addr)
22792 {
22793 if (GET_MODE (addr) == Pmode || GET_MODE (addr) == VOIDmode)
22794 return copy_addr_to_reg (addr);
22795 else
22796 {
22797 gcc_assert (GET_MODE (addr) == DImode && Pmode == SImode);
22798 return gen_rtx_SUBREG (SImode, copy_to_mode_reg (DImode, addr), 0);
22799 }
22800 }
22801
22802 /* When ISSETMEM is FALSE, output a simple loop to move the memory pointed to
22803 by SRCPTR to DESTPTR in chunks of MODE unrolled UNROLL times; the overall
22804 size is COUNT, specified in bytes. When ISSETMEM is TRUE, output the
22805 equivalent loop to fill the memory with VALUE (assumed to be in MODE).
22806
22807 The size is rounded down to a whole number of chunks moved at once.
22808 SRCMEM and DESTMEM provide MEMrtx to feed proper aliasing info. */
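/* A rough sketch of the emitted code (illustration only), for a chunk of
   MODE of SIZE bytes unrolled UNROLL times:

       size = count & -(SIZE * UNROLL);  iter = 0;
     top:
       copy or set SIZE * UNROLL bytes at DESTPTR + iter (and SRCPTR + iter);
       iter += SIZE * UNROLL;
       if (iter < size) goto top;
       DESTPTR += iter;  SRCPTR += iter;  */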
22809
22810
22811 static void
22812 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
22813 rtx destptr, rtx srcptr, rtx value,
22814 rtx count, enum machine_mode mode, int unroll,
22815 int expected_size, bool issetmem)
22816 {
22817 rtx out_label, top_label, iter, tmp;
22818 enum machine_mode iter_mode = counter_mode (count);
22819 int piece_size_n = GET_MODE_SIZE (mode) * unroll;
22820 rtx piece_size = GEN_INT (piece_size_n);
22821 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
22822 rtx size;
22823 int i;
22824
22825 top_label = gen_label_rtx ();
22826 out_label = gen_label_rtx ();
22827 iter = gen_reg_rtx (iter_mode);
22828
22829 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
22830 NULL, 1, OPTAB_DIRECT);
22831 /* Those two should combine. */
22832 if (piece_size == const1_rtx)
22833 {
22834 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
22835 true, out_label);
22836 predict_jump (REG_BR_PROB_BASE * 10 / 100);
22837 }
22838 emit_move_insn (iter, const0_rtx);
22839
22840 emit_label (top_label);
22841
22842 tmp = convert_modes (Pmode, iter_mode, iter, true);
22843
22844 /* This assert could be relaxed - in that case we'd need to compute
22845 the smallest power of two containing PIECE_SIZE_N and pass it to
22846 offset_address. */
22847 gcc_assert ((piece_size_n & (piece_size_n - 1)) == 0);
22848 destmem = offset_address (destmem, tmp, piece_size_n);
22849 destmem = adjust_address (destmem, mode, 0);
22850
22851 if (!issetmem)
22852 {
22853 srcmem = offset_address (srcmem, copy_rtx (tmp), piece_size_n);
22854 srcmem = adjust_address (srcmem, mode, 0);
22855
22856 /* When unrolling for chips that reorder memory reads and writes,
22857 we can save registers by using a single temporary.
22858 Also, using 4 temporaries is overkill in 32-bit mode. */
22859 if (!TARGET_64BIT && 0)
22860 {
22861 for (i = 0; i < unroll; i++)
22862 {
22863 if (i)
22864 {
22865 destmem =
22866 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
22867 srcmem =
22868 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
22869 }
22870 emit_move_insn (destmem, srcmem);
22871 }
22872 }
22873 else
22874 {
22875 rtx tmpreg[4];
22876 gcc_assert (unroll <= 4);
22877 for (i = 0; i < unroll; i++)
22878 {
22879 tmpreg[i] = gen_reg_rtx (mode);
22880 if (i)
22881 {
22882 srcmem =
22883 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
22884 }
22885 emit_move_insn (tmpreg[i], srcmem);
22886 }
22887 for (i = 0; i < unroll; i++)
22888 {
22889 if (i)
22890 {
22891 destmem =
22892 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
22893 }
22894 emit_move_insn (destmem, tmpreg[i]);
22895 }
22896 }
22897 }
22898 else
22899 for (i = 0; i < unroll; i++)
22900 {
22901 if (i)
22902 destmem =
22903 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
22904 emit_move_insn (destmem, value);
22905 }
22906
22907 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
22908 true, OPTAB_LIB_WIDEN);
22909 if (tmp != iter)
22910 emit_move_insn (iter, tmp);
22911
22912 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
22913 true, top_label);
22914 if (expected_size != -1)
22915 {
22916 expected_size /= GET_MODE_SIZE (mode) * unroll;
22917 if (expected_size == 0)
22918 predict_jump (0);
22919 else if (expected_size > REG_BR_PROB_BASE)
22920 predict_jump (REG_BR_PROB_BASE - 1);
22921 else
22922 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
22923 }
22924 else
22925 predict_jump (REG_BR_PROB_BASE * 80 / 100);
22926 iter = ix86_zero_extend_to_Pmode (iter);
22927 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
22928 true, OPTAB_LIB_WIDEN);
22929 if (tmp != destptr)
22930 emit_move_insn (destptr, tmp);
22931 if (!issetmem)
22932 {
22933 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
22934 true, OPTAB_LIB_WIDEN);
22935 if (tmp != srcptr)
22936 emit_move_insn (srcptr, tmp);
22937 }
22938 emit_label (out_label);
22939 }
22940
22941 /* Output "rep; mov" or "rep; stos" instruction depending on ISSETMEM argument.
22942 When ISSETMEM is true, arguments SRCMEM and SRCPTR are ignored.
22943 When ISSETMEM is false, arguments VALUE and ORIG_VALUE are ignored.
22944 For the setmem case, VALUE is ORIG_VALUE promoted to a wider size.
22945 ORIG_VALUE is the original value passed to memset to fill the memory with.
22946 Other arguments have the same meaning as for the previous function. */
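/* For instance (illustration): copying 64 bytes with MODE == SImode loads the
   count register with 64 / 4 == 16 and emits a single "rep movs" of
   doublewords; DESTEXP and SRCEXP describe the pointer values after the
   operation (pointer plus count times 4). */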
22947
22948 static void
22949 expand_set_or_movmem_via_rep (rtx destmem, rtx srcmem,
22950 rtx destptr, rtx srcptr, rtx value, rtx orig_value,
22951 rtx count,
22952 enum machine_mode mode, bool issetmem)
22953 {
22954 rtx destexp;
22955 rtx srcexp;
22956 rtx countreg;
22957 HOST_WIDE_INT rounded_count;
22958
22959 /* If possible, it is shorter to use rep movs.
22960 TODO: Maybe it is better to move this logic to decide_alg. */
22961 if (mode == QImode && CONST_INT_P (count) && !(INTVAL (count) & 3)
22962 && (!issetmem || orig_value == const0_rtx))
22963 mode = SImode;
22964
22965 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
22966 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
22967
22968 countreg = ix86_zero_extend_to_Pmode (scale_counter (count,
22969 GET_MODE_SIZE (mode)));
22970 if (mode != QImode)
22971 {
22972 destexp = gen_rtx_ASHIFT (Pmode, countreg,
22973 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
22974 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
22975 }
22976 else
22977 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
22978 if ((!issetmem || orig_value == const0_rtx) && CONST_INT_P (count))
22979 {
22980 rounded_count = (INTVAL (count)
22981 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
22982 destmem = shallow_copy_rtx (destmem);
22983 set_mem_size (destmem, rounded_count);
22984 }
22985 else if (MEM_SIZE_KNOWN_P (destmem))
22986 clear_mem_size (destmem);
22987
22988 if (issetmem)
22989 {
22990 value = force_reg (mode, gen_lowpart (mode, value));
22991 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
22992 }
22993 else
22994 {
22995 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
22996 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
22997 if (mode != QImode)
22998 {
22999 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
23000 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
23001 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
23002 }
23003 else
23004 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
23005 if (CONST_INT_P (count))
23006 {
23007 rounded_count = (INTVAL (count)
23008 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
23009 srcmem = shallow_copy_rtx (srcmem);
23010 set_mem_size (srcmem, rounded_count);
23011 }
23012 else
23013 {
23014 if (MEM_SIZE_KNOWN_P (srcmem))
23015 clear_mem_size (srcmem);
23016 }
23017 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
23018 destexp, srcexp));
23019 }
23020 }
23021
23022 /* This function emits moves to copy SIZE_TO_MOVE bytes from SRCMEM to
23023 DESTMEM.
23024 SRCMEM is passed by pointer and is updated on return.
23025 The return value is the updated DESTMEM. */
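/* Illustration: with SIZE_TO_MOVE == 8 this typically picks DImode (or a
   smaller supported mode) and emits a single load into a fresh temporary
   followed by a store, advancing both pointers by 8. */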
23026 static rtx
23027 emit_memmov (rtx destmem, rtx *srcmem, rtx destptr, rtx srcptr,
23028 HOST_WIDE_INT size_to_move)
23029 {
23030 rtx dst = destmem, src = *srcmem, adjust, tempreg;
23031 enum insn_code code;
23032 enum machine_mode move_mode;
23033 int piece_size, i;
23034
23035 /* Find the widest mode in which we could perform moves.
23036 Start with the biggest power of 2 not larger than SIZE_TO_MOVE and halve
23037 it until a move of that size is supported. */
23038 piece_size = 1 << floor_log2 (size_to_move);
23039 move_mode = mode_for_size (piece_size * BITS_PER_UNIT, MODE_INT, 0);
23040 code = optab_handler (mov_optab, move_mode);
23041 while (code == CODE_FOR_nothing && piece_size > 1)
23042 {
23043 piece_size >>= 1;
23044 move_mode = mode_for_size (piece_size * BITS_PER_UNIT, MODE_INT, 0);
23045 code = optab_handler (mov_optab, move_mode);
23046 }
23047
23048 /* Find the corresponding vector mode with the same size as MOVE_MODE.
23049 MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
23050 if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
23051 {
23052 int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
23053 move_mode = mode_for_vector (word_mode, nunits);
23054 code = optab_handler (mov_optab, move_mode);
23055 if (code == CODE_FOR_nothing)
23056 {
23057 move_mode = word_mode;
23058 piece_size = GET_MODE_SIZE (move_mode);
23059 code = optab_handler (mov_optab, move_mode);
23060 }
23061 }
23062 gcc_assert (code != CODE_FOR_nothing);
23063
23064 dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0);
23065 src = adjust_automodify_address_nv (src, move_mode, srcptr, 0);
23066
23067 /* Emit moves. We'll need SIZE_TO_MOVE/PIECE_SIZE moves. */
23068 gcc_assert (size_to_move % piece_size == 0);
23069 adjust = GEN_INT (piece_size);
23070 for (i = 0; i < size_to_move; i += piece_size)
23071 {
23072 /* We move from memory to memory, so we'll need to do it via
23073 a temporary register. */
23074 tempreg = gen_reg_rtx (move_mode);
23075 emit_insn (GEN_FCN (code) (tempreg, src));
23076 emit_insn (GEN_FCN (code) (dst, tempreg));
23077
23078 emit_move_insn (destptr,
23079 gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust));
23080 emit_move_insn (srcptr,
23081 gen_rtx_PLUS (Pmode, copy_rtx (srcptr), adjust));
23082
23083 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23084 piece_size);
23085 src = adjust_automodify_address_nv (src, move_mode, srcptr,
23086 piece_size);
23087 }
23088
23089 /* Update DST and SRC rtx. */
23090 *srcmem = src;
23091 return dst;
23092 }
23093
23094 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
23095 static void
23096 expand_movmem_epilogue (rtx destmem, rtx srcmem,
23097 rtx destptr, rtx srcptr, rtx count, int max_size)
23098 {
23099 rtx src, dest;
23100 if (CONST_INT_P (count))
23101 {
23102 HOST_WIDE_INT countval = INTVAL (count);
23103 HOST_WIDE_INT epilogue_size = countval % max_size;
23104 int i;
23105
23106 /* For now MAX_SIZE should be a power of 2. This assert could be
23107 relaxed, but it'll require a bit more complicated epilogue
23108 expanding. */
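 /* Example (illustration): with MAX_SIZE == 16 and COUNT % 16 == 13,
    the loop below emits an 8-byte, a 4-byte and a 1-byte move, walking
    the set bits of the remainder from the largest chunk down. */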
23109 gcc_assert ((max_size & (max_size - 1)) == 0);
23110 for (i = max_size; i >= 1; i >>= 1)
23111 {
23112 if (epilogue_size & i)
23113 destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
23114 }
23115 return;
23116 }
23117 if (max_size > 8)
23118 {
23119 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
23120 count, 1, OPTAB_DIRECT);
23121 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
23122 count, QImode, 1, 4, false);
23123 return;
23124 }
23125
23126 /* When single string operations are available, we can cheaply advance the
23127 dest and src pointers. Otherwise we save code size by maintaining an offset
23128 (zero is readily available from the preceding rep operation) and using x86
23129 addressing modes. */
23130 if (TARGET_SINGLE_STRINGOP)
23131 {
23132 if (max_size > 4)
23133 {
23134 rtx label = ix86_expand_aligntest (count, 4, true);
23135 src = change_address (srcmem, SImode, srcptr);
23136 dest = change_address (destmem, SImode, destptr);
23137 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23138 emit_label (label);
23139 LABEL_NUSES (label) = 1;
23140 }
23141 if (max_size > 2)
23142 {
23143 rtx label = ix86_expand_aligntest (count, 2, true);
23144 src = change_address (srcmem, HImode, srcptr);
23145 dest = change_address (destmem, HImode, destptr);
23146 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23147 emit_label (label);
23148 LABEL_NUSES (label) = 1;
23149 }
23150 if (max_size > 1)
23151 {
23152 rtx label = ix86_expand_aligntest (count, 1, true);
23153 src = change_address (srcmem, QImode, srcptr);
23154 dest = change_address (destmem, QImode, destptr);
23155 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23156 emit_label (label);
23157 LABEL_NUSES (label) = 1;
23158 }
23159 }
23160 else
23161 {
23162 rtx offset = force_reg (Pmode, const0_rtx);
23163 rtx tmp;
23164
23165 if (max_size > 4)
23166 {
23167 rtx label = ix86_expand_aligntest (count, 4, true);
23168 src = change_address (srcmem, SImode, srcptr);
23169 dest = change_address (destmem, SImode, destptr);
23170 emit_move_insn (dest, src);
23171 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
23172 true, OPTAB_LIB_WIDEN);
23173 if (tmp != offset)
23174 emit_move_insn (offset, tmp);
23175 emit_label (label);
23176 LABEL_NUSES (label) = 1;
23177 }
23178 if (max_size > 2)
23179 {
23180 rtx label = ix86_expand_aligntest (count, 2, true);
23181 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
23182 src = change_address (srcmem, HImode, tmp);
23183 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
23184 dest = change_address (destmem, HImode, tmp);
23185 emit_move_insn (dest, src);
23186 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
23187 true, OPTAB_LIB_WIDEN);
23188 if (tmp != offset)
23189 emit_move_insn (offset, tmp);
23190 emit_label (label);
23191 LABEL_NUSES (label) = 1;
23192 }
23193 if (max_size > 1)
23194 {
23195 rtx label = ix86_expand_aligntest (count, 1, true);
23196 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
23197 src = change_address (srcmem, QImode, tmp);
23198 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
23199 dest = change_address (destmem, QImode, tmp);
23200 emit_move_insn (dest, src);
23201 emit_label (label);
23202 LABEL_NUSES (label) = 1;
23203 }
23204 }
23205 }
23206
23207 /* This function emits moves to fill SIZE_TO_MOVE bytes starting from DESTMEM
23208 with value PROMOTED_VAL.
23209 DESTPTR is advanced as the stores are emitted.
23210 The return value is the updated DESTMEM. */
23211 static rtx
23212 emit_memset (rtx destmem, rtx destptr, rtx promoted_val,
23213 HOST_WIDE_INT size_to_move)
23214 {
23215 rtx dst = destmem, adjust;
23216 enum insn_code code;
23217 enum machine_mode move_mode;
23218 int piece_size, i;
23219
23220 /* Find the widest mode in which to perform the stores: use the mode of
23221 PROMOTED_VAL, narrowed to SIZE_TO_MOVE bytes when SIZE_TO_MOVE is
23222 smaller than that mode. */
23223 move_mode = GET_MODE (promoted_val);
23224 if (move_mode == VOIDmode)
23225 move_mode = QImode;
23226 if (size_to_move < GET_MODE_SIZE (move_mode))
23227 {
23228 move_mode = mode_for_size (size_to_move * BITS_PER_UNIT, MODE_INT, 0);
23229 promoted_val = gen_lowpart (move_mode, promoted_val);
23230 }
23231 piece_size = GET_MODE_SIZE (move_mode);
23232 code = optab_handler (mov_optab, move_mode);
23233 gcc_assert (code != CODE_FOR_nothing && promoted_val != NULL_RTX);
23234
23235 dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0);
23236
23237 /* Emit moves. We'll need SIZE_TO_MOVE/PIECE_SIZE moves. */
23238 gcc_assert (size_to_move % piece_size == 0);
23239 adjust = GEN_INT (piece_size);
23240 for (i = 0; i < size_to_move; i += piece_size)
23241 {
23242 if (piece_size <= GET_MODE_SIZE (word_mode))
23243 {
23244 emit_insn (gen_strset (destptr, dst, promoted_val));
23245 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23246 piece_size);
23247 continue;
23248 }
23249
23250 emit_insn (GEN_FCN (code) (dst, promoted_val));
23251
23252 emit_move_insn (destptr,
23253 gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust));
23254
23255 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23256 piece_size);
23257 }
23258
23259 /* Update DST rtx. */
23260 return dst;
23261 }
23262 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
23263 static void
23264 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
23265 rtx count, int max_size)
23266 {
23267 count =
23268 expand_simple_binop (counter_mode (count), AND, count,
23269 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
23270 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
23271 gen_lowpart (QImode, value), count, QImode,
23272 1, max_size / 2, true);
23273 }
23274
23275 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
23276 static void
23277 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx vec_value,
23278 rtx count, int max_size)
23279 {
23280 rtx dest;
23281
23282 if (CONST_INT_P (count))
23283 {
23284 HOST_WIDE_INT countval = INTVAL (count);
23285 HOST_WIDE_INT epilogue_size = countval % max_size;
23286 int i;
23287
23288       /* For now MAX_SIZE should be a power of 2.  This assert could be
23289 	 relaxed, but it would require a somewhat more complicated epilogue
23290 	 expansion.  */
23291 gcc_assert ((max_size & (max_size - 1)) == 0);
23292 for (i = max_size; i >= 1; i >>= 1)
23293 {
23294 if (epilogue_size & i)
23295 {
23296 if (vec_value && i > GET_MODE_SIZE (GET_MODE (value)))
23297 destmem = emit_memset (destmem, destptr, vec_value, i);
23298 else
23299 destmem = emit_memset (destmem, destptr, value, i);
23300 }
23301 }
23302 return;
23303 }
23304 if (max_size > 32)
23305 {
23306 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
23307 return;
23308 }
23309 if (max_size > 16)
23310 {
23311 rtx label = ix86_expand_aligntest (count, 16, true);
23312 if (TARGET_64BIT)
23313 {
23314 dest = change_address (destmem, DImode, destptr);
23315 emit_insn (gen_strset (destptr, dest, value));
23316 dest = adjust_automodify_address_nv (dest, DImode, destptr, 8);
23317 emit_insn (gen_strset (destptr, dest, value));
23318 }
23319 else
23320 {
23321 dest = change_address (destmem, SImode, destptr);
23322 emit_insn (gen_strset (destptr, dest, value));
23323 dest = adjust_automodify_address_nv (dest, SImode, destptr, 4);
23324 emit_insn (gen_strset (destptr, dest, value));
23325 dest = adjust_automodify_address_nv (dest, SImode, destptr, 8);
23326 emit_insn (gen_strset (destptr, dest, value));
23327 dest = adjust_automodify_address_nv (dest, SImode, destptr, 12);
23328 emit_insn (gen_strset (destptr, dest, value));
23329 }
23330 emit_label (label);
23331 LABEL_NUSES (label) = 1;
23332 }
23333 if (max_size > 8)
23334 {
23335 rtx label = ix86_expand_aligntest (count, 8, true);
23336 if (TARGET_64BIT)
23337 {
23338 dest = change_address (destmem, DImode, destptr);
23339 emit_insn (gen_strset (destptr, dest, value));
23340 }
23341 else
23342 {
23343 dest = change_address (destmem, SImode, destptr);
23344 emit_insn (gen_strset (destptr, dest, value));
23345 dest = adjust_automodify_address_nv (dest, SImode, destptr, 4);
23346 emit_insn (gen_strset (destptr, dest, value));
23347 }
23348 emit_label (label);
23349 LABEL_NUSES (label) = 1;
23350 }
23351 if (max_size > 4)
23352 {
23353 rtx label = ix86_expand_aligntest (count, 4, true);
23354 dest = change_address (destmem, SImode, destptr);
23355 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
23356 emit_label (label);
23357 LABEL_NUSES (label) = 1;
23358 }
23359 if (max_size > 2)
23360 {
23361 rtx label = ix86_expand_aligntest (count, 2, true);
23362 dest = change_address (destmem, HImode, destptr);
23363 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
23364 emit_label (label);
23365 LABEL_NUSES (label) = 1;
23366 }
23367 if (max_size > 1)
23368 {
23369 rtx label = ix86_expand_aligntest (count, 1, true);
23370 dest = change_address (destmem, QImode, destptr);
23371 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
23372 emit_label (label);
23373 LABEL_NUSES (label) = 1;
23374 }
23375 }
23376
23377 /* Depending on ISSETMEM, either copy enough bytes from SRCMEM to DESTMEM, or
23378    store enough bytes into DESTMEM, to align it to DESIRED_ALIGNMENT.  The
23379    original alignment is ALIGN.  Depending on ISSETMEM, either the arguments
23380    SRCMEM/SRCPTR or VALUE/VEC_VALUE are ignored.
23381    Return value is updated DESTMEM.  */
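/* Sketch of the effect (illustrative values): with ALIGN == 1 and
   DESIRED_ALIGNMENT == 8, the loop below emits three runtime alignment tests
   that conditionally copy (or set) 1, 2 and then 4 bytes, so that DESTPTR is
   8-byte aligned afterwards and COUNT has been reduced accordingly.  */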
23382 static rtx
23383 expand_set_or_movmem_prologue (rtx destmem, rtx srcmem,
23384 rtx destptr, rtx srcptr, rtx value,
23385 rtx vec_value, rtx count, int align,
23386 int desired_alignment, bool issetmem)
23387 {
23388 int i;
23389 for (i = 1; i < desired_alignment; i <<= 1)
23390 {
23391 if (align <= i)
23392 {
23393 rtx label = ix86_expand_aligntest (destptr, i, false);
23394 if (issetmem)
23395 {
23396 if (vec_value && i > GET_MODE_SIZE (GET_MODE (value)))
23397 destmem = emit_memset (destmem, destptr, vec_value, i);
23398 else
23399 destmem = emit_memset (destmem, destptr, value, i);
23400 }
23401 else
23402 destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
23403 ix86_adjust_counter (count, i);
23404 emit_label (label);
23405 LABEL_NUSES (label) = 1;
23406 set_mem_align (destmem, i * 2 * BITS_PER_UNIT);
23407 }
23408 }
23409 return destmem;
23410 }
23411
23412 /* Test if COUNT&SIZE is nonzero and if so, expand a movmem
23413    or setmem sequence that is valid for SIZE..2*SIZE-1 bytes
23414    and jump to DONE_LABEL.  */
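/* Worked example (illustrative): for a copy with SIZE == 8, the code below
   tests COUNT & 8; when the bit is set it copies 8 bytes from the start of
   the block and 8 bytes ending at DESTPTR + COUNT, which together cover any
   length in the range 8..15, and then jumps to DONE_LABEL.  */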
23415 static void
23416 expand_small_movmem_or_setmem (rtx destmem, rtx srcmem,
23417 rtx destptr, rtx srcptr,
23418 rtx value, rtx vec_value,
23419 rtx count, int size,
23420 rtx done_label, bool issetmem)
23421 {
23422 rtx label = ix86_expand_aligntest (count, size, false);
23423 enum machine_mode mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 1);
23424 rtx modesize;
23425 int n;
23426
23427   /* If we do not have a vector value to copy, we must reduce the size.  */
23428 if (issetmem)
23429 {
23430 if (!vec_value)
23431 {
23432 if (GET_MODE (value) == VOIDmode && size > 8)
23433 mode = Pmode;
23434 else if (GET_MODE_SIZE (mode) > GET_MODE_SIZE (GET_MODE (value)))
23435 mode = GET_MODE (value);
23436 }
23437 else
23438 mode = GET_MODE (vec_value), value = vec_value;
23439 }
23440 else
23441 {
23442 /* Choose appropriate vector mode. */
23443 if (size >= 32)
23444 mode = TARGET_AVX ? V32QImode : TARGET_SSE ? V16QImode : DImode;
23445 else if (size >= 16)
23446 mode = TARGET_SSE ? V16QImode : DImode;
23447 srcmem = change_address (srcmem, mode, srcptr);
23448 }
23449 destmem = change_address (destmem, mode, destptr);
23450 modesize = GEN_INT (GET_MODE_SIZE (mode));
23451 gcc_assert (GET_MODE_SIZE (mode) <= size);
23452 for (n = 0; n * GET_MODE_SIZE (mode) < size; n++)
23453 {
23454 if (issetmem)
23455 emit_move_insn (destmem, gen_lowpart (mode, value));
23456 else
23457 {
23458 emit_move_insn (destmem, srcmem);
23459 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
23460 }
23461 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
23462 }
23463
23464 destmem = offset_address (destmem, count, 1);
23465 destmem = offset_address (destmem, GEN_INT (-2 * size),
23466 GET_MODE_SIZE (mode));
23467 if (!issetmem)
23468 {
23469 srcmem = offset_address (srcmem, count, 1);
23470 srcmem = offset_address (srcmem, GEN_INT (-2 * size),
23471 GET_MODE_SIZE (mode));
23472 }
23473 for (n = 0; n * GET_MODE_SIZE (mode) < size; n++)
23474 {
23475 if (issetmem)
23476 emit_move_insn (destmem, gen_lowpart (mode, value));
23477 else
23478 {
23479 emit_move_insn (destmem, srcmem);
23480 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
23481 }
23482 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
23483 }
23484 emit_jump_insn (gen_jump (done_label));
23485 emit_barrier ();
23486
23487 emit_label (label);
23488 LABEL_NUSES (label) = 1;
23489 }
23490
23491 /* Handle small memcpy (up to SIZE, which is supposed to be a small power of 2)
23492    and get ready for the main memcpy loop by copying the initial DESIRED_ALIGN-ALIGN
23493    bytes and the last SIZE bytes, adjusting DESTPTR/SRCPTR/COUNT so that we can
23494    proceed with a loop copying SIZE bytes at once.  Do moves in MODE.
23495 DONE_LABEL is a label after the whole copying sequence. The label is created
23496 on demand if *DONE_LABEL is NULL.
23497    MIN_SIZE is the minimal size of the copied block.  This value gets adjusted
23498    for the new bounds after the initial copies.
23499
23500    DESTMEM/SRCMEM are memory expressions pointing to the copied block,
23501    DESTPTR/SRCPTR are pointers to the block.  DYNAMIC_CHECK indicates whether
23502 we will dispatch to a library call for large blocks.
23503
23504 In pseudocode we do:
23505
23506 if (COUNT < SIZE)
23507 {
23508 Assume that SIZE is 4. Bigger sizes are handled analogously
23509 if (COUNT & 4)
23510 {
23511 copy 4 bytes from SRCPTR to DESTPTR
23512 copy 4 bytes from SRCPTR + COUNT - 4 to DESTPTR + COUNT - 4
23513 goto done_label
23514 }
23515 if (!COUNT)
23516 goto done_label;
23517 copy 1 byte from SRCPTR to DESTPTR
23518 if (COUNT & 2)
23519 {
23520 copy 2 bytes from SRCPTR to DESTPTR
23521 copy 2 bytes from SRCPTR + COUNT - 2 to DESTPTR + COUNT - 2
23522 }
23523 }
23524 else
23525 {
23526 copy at least DESIRED_ALIGN-ALIGN bytes from SRCPTR to DESTPTR
23527 	copy SIZE bytes from SRCPTR + COUNT - SIZE to DESTPTR + COUNT - SIZE
23528
23529 	OLD_DESTPTR = DESTPTR;
23530 Align DESTPTR up to DESIRED_ALIGN
23531 SRCPTR += DESTPTR - OLD_DESTPTR
23532 	COUNT -= DESTPTR - OLD_DESTPTR
23533 if (DYNAMIC_CHECK)
23534 Round COUNT down to multiple of SIZE
23535 << optional caller supplied zero size guard is here >>
23536 	<< optional caller supplied dynamic check is here >>
23537 << caller supplied main copy loop is here >>
23538 }
23539 done_label:
23540 */
23541 static void
23542 expand_set_or_movmem_prologue_epilogue_by_misaligned_moves (rtx destmem, rtx srcmem,
23543 rtx *destptr, rtx *srcptr,
23544 enum machine_mode mode,
23545 rtx value, rtx vec_value,
23546 rtx *count,
23547 rtx *done_label,
23548 int size,
23549 int desired_align,
23550 int align,
23551 unsigned HOST_WIDE_INT *min_size,
23552 bool dynamic_check,
23553 bool issetmem)
23554 {
23555 rtx loop_label = NULL, label;
23556 int n;
23557 rtx modesize;
23558 int prolog_size = 0;
23559 rtx mode_value;
23560
23561   /* Choose the proper value to copy.  */
23562 if (issetmem && VECTOR_MODE_P (mode))
23563 mode_value = vec_value;
23564 else
23565 mode_value = value;
23566 gcc_assert (GET_MODE_SIZE (mode) <= size);
23567
23568   /* See if the block is big or small; handle small blocks.  */
23569 if (!CONST_INT_P (*count) && *min_size < (unsigned HOST_WIDE_INT)size)
23570 {
23571 int size2 = size;
23572 loop_label = gen_label_rtx ();
23573
23574 if (!*done_label)
23575 *done_label = gen_label_rtx ();
23576
23577 emit_cmp_and_jump_insns (*count, GEN_INT (size2), GE, 0, GET_MODE (*count),
23578 1, loop_label);
23579 size2 >>= 1;
23580
23581 /* Handle sizes > 3. */
23582 for (;size2 > 2; size2 >>= 1)
23583 expand_small_movmem_or_setmem (destmem, srcmem,
23584 *destptr, *srcptr,
23585 value, vec_value,
23586 *count,
23587 size2, *done_label, issetmem);
23588       /* Nothing to copy?  Jump to DONE_LABEL if so.  */
23589 emit_cmp_and_jump_insns (*count, const0_rtx, EQ, 0, GET_MODE (*count),
23590 1, *done_label);
23591
23592 /* Do a byte copy. */
23593 destmem = change_address (destmem, QImode, *destptr);
23594 if (issetmem)
23595 emit_move_insn (destmem, gen_lowpart (QImode, value));
23596 else
23597 {
23598 srcmem = change_address (srcmem, QImode, *srcptr);
23599 emit_move_insn (destmem, srcmem);
23600 }
23601
23602 /* Handle sizes 2 and 3. */
23603 label = ix86_expand_aligntest (*count, 2, false);
23604 destmem = change_address (destmem, HImode, *destptr);
23605 destmem = offset_address (destmem, *count, 1);
23606 destmem = offset_address (destmem, GEN_INT (-2), 2);
23607 if (issetmem)
23608 emit_move_insn (destmem, gen_lowpart (HImode, value));
23609 else
23610 {
23611 srcmem = change_address (srcmem, HImode, *srcptr);
23612 srcmem = offset_address (srcmem, *count, 1);
23613 srcmem = offset_address (srcmem, GEN_INT (-2), 2);
23614 emit_move_insn (destmem, srcmem);
23615 }
23616
23617 emit_label (label);
23618 LABEL_NUSES (label) = 1;
23619 emit_jump_insn (gen_jump (*done_label));
23620 emit_barrier ();
23621 }
23622 else
23623 gcc_assert (*min_size >= (unsigned HOST_WIDE_INT)size
23624 || UINTVAL (*count) >= (unsigned HOST_WIDE_INT)size);
23625
23626 /* Start memcpy for COUNT >= SIZE. */
23627 if (loop_label)
23628 {
23629 emit_label (loop_label);
23630 LABEL_NUSES (loop_label) = 1;
23631 }
23632
23633   /* Copy the first DESIRED_ALIGN - ALIGN bytes (rounded up to the move-mode size).  */
23634 if (!issetmem)
23635 srcmem = change_address (srcmem, mode, *srcptr);
23636 destmem = change_address (destmem, mode, *destptr);
23637 modesize = GEN_INT (GET_MODE_SIZE (mode));
23638 for (n = 0; prolog_size < desired_align - align; n++)
23639 {
23640 if (issetmem)
23641 emit_move_insn (destmem, mode_value);
23642 else
23643 {
23644 emit_move_insn (destmem, srcmem);
23645 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
23646 }
23647 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
23648 prolog_size += GET_MODE_SIZE (mode);
23649 }
23650
23651
23652 /* Copy last SIZE bytes. */
23653 destmem = offset_address (destmem, *count, 1);
23654 destmem = offset_address (destmem,
23655 GEN_INT (-size - prolog_size),
23656 1);
23657 if (issetmem)
23658 emit_move_insn (destmem, mode_value);
23659 else
23660 {
23661 srcmem = offset_address (srcmem, *count, 1);
23662 srcmem = offset_address (srcmem,
23663 GEN_INT (-size - prolog_size),
23664 1);
23665 emit_move_insn (destmem, srcmem);
23666 }
23667 for (n = 1; n * GET_MODE_SIZE (mode) < size; n++)
23668 {
23669 destmem = offset_address (destmem, modesize, 1);
23670 if (issetmem)
23671 emit_move_insn (destmem, mode_value);
23672 else
23673 {
23674 srcmem = offset_address (srcmem, modesize, 1);
23675 emit_move_insn (destmem, srcmem);
23676 }
23677 }
23678
23679 /* Align destination. */
23680 if (desired_align > 1 && desired_align > align)
23681 {
23682 rtx saveddest = *destptr;
23683
23684 gcc_assert (desired_align <= size);
23685       /* Align destptr up, placing it in a new register.  */
23686 *destptr = expand_simple_binop (GET_MODE (*destptr), PLUS, *destptr,
23687 GEN_INT (prolog_size),
23688 NULL_RTX, 1, OPTAB_DIRECT);
23689 *destptr = expand_simple_binop (GET_MODE (*destptr), AND, *destptr,
23690 GEN_INT (-desired_align),
23691 *destptr, 1, OPTAB_DIRECT);
23692 /* See how many bytes we skipped. */
23693 saveddest = expand_simple_binop (GET_MODE (*destptr), MINUS, saveddest,
23694 *destptr,
23695 saveddest, 1, OPTAB_DIRECT);
23696 /* Adjust srcptr and count. */
23697 if (!issetmem)
23698 *srcptr = expand_simple_binop (GET_MODE (*srcptr), MINUS, *srcptr, saveddest,
23699 *srcptr, 1, OPTAB_DIRECT);
23700 *count = expand_simple_binop (GET_MODE (*count), PLUS, *count,
23701 saveddest, *count, 1, OPTAB_DIRECT);
23702 /* We copied at most size + prolog_size. */
23703 if (*min_size > (unsigned HOST_WIDE_INT)(size + prolog_size))
23704 *min_size = (*min_size - size) & ~(unsigned HOST_WIDE_INT)(size - 1);
23705 else
23706 *min_size = 0;
23707
23708       /* Our loops always round down the block size, but for dispatch to a library
23709 	 call we need the precise value.  */
23710 if (dynamic_check)
23711 *count = expand_simple_binop (GET_MODE (*count), AND, *count,
23712 GEN_INT (-size), *count, 1, OPTAB_DIRECT);
23713 }
23714 else
23715 {
23716 gcc_assert (prolog_size == 0);
23717       /* Decrease count, so we won't end up copying the last word twice.  */
23718 if (!CONST_INT_P (*count))
23719 *count = expand_simple_binop (GET_MODE (*count), PLUS, *count,
23720 constm1_rtx, *count, 1, OPTAB_DIRECT);
23721 else
23722 *count = GEN_INT ((UINTVAL (*count) - 1) & ~(unsigned HOST_WIDE_INT)(size - 1));
23723 if (*min_size)
23724 *min_size = (*min_size - 1) & ~(unsigned HOST_WIDE_INT)(size - 1);
23725 }
23726 }
23727
23728
23729 /* This function is like the previous one, except here we know how many bytes
23730 need to be copied. That allows us to update alignment not only of DST, which
23731 is returned, but also of SRC, which is passed as a pointer for that
23732 reason. */
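/* Illustration (assumed values): with DESIRED_ALIGN == 8 and ALIGN_BYTES == 7
   (binary 111), the loop below emits pieces of 1, 2 and 4 bytes (exactly the
   7 bytes needed before DST becomes 8-byte aligned) and then records the new
   alignment on DST.  */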
23733 static rtx
23734 expand_set_or_movmem_constant_prologue (rtx dst, rtx *srcp, rtx destreg,
23735 rtx srcreg, rtx value, rtx vec_value,
23736 int desired_align, int align_bytes,
23737 bool issetmem)
23738 {
23739 rtx src = NULL;
23740 rtx orig_dst = dst;
23741 rtx orig_src = NULL;
23742 int piece_size = 1;
23743 int copied_bytes = 0;
23744
23745 if (!issetmem)
23746 {
23747 gcc_assert (srcp != NULL);
23748 src = *srcp;
23749 orig_src = src;
23750 }
23751
23752 for (piece_size = 1;
23753 piece_size <= desired_align && copied_bytes < align_bytes;
23754 piece_size <<= 1)
23755 {
23756 if (align_bytes & piece_size)
23757 {
23758 if (issetmem)
23759 {
23760 if (vec_value && piece_size > GET_MODE_SIZE (GET_MODE (value)))
23761 dst = emit_memset (dst, destreg, vec_value, piece_size);
23762 else
23763 dst = emit_memset (dst, destreg, value, piece_size);
23764 }
23765 else
23766 dst = emit_memmov (dst, &src, destreg, srcreg, piece_size);
23767 copied_bytes += piece_size;
23768 }
23769 }
23770 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
23771 set_mem_align (dst, desired_align * BITS_PER_UNIT);
23772 if (MEM_SIZE_KNOWN_P (orig_dst))
23773 set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes);
23774
23775 if (!issetmem)
23776 {
23777 int src_align_bytes = get_mem_align_offset (src, desired_align
23778 * BITS_PER_UNIT);
23779 if (src_align_bytes >= 0)
23780 src_align_bytes = desired_align - src_align_bytes;
23781 if (src_align_bytes >= 0)
23782 {
23783 unsigned int src_align;
23784 for (src_align = desired_align; src_align >= 2; src_align >>= 1)
23785 {
23786 if ((src_align_bytes & (src_align - 1))
23787 == (align_bytes & (src_align - 1)))
23788 break;
23789 }
23790 if (src_align > (unsigned int) desired_align)
23791 src_align = desired_align;
23792 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
23793 set_mem_align (src, src_align * BITS_PER_UNIT);
23794 }
23795 if (MEM_SIZE_KNOWN_P (orig_src))
23796 set_mem_size (src, MEM_SIZE (orig_src) - align_bytes);
23797 *srcp = src;
23798 }
23799
23800 return dst;
23801 }
23802
23803 /* Return true if ALG can be used in the current context.
23804 Assume we expand memset if MEMSET is true. */
23805 static bool
23806 alg_usable_p (enum stringop_alg alg, bool memset)
23807 {
23808 if (alg == no_stringop)
23809 return false;
23810 if (alg == vector_loop)
23811 return TARGET_SSE || TARGET_AVX;
23812 /* Algorithms using the rep prefix want at least edi and ecx;
23813 additionally, memset wants eax and memcpy wants esi. Don't
23814 consider such algorithms if the user has appropriated those
23815 registers for their own purposes. */
23816 if (alg == rep_prefix_1_byte
23817 || alg == rep_prefix_4_byte
23818 || alg == rep_prefix_8_byte)
23819 return !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
23820 || (memset ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
23821 return true;
23822 }
23823
23824 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
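/* Hypothetical example (the table entries are made up, not taken from any real
   tuning): with a cost table of {{256, unrolled_loop, false},
   {-1, rep_prefix_4_byte, false}}, an expected size of 100 selects
   unrolled_loop, while an expected size of 100000 selects rep_prefix_4_byte,
   assuming the rep-prefix algorithms are usable in the current context.  */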
23825 static enum stringop_alg
23826 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size,
23827 unsigned HOST_WIDE_INT min_size, unsigned HOST_WIDE_INT max_size,
23828 bool memset, bool zero_memset, int *dynamic_check, bool *noalign)
23829 {
23830 const struct stringop_algs * algs;
23831 bool optimize_for_speed;
23832 int max = 0;
23833 const struct processor_costs *cost;
23834 int i;
23835 bool any_alg_usable_p = false;
23836
23837 *noalign = false;
23838 *dynamic_check = -1;
23839
23840 /* Even if the string operation call is cold, we still might spend a lot
23841 of time processing large blocks. */
23842 if (optimize_function_for_size_p (cfun)
23843 || (optimize_insn_for_size_p ()
23844 && (max_size < 256
23845 || (expected_size != -1 && expected_size < 256))))
23846 optimize_for_speed = false;
23847 else
23848 optimize_for_speed = true;
23849
23850 cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
23851 if (memset)
23852 algs = &cost->memset[TARGET_64BIT != 0];
23853 else
23854 algs = &cost->memcpy[TARGET_64BIT != 0];
23855
23856   /* Find the maximal size for which a user-defined algorithm is specified.  */
23857 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
23858 {
23859 enum stringop_alg candidate = algs->size[i].alg;
23860 bool usable = alg_usable_p (candidate, memset);
23861 any_alg_usable_p |= usable;
23862
23863 if (candidate != libcall && candidate && usable)
23864 max = algs->size[i].max;
23865 }
23866
23867   /* If the expected size is not known but the max size is small enough
23868      so that the inline version is a win, set the expected size into
23869      the range.  */
23870 if (((max > 1 && (unsigned HOST_WIDE_INT) max >= max_size) || max == -1)
23871 && expected_size == -1)
23872 expected_size = min_size / 2 + max_size / 2;
23873
23874   /* If the user specified the algorithm, honor it if possible.  */
23875 if (ix86_stringop_alg != no_stringop
23876 && alg_usable_p (ix86_stringop_alg, memset))
23877 return ix86_stringop_alg;
23878 /* rep; movq or rep; movl is the smallest variant. */
23879 else if (!optimize_for_speed)
23880 {
23881 *noalign = true;
23882 if (!count || (count & 3) || (memset && !zero_memset))
23883 return alg_usable_p (rep_prefix_1_byte, memset)
23884 ? rep_prefix_1_byte : loop_1_byte;
23885 else
23886 return alg_usable_p (rep_prefix_4_byte, memset)
23887 ? rep_prefix_4_byte : loop;
23888 }
23889   /* Very tiny blocks are best handled via the loop; REP is expensive to
23890      set up.  */
23891 else if (expected_size != -1 && expected_size < 4)
23892 return loop_1_byte;
23893 else if (expected_size != -1)
23894 {
23895 enum stringop_alg alg = libcall;
23896 bool alg_noalign = false;
23897 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
23898 {
23899 /* We get here if the algorithms that were not libcall-based
23900 were rep-prefix based and we are unable to use rep prefixes
23901 based on global register usage. Break out of the loop and
23902 use the heuristic below. */
23903 if (algs->size[i].max == 0)
23904 break;
23905 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
23906 {
23907 enum stringop_alg candidate = algs->size[i].alg;
23908
23909 if (candidate != libcall && alg_usable_p (candidate, memset))
23910 {
23911 alg = candidate;
23912 alg_noalign = algs->size[i].noalign;
23913 }
23914 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
23915 last non-libcall inline algorithm. */
23916 if (TARGET_INLINE_ALL_STRINGOPS)
23917 {
23918 	      /* When the current size is best copied by a libcall, but we
23919 	         are still forced to inline, run the heuristic below, which
23920 	         picks code for medium-sized blocks.  */
23921 if (alg != libcall)
23922 {
23923 *noalign = alg_noalign;
23924 return alg;
23925 }
23926 break;
23927 }
23928 else if (alg_usable_p (candidate, memset))
23929 {
23930 *noalign = algs->size[i].noalign;
23931 return candidate;
23932 }
23933 }
23934 }
23935 }
23936   /* When asked to inline the call anyway, try to pick a meaningful choice.
23937      We look for the maximal size of a block that is faster to copy by hand
23938      and take blocks of at most that size, guessing that the average size
23939      will be roughly half of the maximum.
23940
23941 If this turns out to be bad, we might simply specify the preferred
23942 choice in ix86_costs. */
23943 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
23944 && (algs->unknown_size == libcall
23945 || !alg_usable_p (algs->unknown_size, memset)))
23946 {
23947 enum stringop_alg alg;
23948
23949 /* If there aren't any usable algorithms, then recursing on
23950 smaller sizes isn't going to find anything. Just return the
23951 simple byte-at-a-time copy loop. */
23952 if (!any_alg_usable_p)
23953 {
23954 /* Pick something reasonable. */
23955 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
23956 *dynamic_check = 128;
23957 return loop_1_byte;
23958 }
23959 if (max <= 0)
23960 max = 4096;
23961 alg = decide_alg (count, max / 2, min_size, max_size, memset,
23962 zero_memset, dynamic_check, noalign);
23963 gcc_assert (*dynamic_check == -1);
23964 gcc_assert (alg != libcall);
23965 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
23966 *dynamic_check = max;
23967 return alg;
23968 }
23969 return (alg_usable_p (algs->unknown_size, memset)
23970 ? algs->unknown_size : libcall);
23971 }
23972
23973 /* Decide on alignment. We know that the operand is already aligned to ALIGN
23974 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
23975 static int
23976 decide_alignment (int align,
23977 enum stringop_alg alg,
23978 int expected_size,
23979 enum machine_mode move_mode)
23980 {
23981 int desired_align = 0;
23982
23983 gcc_assert (alg != no_stringop);
23984
23985 if (alg == libcall)
23986 return 0;
23987 if (move_mode == VOIDmode)
23988 return 0;
23989
23990 desired_align = GET_MODE_SIZE (move_mode);
23991   /* PentiumPro has special logic triggering for 8-byte-aligned blocks,
23992      copying a whole cache line at once.  */
23993 if (TARGET_PENTIUMPRO
23994 && (alg == rep_prefix_4_byte || alg == rep_prefix_1_byte))
23995 desired_align = 8;
23996
23997 if (optimize_size)
23998 desired_align = 1;
23999 if (desired_align < align)
24000 desired_align = align;
24001 if (expected_size != -1 && expected_size < 4)
24002 desired_align = align;
24003
24004 return desired_align;
24005 }
24006
24007
24008 /* Helper function for memset.  For the QImode value 0xXY produce
24009    0xXYXYXYXY of the width specified by MODE.  This is essentially
24010    a * 0x01010101, but we can do slightly better than
24011    synth_mult by unwinding the sequence by hand on CPUs with
24012    slow multiply.  */
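/* Example of the hand-unwound path below (a sketch, value chosen arbitrarily):
   starting from VAL == 0x5A in SImode, the insv/shift-and-IOR steps build
   0x5A5A and then 0x5A5A5A5A; in DImode one more shift-by-32 and IOR yields
   0x5A5A5A5A5A5A5A5A.  */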
24013 static rtx
24014 promote_duplicated_reg (enum machine_mode mode, rtx val)
24015 {
24016 enum machine_mode valmode = GET_MODE (val);
24017 rtx tmp;
24018 int nops = mode == DImode ? 3 : 2;
24019
24020 gcc_assert (mode == SImode || mode == DImode || val == const0_rtx);
24021 if (val == const0_rtx)
24022 return copy_to_mode_reg (mode, CONST0_RTX (mode));
24023 if (CONST_INT_P (val))
24024 {
24025 HOST_WIDE_INT v = INTVAL (val) & 255;
24026
24027 v |= v << 8;
24028 v |= v << 16;
24029 if (mode == DImode)
24030 v |= (v << 16) << 16;
24031 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
24032 }
24033
24034 if (valmode == VOIDmode)
24035 valmode = QImode;
24036 if (valmode != QImode)
24037 val = gen_lowpart (QImode, val);
24038 if (mode == QImode)
24039 return val;
24040 if (!TARGET_PARTIAL_REG_STALL)
24041 nops--;
24042 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
24043 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
24044 <= (ix86_cost->shift_const + ix86_cost->add) * nops
24045 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
24046 {
24047 rtx reg = convert_modes (mode, QImode, val, true);
24048 tmp = promote_duplicated_reg (mode, const1_rtx);
24049 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
24050 OPTAB_DIRECT);
24051 }
24052 else
24053 {
24054 rtx reg = convert_modes (mode, QImode, val, true);
24055
24056 if (!TARGET_PARTIAL_REG_STALL)
24057 if (mode == SImode)
24058 emit_insn (gen_movsi_insv_1 (reg, reg));
24059 else
24060 emit_insn (gen_movdi_insv_1 (reg, reg));
24061 else
24062 {
24063 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
24064 NULL, 1, OPTAB_DIRECT);
24065 reg =
24066 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24067 }
24068 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
24069 NULL, 1, OPTAB_DIRECT);
24070 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24071 if (mode == SImode)
24072 return reg;
24073 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
24074 NULL, 1, OPTAB_DIRECT);
24075 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24076 return reg;
24077 }
24078 }
24079
24080 /* Duplicate value VAL using promote_duplicated_reg into the maximal size that
24081    will be needed by the main loop copying SIZE_NEEDED chunks and by the
24082    prologue getting alignment from ALIGN to DESIRED_ALIGN.  */
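/* E.g. (a sketch): on a 64-bit target with SIZE_NEEDED == 8 the value is
   broadcast to DImode; with SIZE_NEEDED == 1 and no extra alignment work to
   do, VAL is returned unchanged.  */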
24083 static rtx
24084 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align,
24085 int align)
24086 {
24087 rtx promoted_val;
24088
24089 if (TARGET_64BIT
24090 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
24091 promoted_val = promote_duplicated_reg (DImode, val);
24092 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
24093 promoted_val = promote_duplicated_reg (SImode, val);
24094 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
24095 promoted_val = promote_duplicated_reg (HImode, val);
24096 else
24097 promoted_val = val;
24098
24099 return promoted_val;
24100 }
24101
24102 /* Expand string move (memcpy) or store (memset) operation.  Use i386 string
24103 operations when profitable. The code depends upon architecture, block size
24104 and alignment, but always has one of the following overall structures:
24105
24106 Aligned move sequence:
24107
24108    1) Prologue guard: Conditional that jumps up to epilogues for small
24109       blocks that can be handled by the epilogue alone.  This is faster
24110       but also needed for correctness, since the prologue assumes the block
24111       is larger than the desired alignment.
24112
24113 Optional dynamic check for size and libcall for large
24114 blocks is emitted here too, with -minline-stringops-dynamically.
24115
24116 2) Prologue: copy first few bytes in order to get destination
24117 aligned to DESIRED_ALIGN. It is emitted only when ALIGN is less
24118 than DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be
24119 copied. We emit either a jump tree on power of two sized
24120 blocks, or a byte loop.
24121
24122 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
24123 with specified algorithm.
24124
24125 4) Epilogue: code copying tail of the block that is too small to be
24126 handled by main body (or up to size guarded by prologue guard).
24127
24128 Misaligned move sequence
24129
24130    1) Misaligned move prologue/epilogue containing:
24131 a) Prologue handling small memory blocks and jumping to done_label
24132 (skipped if blocks are known to be large enough)
24133       b) Single move copying the first DESIRED_ALIGN-ALIGN bytes, if alignment
24134 	  is needed, by a single possibly misaligned move
24135 (skipped if alignment is not needed)
24136 c) Copy of last SIZE_NEEDED bytes by possibly misaligned moves
24137
24138 2) Zero size guard dispatching to done_label, if needed
24139
24140    3) Dispatch to a library call, if needed
24141
24142    4) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
24143 with specified algorithm. */
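/* A rough end-to-end illustration (numbers are examples only): for a memset
   expanded with the unrolled_loop algorithm on x86-64, SIZE_NEEDED is
   4 * 8 == 32 bytes, so the aligned sequence above becomes: a guard jumping
   to the epilogue when the count is below 32, a prologue storing up to 7
   bytes to reach 8-byte destination alignment, a main loop storing 32 bytes
   per iteration, and an epilogue handling the remaining count & 31 bytes.  */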
24144 bool
24145 ix86_expand_set_or_movmem (rtx dst, rtx src, rtx count_exp, rtx val_exp,
24146 rtx align_exp, rtx expected_align_exp,
24147 rtx expected_size_exp, rtx min_size_exp,
24148 rtx max_size_exp, rtx probable_max_size_exp,
24149 bool issetmem)
24150 {
24151 rtx destreg;
24152 rtx srcreg = NULL;
24153 rtx label = NULL;
24154 rtx tmp;
24155 rtx jump_around_label = NULL;
24156 HOST_WIDE_INT align = 1;
24157 unsigned HOST_WIDE_INT count = 0;
24158 HOST_WIDE_INT expected_size = -1;
24159 int size_needed = 0, epilogue_size_needed;
24160 int desired_align = 0, align_bytes = 0;
24161 enum stringop_alg alg;
24162 rtx promoted_val = NULL;
24163 rtx vec_promoted_val = NULL;
24164 bool force_loopy_epilogue = false;
24165 int dynamic_check;
24166 bool need_zero_guard = false;
24167 bool noalign;
24168 enum machine_mode move_mode = VOIDmode;
24169 int unroll_factor = 1;
24170 /* TODO: Once value ranges are available, fill in proper data. */
24171 unsigned HOST_WIDE_INT min_size = 0;
24172 unsigned HOST_WIDE_INT max_size = -1;
24173 unsigned HOST_WIDE_INT probable_max_size = -1;
24174 bool misaligned_prologue_used = false;
24175
24176 if (CONST_INT_P (align_exp))
24177 align = INTVAL (align_exp);
24178   /* i386 can do misaligned access at a reasonably increased cost.  */
24179 if (CONST_INT_P (expected_align_exp)
24180 && INTVAL (expected_align_exp) > align)
24181 align = INTVAL (expected_align_exp);
24182 /* ALIGN is the minimum of destination and source alignment, but we care here
24183 just about destination alignment. */
24184 else if (!issetmem
24185 && MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
24186 align = MEM_ALIGN (dst) / BITS_PER_UNIT;
24187
24188 if (CONST_INT_P (count_exp))
24189 {
24190 min_size = max_size = probable_max_size = count = expected_size
24191 = INTVAL (count_exp);
24192 /* When COUNT is 0, there is nothing to do. */
24193 if (!count)
24194 return true;
24195 }
24196 else
24197 {
24198 if (min_size_exp)
24199 min_size = INTVAL (min_size_exp);
24200 if (max_size_exp)
24201 max_size = INTVAL (max_size_exp);
24202 if (probable_max_size_exp)
24203 probable_max_size = INTVAL (probable_max_size_exp);
24204 if (CONST_INT_P (expected_size_exp))
24205 expected_size = INTVAL (expected_size_exp);
24206 }
24207
24208 /* Make sure we don't need to care about overflow later on. */
24209 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
24210 return false;
24211
24212 /* Step 0: Decide on preferred algorithm, desired alignment and
24213 size of chunks to be copied by main loop. */
24214 alg = decide_alg (count, expected_size, min_size, probable_max_size,
24215 issetmem,
24216 issetmem && val_exp == const0_rtx,
24217 &dynamic_check, &noalign);
24218 if (alg == libcall)
24219 return false;
24220 gcc_assert (alg != no_stringop);
24221
24222   /* For now the vector version of memset is generated only for memory zeroing,
24223      as creating the promoted vector value is very cheap in this case.  */
24224 if (issetmem && alg == vector_loop && val_exp != const0_rtx)
24225 alg = unrolled_loop;
24226
24227 if (!count)
24228 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
24229 destreg = ix86_copy_addr_to_reg (XEXP (dst, 0));
24230 if (!issetmem)
24231 srcreg = ix86_copy_addr_to_reg (XEXP (src, 0));
24232
24233 unroll_factor = 1;
24234 move_mode = word_mode;
24235 switch (alg)
24236 {
24237 case libcall:
24238 case no_stringop:
24239 case last_alg:
24240 gcc_unreachable ();
24241 case loop_1_byte:
24242 need_zero_guard = true;
24243 move_mode = QImode;
24244 break;
24245 case loop:
24246 need_zero_guard = true;
24247 break;
24248 case unrolled_loop:
24249 need_zero_guard = true;
24250 unroll_factor = (TARGET_64BIT ? 4 : 2);
24251 break;
24252 case vector_loop:
24253 need_zero_guard = true;
24254 unroll_factor = 4;
24255 /* Find the widest supported mode. */
24256 move_mode = word_mode;
24257 while (optab_handler (mov_optab, GET_MODE_WIDER_MODE (move_mode))
24258 != CODE_FOR_nothing)
24259 move_mode = GET_MODE_WIDER_MODE (move_mode);
24260
24261 /* Find the corresponding vector mode with the same size as MOVE_MODE.
24262 MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
24263 if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
24264 {
24265 int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
24266 move_mode = mode_for_vector (word_mode, nunits);
24267 if (optab_handler (mov_optab, move_mode) == CODE_FOR_nothing)
24268 move_mode = word_mode;
24269 }
24270 gcc_assert (optab_handler (mov_optab, move_mode) != CODE_FOR_nothing);
24271 break;
24272 case rep_prefix_8_byte:
24273 move_mode = DImode;
24274 break;
24275 case rep_prefix_4_byte:
24276 move_mode = SImode;
24277 break;
24278 case rep_prefix_1_byte:
24279 move_mode = QImode;
24280 break;
24281 }
24282 size_needed = GET_MODE_SIZE (move_mode) * unroll_factor;
24283 epilogue_size_needed = size_needed;
24284
24285 desired_align = decide_alignment (align, alg, expected_size, move_mode);
24286 if (!TARGET_ALIGN_STRINGOPS || noalign)
24287 align = desired_align;
24288
24289 /* Step 1: Prologue guard. */
24290
24291 /* Alignment code needs count to be in register. */
24292 if (CONST_INT_P (count_exp) && desired_align > align)
24293 {
24294 if (INTVAL (count_exp) > desired_align
24295 && INTVAL (count_exp) > size_needed)
24296 {
24297 align_bytes
24298 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
24299 if (align_bytes <= 0)
24300 align_bytes = 0;
24301 else
24302 align_bytes = desired_align - align_bytes;
24303 }
24304 if (align_bytes == 0)
24305 count_exp = force_reg (counter_mode (count_exp), count_exp);
24306 }
24307 gcc_assert (desired_align >= 1 && align >= 1);
24308
24309   /* Misaligned move sequences handle both prologue and epilogue at once.
24310      Default code generation results in smaller code for large alignments
24311      and also avoids redundant work when sizes are known precisely.  */
24312 misaligned_prologue_used
24313 = (TARGET_MISALIGNED_MOVE_STRING_PRO_EPILOGUES
24314 && MAX (desired_align, epilogue_size_needed) <= 32
24315 && desired_align <= epilogue_size_needed
24316 && ((desired_align > align && !align_bytes)
24317 || (!count && epilogue_size_needed > 1)));
24318
24319   /* Do the cheap promotion to allow better CSE across the
24320      main loop and epilogue (i.e. one load of the big constant in
24321      front of all the code).
24322      For now the misaligned move sequences do not have a fast path
24323      without broadcasting.  */
24324 if (issetmem && ((CONST_INT_P (val_exp) || misaligned_prologue_used)))
24325 {
24326 if (alg == vector_loop)
24327 {
24328 gcc_assert (val_exp == const0_rtx);
24329 vec_promoted_val = promote_duplicated_reg (move_mode, val_exp);
24330 promoted_val = promote_duplicated_reg_to_size (val_exp,
24331 GET_MODE_SIZE (word_mode),
24332 desired_align, align);
24333 }
24334 else
24335 {
24336 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
24337 desired_align, align);
24338 }
24339 }
24340   /* Misaligned move sequences handle both prologues and epilogues at once.
24341      Default code generation results in smaller code for large alignments and
24342      also avoids redundant work when sizes are known precisely.  */
24343 if (misaligned_prologue_used)
24344 {
24345       /* The misaligned move prologue handles small blocks by itself.  */
24346 expand_set_or_movmem_prologue_epilogue_by_misaligned_moves
24347 (dst, src, &destreg, &srcreg,
24348 move_mode, promoted_val, vec_promoted_val,
24349 &count_exp,
24350 &jump_around_label,
24351 desired_align < align
24352 ? MAX (desired_align, epilogue_size_needed) : epilogue_size_needed,
24353 desired_align, align, &min_size, dynamic_check, issetmem);
24354 if (!issetmem)
24355 src = change_address (src, BLKmode, srcreg);
24356 dst = change_address (dst, BLKmode, destreg);
24357 set_mem_align (dst, desired_align * BITS_PER_UNIT);
24358 epilogue_size_needed = 0;
24359 if (need_zero_guard && !min_size)
24360 {
24361 	  /* It is possible that we copied enough so that the main loop will not
24362 	     execute.  */
24363 gcc_assert (size_needed > 1);
24364 if (jump_around_label == NULL_RTX)
24365 jump_around_label = gen_label_rtx ();
24366 emit_cmp_and_jump_insns (count_exp,
24367 GEN_INT (size_needed),
24368 LTU, 0, counter_mode (count_exp), 1, jump_around_label);
24369 if (expected_size == -1
24370 || expected_size < (desired_align - align) / 2 + size_needed)
24371 predict_jump (REG_BR_PROB_BASE * 20 / 100);
24372 else
24373 predict_jump (REG_BR_PROB_BASE * 60 / 100);
24374 }
24375 }
24376   /* Ensure that the alignment prologue won't copy past the end of the block.  */
24377 else if (size_needed > 1 || (desired_align > 1 && desired_align > align))
24378 {
24379 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
24380       /* The epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
24381 	 Make sure EPILOGUE_SIZE_NEEDED is a power of 2.  */
24382 epilogue_size_needed = 1 << (floor_log2 (epilogue_size_needed) + 1);
24383
24384       /* To improve performance of small blocks, we jump around the VAL
24385 	 promoting code.  This means that if the promoted VAL is not constant,
24386 	 we might not use it in the epilogue and have to use the byte
24387 	 loop variant.  */
24388 if (issetmem && epilogue_size_needed > 2 && !promoted_val)
24389 force_loopy_epilogue = true;
24390 if ((count && count < (unsigned HOST_WIDE_INT) epilogue_size_needed)
24391 || max_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
24392 {
24393 	  /* If the main algorithm works on QImode, no epilogue is needed.
24394 	     For small sizes just don't align anything.  */
24395 if (size_needed == 1)
24396 desired_align = align;
24397 else
24398 goto epilogue;
24399 }
24400 else if (!count
24401 && min_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
24402 {
24403 label = gen_label_rtx ();
24404 emit_cmp_and_jump_insns (count_exp,
24405 GEN_INT (epilogue_size_needed),
24406 LTU, 0, counter_mode (count_exp), 1, label);
24407 if (expected_size == -1 || expected_size < epilogue_size_needed)
24408 predict_jump (REG_BR_PROB_BASE * 60 / 100);
24409 else
24410 predict_jump (REG_BR_PROB_BASE * 20 / 100);
24411 }
24412 }
24413
24414   /* Emit code to decide at runtime whether a library call or inline code
24415      should be used.  */
24416 if (dynamic_check != -1)
24417 {
24418 if (!issetmem && CONST_INT_P (count_exp))
24419 {
24420 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
24421 {
24422 emit_block_move_via_libcall (dst, src, count_exp, false);
24423 count_exp = const0_rtx;
24424 goto epilogue;
24425 }
24426 }
24427 else
24428 {
24429 rtx hot_label = gen_label_rtx ();
24430 if (jump_around_label == NULL_RTX)
24431 jump_around_label = gen_label_rtx ();
24432 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
24433 LEU, 0, counter_mode (count_exp),
24434 1, hot_label);
24435 predict_jump (REG_BR_PROB_BASE * 90 / 100);
24436 if (issetmem)
24437 set_storage_via_libcall (dst, count_exp, val_exp, false);
24438 else
24439 emit_block_move_via_libcall (dst, src, count_exp, false);
24440 emit_jump (jump_around_label);
24441 emit_label (hot_label);
24442 }
24443 }
24444
24445 /* Step 2: Alignment prologue. */
24446   /* Do the expensive promotion once we have branched off the small blocks.  */
24447 if (issetmem && !promoted_val)
24448 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
24449 desired_align, align);
24450
24451 if (desired_align > align && !misaligned_prologue_used)
24452 {
24453 if (align_bytes == 0)
24454 {
24455 	  /* Except for the first move in the prologue, we no longer know
24456 	     the constant offset in aliasing info.  It doesn't seem worth
24457 	     the pain to maintain it for the first move, so throw away
24458 	     the info early.  */
24459 dst = change_address (dst, BLKmode, destreg);
24460 if (!issetmem)
24461 src = change_address (src, BLKmode, srcreg);
24462 dst = expand_set_or_movmem_prologue (dst, src, destreg, srcreg,
24463 promoted_val, vec_promoted_val,
24464 count_exp, align, desired_align,
24465 issetmem);
24466 /* At most desired_align - align bytes are copied. */
24467 if (min_size < (unsigned)(desired_align - align))
24468 min_size = 0;
24469 else
24470 min_size -= desired_align - align;
24471 }
24472 else
24473 {
24474 /* If we know how many bytes need to be stored before dst is
24475 sufficiently aligned, maintain aliasing info accurately. */
24476 dst = expand_set_or_movmem_constant_prologue (dst, &src, destreg,
24477 srcreg,
24478 promoted_val,
24479 vec_promoted_val,
24480 desired_align,
24481 align_bytes,
24482 issetmem);
24483
24484 count_exp = plus_constant (counter_mode (count_exp),
24485 count_exp, -align_bytes);
24486 count -= align_bytes;
24487 min_size -= align_bytes;
24488 max_size -= align_bytes;
24489 }
24490 if (need_zero_guard
24491 && !min_size
24492 && (count < (unsigned HOST_WIDE_INT) size_needed
24493 || (align_bytes == 0
24494 && count < ((unsigned HOST_WIDE_INT) size_needed
24495 + desired_align - align))))
24496 {
24497 	  /* It is possible that we copied enough so that the main loop will not
24498 	     execute.  */
24499 gcc_assert (size_needed > 1);
24500 if (label == NULL_RTX)
24501 label = gen_label_rtx ();
24502 emit_cmp_and_jump_insns (count_exp,
24503 GEN_INT (size_needed),
24504 LTU, 0, counter_mode (count_exp), 1, label);
24505 if (expected_size == -1
24506 || expected_size < (desired_align - align) / 2 + size_needed)
24507 predict_jump (REG_BR_PROB_BASE * 20 / 100);
24508 else
24509 predict_jump (REG_BR_PROB_BASE * 60 / 100);
24510 }
24511 }
24512 if (label && size_needed == 1)
24513 {
24514 emit_label (label);
24515 LABEL_NUSES (label) = 1;
24516 label = NULL;
24517 epilogue_size_needed = 1;
24518 if (issetmem)
24519 promoted_val = val_exp;
24520 }
24521 else if (label == NULL_RTX && !misaligned_prologue_used)
24522 epilogue_size_needed = size_needed;
24523
24524 /* Step 3: Main loop. */
24525
24526 switch (alg)
24527 {
24528 case libcall:
24529 case no_stringop:
24530 case last_alg:
24531 gcc_unreachable ();
24532 case loop_1_byte:
24533 case loop:
24534 case unrolled_loop:
24535 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, promoted_val,
24536 count_exp, move_mode, unroll_factor,
24537 expected_size, issetmem);
24538 break;
24539 case vector_loop:
24540 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg,
24541 vec_promoted_val, count_exp, move_mode,
24542 unroll_factor, expected_size, issetmem);
24543 break;
24544 case rep_prefix_8_byte:
24545 case rep_prefix_4_byte:
24546 case rep_prefix_1_byte:
24547 expand_set_or_movmem_via_rep (dst, src, destreg, srcreg, promoted_val,
24548 val_exp, count_exp, move_mode, issetmem);
24549 break;
24550 }
24551   /* Properly adjust the offset of src and dest memory for aliasing.  */
24552 if (CONST_INT_P (count_exp))
24553 {
24554 if (!issetmem)
24555 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
24556 (count / size_needed) * size_needed);
24557 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
24558 (count / size_needed) * size_needed);
24559 }
24560 else
24561 {
24562 if (!issetmem)
24563 src = change_address (src, BLKmode, srcreg);
24564 dst = change_address (dst, BLKmode, destreg);
24565 }
24566
24567 /* Step 4: Epilogue to copy the remaining bytes. */
24568 epilogue:
24569 if (label)
24570 {
24571       /* When the main loop is done, COUNT_EXP might hold the original count,
24572 	 while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
24573 	 Epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
24574 	 bytes.  Compensate if needed.  */
24575
24576 if (size_needed < epilogue_size_needed)
24577 {
24578 tmp =
24579 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
24580 GEN_INT (size_needed - 1), count_exp, 1,
24581 OPTAB_DIRECT);
24582 if (tmp != count_exp)
24583 emit_move_insn (count_exp, tmp);
24584 }
24585 emit_label (label);
24586 LABEL_NUSES (label) = 1;
24587 }
24588
24589 if (count_exp != const0_rtx && epilogue_size_needed > 1)
24590 {
24591 if (force_loopy_epilogue)
24592 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
24593 epilogue_size_needed);
24594 else
24595 {
24596 if (issetmem)
24597 expand_setmem_epilogue (dst, destreg, promoted_val,
24598 vec_promoted_val, count_exp,
24599 epilogue_size_needed);
24600 else
24601 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
24602 epilogue_size_needed);
24603 }
24604 }
24605 if (jump_around_label)
24606 emit_label (jump_around_label);
24607 return true;
24608 }
24609
24610
24611 /* Expand the appropriate insns for doing strlen if not just doing
24612 repnz; scasb
24613
24614 out = result, initialized with the start address
24615 align_rtx = alignment of the address.
24616    scratch = scratch register, initialized with the start address when
24617 not aligned, otherwise undefined
24618
24619 This is just the body. It needs the initializations mentioned above and
24620    some address computation at the end.  These things are done in i386.md.  */
24621
24622 static void
24623 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
24624 {
24625 int align;
24626 rtx tmp;
24627 rtx align_2_label = NULL_RTX;
24628 rtx align_3_label = NULL_RTX;
24629 rtx align_4_label = gen_label_rtx ();
24630 rtx end_0_label = gen_label_rtx ();
24631 rtx mem;
24632 rtx tmpreg = gen_reg_rtx (SImode);
24633 rtx scratch = gen_reg_rtx (SImode);
24634 rtx cmp;
24635
24636 align = 0;
24637 if (CONST_INT_P (align_rtx))
24638 align = INTVAL (align_rtx);
24639
24640 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
24641
24642 /* Is there a known alignment and is it less than 4? */
24643 if (align < 4)
24644 {
24645 rtx scratch1 = gen_reg_rtx (Pmode);
24646 emit_move_insn (scratch1, out);
24647 /* Is there a known alignment and is it not 2? */
24648 if (align != 2)
24649 {
24650 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
24651 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
24652
24653 /* Leave just the 3 lower bits. */
24654 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
24655 NULL_RTX, 0, OPTAB_WIDEN);
24656
24657 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
24658 Pmode, 1, align_4_label);
24659 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
24660 Pmode, 1, align_2_label);
24661 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
24662 Pmode, 1, align_3_label);
24663 }
24664 else
24665 {
24666 	  /* Since the alignment is 2, we have to check 2 or 0 bytes;
24667 	     check whether it is aligned to a 4-byte boundary.  */
24668
24669 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
24670 NULL_RTX, 0, OPTAB_WIDEN);
24671
24672 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
24673 Pmode, 1, align_4_label);
24674 }
24675
24676 mem = change_address (src, QImode, out);
24677
24678 /* Now compare the bytes. */
24679
24680       /* Compare the first n unaligned bytes on a byte-by-byte basis.  */
24681 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
24682 QImode, 1, end_0_label);
24683
24684 /* Increment the address. */
24685 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
24686
24687       /* Not needed with an alignment of 2.  */
24688 if (align != 2)
24689 {
24690 emit_label (align_2_label);
24691
24692 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
24693 end_0_label);
24694
24695 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
24696
24697 emit_label (align_3_label);
24698 }
24699
24700 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
24701 end_0_label);
24702
24703 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
24704 }
24705
24706   /* Generate a loop to check 4 bytes at a time.  It is not a good idea to
24707      align this loop; doing so only enlarges the code and does not speed
24708      it up.  */
24709 emit_label (align_4_label);
24710
24711 mem = change_address (src, SImode, out);
24712 emit_move_insn (scratch, mem);
24713 emit_insn (ix86_gen_add3 (out, out, GEN_INT (4)));
24714
24715   /* This formula yields a nonzero result iff one of the bytes is zero.
24716      This saves three branches inside the loop and many cycles.  */
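  /* Worked example (added for clarity): for a loaded word scratch ==
     0x41420043, whose second-lowest byte is zero, scratch + (-0x01010101)
     is 0x4040FF42 and ~scratch is 0xBEBDFFBC; ANDing those two with
     0x80808080 leaves 0x00008000, which is nonzero precisely because one
     byte of scratch is zero.  */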
24717
24718 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
24719 emit_insn (gen_one_cmplsi2 (scratch, scratch));
24720 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
24721 emit_insn (gen_andsi3 (tmpreg, tmpreg,
24722 gen_int_mode (0x80808080, SImode)));
24723 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
24724 align_4_label);
24725
24726 if (TARGET_CMOVE)
24727 {
24728 rtx reg = gen_reg_rtx (SImode);
24729 rtx reg2 = gen_reg_rtx (Pmode);
24730 emit_move_insn (reg, tmpreg);
24731 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
24732
24733 /* If zero is not in the first two bytes, move two bytes forward. */
24734 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
24735 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
24736 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
24737 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
24738 gen_rtx_IF_THEN_ELSE (SImode, tmp,
24739 reg,
24740 tmpreg)));
24741       /* Emit lea manually to avoid clobbering the flags.  */
24742 emit_insn (gen_rtx_SET (SImode, reg2,
24743 gen_rtx_PLUS (Pmode, out, const2_rtx)));
24744
24745 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
24746 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
24747 emit_insn (gen_rtx_SET (VOIDmode, out,
24748 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
24749 reg2,
24750 out)));
24751 }
24752 else
24753 {
24754 rtx end_2_label = gen_label_rtx ();
24755 /* Is zero in the first two bytes? */
24756
24757 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
24758 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
24759 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
24760 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
24761 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
24762 pc_rtx);
24763 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
24764 JUMP_LABEL (tmp) = end_2_label;
24765
24766 /* Not in the first two. Move two bytes forward. */
24767 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
24768 emit_insn (ix86_gen_add3 (out, out, const2_rtx));
24769
24770 emit_label (end_2_label);
24771
24772 }
24773
24774 /* Avoid branch in fixing the byte. */
24775 tmpreg = gen_lowpart (QImode, tmpreg);
24776 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
24777 tmp = gen_rtx_REG (CCmode, FLAGS_REG);
24778 cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx);
24779 emit_insn (ix86_gen_sub3_carry (out, out, GEN_INT (3), tmp, cmp));
24780
24781 emit_label (end_0_label);
24782 }
24783
24784 /* Expand strlen. */
24785
24786 bool
24787 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
24788 {
24789 rtx addr, scratch1, scratch2, scratch3, scratch4;
24790
24791   /* The generic case of the strlen expander is long.  Avoid expanding
24792      it unless TARGET_INLINE_ALL_STRINGOPS.  */
24793
24794 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
24795 && !TARGET_INLINE_ALL_STRINGOPS
24796 && !optimize_insn_for_size_p ()
24797 && (!CONST_INT_P (align) || INTVAL (align) < 4))
24798 return false;
24799
24800 addr = force_reg (Pmode, XEXP (src, 0));
24801 scratch1 = gen_reg_rtx (Pmode);
24802
24803 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
24804 && !optimize_insn_for_size_p ())
24805 {
24806       /* Well, it seems that some optimizer does not combine a call like
24807 	 foo(strlen(bar), strlen(bar));
24808 	 when the move and the subtraction are done here.  It does calculate
24809 	 the length just once when these instructions are done inside
24810 	 output_strlen_unroll().  But I think that since &bar[strlen(bar)] is
24811 	 often used and I use one fewer register for the lifetime of
24812 	 output_strlen_unroll(), this is better.  */
24813
24814 emit_move_insn (out, addr);
24815
24816 ix86_expand_strlensi_unroll_1 (out, src, align);
24817
24818 /* strlensi_unroll_1 returns the address of the zero at the end of
24819 the string, like memchr(), so compute the length by subtracting
24820 the start address. */
24821 emit_insn (ix86_gen_sub3 (out, out, addr));
24822 }
24823 else
24824 {
24825 rtx unspec;
24826
24827 /* Can't use this if the user has appropriated eax, ecx, or edi. */
24828 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
24829 return false;
24830
24831 scratch2 = gen_reg_rtx (Pmode);
24832 scratch3 = gen_reg_rtx (Pmode);
24833 scratch4 = force_reg (Pmode, constm1_rtx);
24834
24835 emit_move_insn (scratch3, addr);
24836 eoschar = force_reg (QImode, eoschar);
24837
24838 src = replace_equiv_address_nv (src, scratch3);
24839
24840 /* If .md starts supporting :P, this can be done in .md. */
24841 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
24842 scratch4), UNSPEC_SCAS);
24843 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
24844 emit_insn (ix86_gen_one_cmpl2 (scratch2, scratch1));
24845 emit_insn (ix86_gen_add3 (out, scratch2, constm1_rtx));
24846 }
24847 return true;
24848 }
24849
24850 /* For a given symbol (function) construct code to compute the address of its
24851    PLT entry in the large x86-64 PIC model.  */
24852 static rtx
24853 construct_plt_address (rtx symbol)
24854 {
24855 rtx tmp, unspec;
24856
24857 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
24858 gcc_assert (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF);
24859 gcc_assert (Pmode == DImode);
24860
24861 tmp = gen_reg_rtx (Pmode);
24862 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
24863
24864 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
24865 emit_insn (ix86_gen_add3 (tmp, tmp, pic_offset_table_rtx));
24866 return tmp;
24867 }
24868
24869 rtx
24870 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
24871 rtx callarg2,
24872 rtx pop, bool sibcall)
24873 {
24874 unsigned int const cregs_size
24875 = ARRAY_SIZE (x86_64_ms_sysv_extra_clobbered_registers);
24876 rtx vec[3 + cregs_size];
24877 rtx use = NULL, call;
24878 unsigned int vec_len = 0;
24879
24880 if (pop == const0_rtx)
24881 pop = NULL;
24882 gcc_assert (!TARGET_64BIT || !pop);
24883
24884 if (TARGET_MACHO && !TARGET_64BIT)
24885 {
24886 #if TARGET_MACHO
24887 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
24888 fnaddr = machopic_indirect_call_target (fnaddr);
24889 #endif
24890 }
24891 else
24892 {
24893 /* Static functions and indirect calls don't need the pic register. */
24894 if (flag_pic
24895 && (!TARGET_64BIT
24896 || (ix86_cmodel == CM_LARGE_PIC
24897 && DEFAULT_ABI != MS_ABI))
24898 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
24899 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
24900 use_reg (&use, pic_offset_table_rtx);
24901 }
24902
24903 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
24904 {
24905 rtx al = gen_rtx_REG (QImode, AX_REG);
24906 emit_move_insn (al, callarg2);
24907 use_reg (&use, al);
24908 }
24909
24910 if (ix86_cmodel == CM_LARGE_PIC
24911 && !TARGET_PECOFF
24912 && MEM_P (fnaddr)
24913 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
24914 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
24915 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
24916 else if (sibcall
24917 ? !sibcall_insn_operand (XEXP (fnaddr, 0), word_mode)
24918 : !call_insn_operand (XEXP (fnaddr, 0), word_mode))
24919 {
24920 fnaddr = convert_to_mode (word_mode, XEXP (fnaddr, 0), 1);
24921 fnaddr = gen_rtx_MEM (QImode, copy_to_mode_reg (word_mode, fnaddr));
24922 }
24923
24924 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
24925 if (retval)
24926 call = gen_rtx_SET (VOIDmode, retval, call);
24927 vec[vec_len++] = call;
24928
24929 if (pop)
24930 {
24931 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
24932 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
24933 vec[vec_len++] = pop;
24934 }
24935
24936 if (TARGET_64BIT_MS_ABI
24937 && (!callarg2 || INTVAL (callarg2) != -2))
24938 {
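/* A call from MS-ABI code into a SysV-ABI function clobbers registers that
   the MS ABI treats as call-saved (see
   x86_64_ms_sysv_extra_clobbered_registers); record those clobbers and tag
   the call with UNSPEC_MS_TO_SYSV_CALL.  */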
24939 unsigned i;
24940
24941 vec[vec_len++] = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx),
24942 UNSPEC_MS_TO_SYSV_CALL);
24943
24944 for (i = 0; i < cregs_size; i++)
24945 {
24946 int regno = x86_64_ms_sysv_extra_clobbered_registers[i];
24947 enum machine_mode mode = SSE_REGNO_P (regno) ? TImode : DImode;
24948
24949 vec[vec_len++]
24950 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (mode, regno));
24951 }
24952 }
24953
24954 if (vec_len > 1)
24955 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (vec_len, vec));
24956 call = emit_call_insn (call);
24957 if (use)
24958 CALL_INSN_FUNCTION_USAGE (call) = use;
24959
24960 return call;
24961 }
24962
24963 /* Output the assembly for a call instruction. */
24964
24965 const char *
24966 ix86_output_call_insn (rtx insn, rtx call_op)
24967 {
24968 bool direct_p = constant_call_address_operand (call_op, VOIDmode);
24969 bool seh_nop_p = false;
24970 const char *xasm;
24971
24972 if (SIBLING_CALL_P (insn))
24973 {
24974 if (direct_p)
24975 xasm = "jmp\t%P0";
24976 /* SEH epilogue detection requires the indirect branch case
24977 to include REX.W. */
24978 else if (TARGET_SEH)
24979 xasm = "rex.W jmp %A0";
24980 else
24981 xasm = "jmp\t%A0";
24982
24983 output_asm_insn (xasm, &call_op);
24984 return "";
24985 }
24986
24987 /* SEH unwinding can require an extra nop to be emitted in several
24988 circumstances. Determine if we have one of those. */
24989 if (TARGET_SEH)
24990 {
24991 rtx i;
24992
24993 for (i = NEXT_INSN (insn); i ; i = NEXT_INSN (i))
24994 {
24995 /* If we get to another real insn, we don't need the nop. */
24996 if (INSN_P (i))
24997 break;
24998
24999 /* If we get to the epilogue note, prevent a catch region from
25000 being adjacent to the standard epilogue sequence. If non-
25001 call-exceptions, we'll have done this during epilogue emission. */
25002 if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG
25003 && !flag_non_call_exceptions
25004 && !can_throw_internal (insn))
25005 {
25006 seh_nop_p = true;
25007 break;
25008 }
25009 }
25010
25011 /* If we didn't find a real insn following the call, prevent the
25012 unwinder from looking into the next function. */
25013 if (i == NULL)
25014 seh_nop_p = true;
25015 }
25016
25017 if (direct_p)
25018 xasm = "call\t%P0";
25019 else
25020 xasm = "call\t%A0";
25021
25022 output_asm_insn (xasm, &call_op);
25023
25024 if (seh_nop_p)
25025 return "nop";
25026
25027 return "";
25028 }
25029 \f
25030 /* Clear stack slot assignments remembered from previous functions.
25031 This is called from INIT_EXPANDERS once before RTL is emitted for each
25032 function. */
25033
25034 static struct machine_function *
25035 ix86_init_machine_status (void)
25036 {
25037 struct machine_function *f;
25038
25039 f = ggc_cleared_alloc<machine_function> ();
25040 f->use_fast_prologue_epilogue_nregs = -1;
25041 f->call_abi = ix86_abi;
25042
25043 return f;
25044 }
25045
25046 /* Return a MEM corresponding to a stack slot with mode MODE.
25047 Allocate a new slot if necessary.
25048
25049 The RTL for a function can have several slots available: N is
25050 which slot to use. */
25051
25052 rtx
25053 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
25054 {
25055 struct stack_local_entry *s;
25056
25057 gcc_assert (n < MAX_386_STACK_LOCALS);
25058
25059 for (s = ix86_stack_locals; s; s = s->next)
25060 if (s->mode == mode && s->n == n)
25061 return validize_mem (copy_rtx (s->rtl));
25062
25063 s = ggc_alloc<stack_local_entry> ();
25064 s->n = n;
25065 s->mode = mode;
25066 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
25067
25068 s->next = ix86_stack_locals;
25069 ix86_stack_locals = s;
25070 return validize_mem (copy_rtx (s->rtl));
25071 }
25072
25073 static void
25074 ix86_instantiate_decls (void)
25075 {
25076 struct stack_local_entry *s;
25077
25078 for (s = ix86_stack_locals; s; s = s->next)
25079 if (s->rtl != NULL_RTX)
25080 instantiate_decl_rtl (s->rtl);
25081 }
25082 \f
25083 /* Check whether x86 address PARTS is a pc-relative address. */
25084
25085 static bool
25086 rip_relative_addr_p (struct ix86_address *parts)
25087 {
25088 rtx base, index, disp;
25089
25090 base = parts->base;
25091 index = parts->index;
25092 disp = parts->disp;
25093
25094 if (disp && !base && !index)
25095 {
25096 if (TARGET_64BIT)
25097 {
25098 rtx symbol = disp;
25099
25100 if (GET_CODE (disp) == CONST)
25101 symbol = XEXP (disp, 0);
25102 if (GET_CODE (symbol) == PLUS
25103 && CONST_INT_P (XEXP (symbol, 1)))
25104 symbol = XEXP (symbol, 0);
25105
25106 if (GET_CODE (symbol) == LABEL_REF
25107 || (GET_CODE (symbol) == SYMBOL_REF
25108 && SYMBOL_REF_TLS_MODEL (symbol) == 0)
25109 || (GET_CODE (symbol) == UNSPEC
25110 && (XINT (symbol, 1) == UNSPEC_GOTPCREL
25111 || XINT (symbol, 1) == UNSPEC_PCREL
25112 || XINT (symbol, 1) == UNSPEC_GOTNTPOFF)))
25113 return true;
25114 }
25115 }
25116 return false;
25117 }
25118
25119 /* Calculate the length of the memory address in the instruction encoding.
25120 Includes addr32 prefix, does not include the one-byte modrm, opcode,
25121 or other prefixes. We never generate addr32 prefix for LEA insn. */
25122
25123 int
25124 memory_address_length (rtx addr, bool lea)
25125 {
25126 struct ix86_address parts;
25127 rtx base, index, disp;
25128 int len;
25129 int ok;
25130
25131 if (GET_CODE (addr) == PRE_DEC
25132 || GET_CODE (addr) == POST_INC
25133 || GET_CODE (addr) == PRE_MODIFY
25134 || GET_CODE (addr) == POST_MODIFY)
25135 return 0;
25136
25137 ok = ix86_decompose_address (addr, &parts);
25138 gcc_assert (ok);
25139
25140 len = (parts.seg == SEG_DEFAULT) ? 0 : 1;
25141
25142 /* If this is not LEA instruction, add the length of addr32 prefix. */
25143 if (TARGET_64BIT && !lea
25144 && (SImode_address_operand (addr, VOIDmode)
25145 || (parts.base && GET_MODE (parts.base) == SImode)
25146 || (parts.index && GET_MODE (parts.index) == SImode)))
25147 len++;
25148
25149 base = parts.base;
25150 index = parts.index;
25151 disp = parts.disp;
25152
25153 if (base && GET_CODE (base) == SUBREG)
25154 base = SUBREG_REG (base);
25155 if (index && GET_CODE (index) == SUBREG)
25156 index = SUBREG_REG (index);
25157
25158 gcc_assert (base == NULL_RTX || REG_P (base));
25159 gcc_assert (index == NULL_RTX || REG_P (index));
25160
25161 /* Rule of thumb:
25162 - esp as the base always wants an index,
25163 - ebp as the base always wants a displacement,
25164 - r12 as the base always wants an index,
25165 - r13 as the base always wants a displacement. */
25166
25167 /* Register Indirect. */
25168 if (base && !index && !disp)
25169 {
25170 /* esp (for its index) and ebp (for its displacement) need
25171 the two-byte modrm form. Similarly for r12 and r13 in 64-bit
25172 code. */
25173 if (base == arg_pointer_rtx
25174 || base == frame_pointer_rtx
25175 || REGNO (base) == SP_REG
25176 || REGNO (base) == BP_REG
25177 || REGNO (base) == R12_REG
25178 || REGNO (base) == R13_REG)
25179 len++;
25180 }
25181
25182 /* Direct Addressing. In 64-bit mode, mod 00 with r/m 5 means
25183 disp32(%rip) rather than plain disp32, so a plain disp32
25184 needs a SIB byte, unless print_operand_address optimizes it
25185 into disp32(%rip) or (%rip) is implied
25186 by UNSPEC. */
25187 else if (disp && !base && !index)
25188 {
25189 len += 4;
25190 if (rip_relative_addr_p (&parts))
25191 len++;
25192 }
25193 else
25194 {
25195 /* Find the length of the displacement constant. */
25196 if (disp)
25197 {
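/* Constraint K is a signed 8-bit integer constant, which can be encoded
   as a one-byte displacement; anything else needs a full 32-bit
   displacement.  */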
25198 if (base && satisfies_constraint_K (disp))
25199 len += 1;
25200 else
25201 len += 4;
25202 }
25203 /* ebp always wants a displacement. Similarly r13. */
25204 else if (base && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
25205 len++;
25206
25207 /* An index requires the two-byte modrm form.... */
25208 if (index
25209 /* ...like esp (or r12), which always wants an index. */
25210 || base == arg_pointer_rtx
25211 || base == frame_pointer_rtx
25212 || (base && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
25213 len++;
25214 }
25215
25216 return len;
25217 }
25218
25219 /* Compute the default value for the "length_immediate" attribute. When
25220 SHORTFORM is set, expect that the insn has an 8-bit immediate alternative. */
25221 int
25222 ix86_attr_length_immediate_default (rtx insn, bool shortform)
25223 {
25224 int len = 0;
25225 int i;
25226 extract_insn_cached (insn);
25227 for (i = recog_data.n_operands - 1; i >= 0; --i)
25228 if (CONSTANT_P (recog_data.operand[i]))
25229 {
25230 enum attr_mode mode = get_attr_mode (insn);
25231
25232 gcc_assert (!len);
25233 if (shortform && CONST_INT_P (recog_data.operand[i]))
25234 {
25235 HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
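/* Truncate the value to the operand's mode first, so that e.g. a HImode
   0xffff is seen as -1 and still qualifies for the sign-extended 8-bit
   short form below.  */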
25236 switch (mode)
25237 {
25238 case MODE_QI:
25239 len = 1;
25240 continue;
25241 case MODE_HI:
25242 ival = trunc_int_for_mode (ival, HImode);
25243 break;
25244 case MODE_SI:
25245 ival = trunc_int_for_mode (ival, SImode);
25246 break;
25247 default:
25248 break;
25249 }
25250 if (IN_RANGE (ival, -128, 127))
25251 {
25252 len = 1;
25253 continue;
25254 }
25255 }
25256 switch (mode)
25257 {
25258 case MODE_QI:
25259 len = 1;
25260 break;
25261 case MODE_HI:
25262 len = 2;
25263 break;
25264 case MODE_SI:
25265 len = 4;
25266 break;
25267 /* Immediates for DImode instructions are encoded
25268 as 32bit sign extended values. */
25269 case MODE_DI:
25270 len = 4;
25271 break;
25272 default:
25273 fatal_insn ("unknown insn mode", insn);
25274 }
25275 }
25276 return len;
25277 }
25278
25279 /* Compute default value for "length_address" attribute. */
25280 int
25281 ix86_attr_length_address_default (rtx insn)
25282 {
25283 int i;
25284
25285 if (get_attr_type (insn) == TYPE_LEA)
25286 {
25287 rtx set = PATTERN (insn), addr;
25288
25289 if (GET_CODE (set) == PARALLEL)
25290 set = XVECEXP (set, 0, 0);
25291
25292 gcc_assert (GET_CODE (set) == SET);
25293
25294 addr = SET_SRC (set);
25295
25296 return memory_address_length (addr, true);
25297 }
25298
25299 extract_insn_cached (insn);
25300 for (i = recog_data.n_operands - 1; i >= 0; --i)
25301 if (MEM_P (recog_data.operand[i]))
25302 {
25303 constrain_operands_cached (reload_completed);
25304 if (which_alternative != -1)
25305 {
25306 const char *constraints = recog_data.constraints[i];
25307 int alt = which_alternative;
25308
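/* Step over any '=' or '+' modifiers, then skip ALT comma-separated
   alternatives to reach the constraint letters of the selected
   alternative.  */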
25309 while (*constraints == '=' || *constraints == '+')
25310 constraints++;
25311 while (alt-- > 0)
25312 while (*constraints++ != ',')
25313 ;
25314 /* Skip ignored operands. */
25315 if (*constraints == 'X')
25316 continue;
25317 }
25318 return memory_address_length (XEXP (recog_data.operand[i], 0), false);
25319 }
25320 return 0;
25321 }
25322
25323 /* Compute default value for "length_vex" attribute. It includes
25324 2 or 3 byte VEX prefix and 1 opcode byte. */
25325
25326 int
25327 ix86_attr_length_vex_default (rtx insn, bool has_0f_opcode, bool has_vex_w)
25328 {
25329 int i;
25330
25331 /* Only the 0f opcode can use the 2-byte VEX prefix, and the VEX.W bit
25332 requires the 3-byte VEX prefix. */
25333 if (!has_0f_opcode || has_vex_w)
25334 return 3 + 1;
25335
25336 /* We can always use 2 byte VEX prefix in 32bit. */
25337 if (!TARGET_64BIT)
25338 return 2 + 1;
25339
25340 extract_insn_cached (insn);
25341
25342 for (i = recog_data.n_operands - 1; i >= 0; --i)
25343 if (REG_P (recog_data.operand[i]))
25344 {
25345 /* REX.W bit uses 3 byte VEX prefix. */
25346 if (GET_MODE (recog_data.operand[i]) == DImode
25347 && GENERAL_REG_P (recog_data.operand[i]))
25348 return 3 + 1;
25349 }
25350 else
25351 {
25352 /* REX.X or REX.B bits use 3 byte VEX prefix. */
25353 if (MEM_P (recog_data.operand[i])
25354 && x86_extended_reg_mentioned_p (recog_data.operand[i]))
25355 return 3 + 1;
25356 }
25357
25358 return 2 + 1;
25359 }
25360 \f
25361 /* Return the maximum number of instructions a cpu can issue. */
25362
25363 static int
25364 ix86_issue_rate (void)
25365 {
25366 switch (ix86_tune)
25367 {
25368 case PROCESSOR_PENTIUM:
25369 case PROCESSOR_BONNELL:
25370 case PROCESSOR_SILVERMONT:
25371 case PROCESSOR_INTEL:
25372 case PROCESSOR_K6:
25373 case PROCESSOR_BTVER2:
25374 case PROCESSOR_PENTIUM4:
25375 case PROCESSOR_NOCONA:
25376 return 2;
25377
25378 case PROCESSOR_PENTIUMPRO:
25379 case PROCESSOR_ATHLON:
25380 case PROCESSOR_K8:
25381 case PROCESSOR_AMDFAM10:
25382 case PROCESSOR_GENERIC:
25383 case PROCESSOR_BTVER1:
25384 return 3;
25385
25386 case PROCESSOR_BDVER1:
25387 case PROCESSOR_BDVER2:
25388 case PROCESSOR_BDVER3:
25389 case PROCESSOR_BDVER4:
25390 case PROCESSOR_CORE2:
25391 case PROCESSOR_NEHALEM:
25392 case PROCESSOR_SANDYBRIDGE:
25393 case PROCESSOR_HASWELL:
25394 return 4;
25395
25396 default:
25397 return 1;
25398 }
25399 }
25400
25401 /* A subroutine of ix86_adjust_cost -- return TRUE iff INSN reads the flags
25402 set by DEP_INSN and nothing else set by DEP_INSN. */
25403
25404 static bool
25405 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
25406 {
25407 rtx set, set2;
25408
25409 /* Simplify the test for uninteresting insns. */
25410 if (insn_type != TYPE_SETCC
25411 && insn_type != TYPE_ICMOV
25412 && insn_type != TYPE_FCMOV
25413 && insn_type != TYPE_IBR)
25414 return false;
25415
25416 if ((set = single_set (dep_insn)) != 0)
25417 {
25418 set = SET_DEST (set);
25419 set2 = NULL_RTX;
25420 }
25421 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
25422 && XVECLEN (PATTERN (dep_insn), 0) == 2
25423 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
25424 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
25425 {
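/* A two-element PARALLEL typically comes from an arithmetic insn that sets
   both a result register and the flags; examine both destinations.  */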
25426 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
25427 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
25428 }
25429 else
25430 return false;
25431
25432 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
25433 return false;
25434
25435 /* This test is true if the dependent insn reads the flags but
25436 not any other potentially set register. */
25437 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
25438 return false;
25439
25440 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
25441 return false;
25442
25443 return true;
25444 }
25445
25446 /* Return true iff USE_INSN has a memory address with operands set by
25447 SET_INSN. */
25448
25449 bool
25450 ix86_agi_dependent (rtx set_insn, rtx use_insn)
25451 {
25452 int i;
25453 extract_insn_cached (use_insn);
25454 for (i = recog_data.n_operands - 1; i >= 0; --i)
25455 if (MEM_P (recog_data.operand[i]))
25456 {
25457 rtx addr = XEXP (recog_data.operand[i], 0);
25458 return modified_in_p (addr, set_insn) != 0;
25459 }
25460 return false;
25461 }
25462
25463 /* Helper function for exact_store_load_dependency.
25464 Return true if addr is found in insn. */
25465 static bool
25466 exact_dependency_1 (rtx addr, rtx insn)
25467 {
25468 enum rtx_code code;
25469 const char *format_ptr;
25470 int i, j;
25471
25472 code = GET_CODE (insn);
25473 switch (code)
25474 {
25475 case MEM:
25476 if (rtx_equal_p (addr, insn))
25477 return true;
25478 break;
25479 case REG:
25480 CASE_CONST_ANY:
25481 case SYMBOL_REF:
25482 case CODE_LABEL:
25483 case PC:
25484 case CC0:
25485 case EXPR_LIST:
25486 return false;
25487 default:
25488 break;
25489 }
25490
25491 format_ptr = GET_RTX_FORMAT (code);
25492 for (i = 0; i < GET_RTX_LENGTH (code); i++)
25493 {
25494 switch (*format_ptr++)
25495 {
25496 case 'e':
25497 if (exact_dependency_1 (addr, XEXP (insn, i)))
25498 return true;
25499 break;
25500 case 'E':
25501 for (j = 0; j < XVECLEN (insn, i); j++)
25502 if (exact_dependency_1 (addr, XVECEXP (insn, i, j)))
25503 return true;
25504 break;
25505 }
25506 }
25507 return false;
25508 }
25509
25510 /* Return true if an exact dependency exists between a store and a load, i.e.
25511 the same memory address is used in both. */
25512 static bool
25513 exact_store_load_dependency (rtx store, rtx load)
25514 {
25515 rtx set1, set2;
25516
25517 set1 = single_set (store);
25518 if (!set1)
25519 return false;
25520 if (!MEM_P (SET_DEST (set1)))
25521 return false;
25522 set2 = single_set (load);
25523 if (!set2)
25524 return false;
25525 if (exact_dependency_1 (SET_DEST (set1), SET_SRC (set2)))
25526 return true;
25527 return false;
25528 }
25529
25530 static int
25531 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
25532 {
25533 enum attr_type insn_type, dep_insn_type;
25534 enum attr_memory memory;
25535 rtx set, set2;
25536 int dep_insn_code_number;
25537
25538 /* Anti and output dependencies have zero cost on all CPUs. */
25539 if (REG_NOTE_KIND (link) != 0)
25540 return 0;
25541
25542 dep_insn_code_number = recog_memoized (dep_insn);
25543
25544 /* If we can't recognize the insns, we can't really do anything. */
25545 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
25546 return cost;
25547
25548 insn_type = get_attr_type (insn);
25549 dep_insn_type = get_attr_type (dep_insn);
25550
25551 switch (ix86_tune)
25552 {
25553 case PROCESSOR_PENTIUM:
25554 /* Address Generation Interlock adds a cycle of latency. */
25555 if (insn_type == TYPE_LEA)
25556 {
25557 rtx addr = PATTERN (insn);
25558
25559 if (GET_CODE (addr) == PARALLEL)
25560 addr = XVECEXP (addr, 0, 0);
25561
25562 gcc_assert (GET_CODE (addr) == SET);
25563
25564 addr = SET_SRC (addr);
25565 if (modified_in_p (addr, dep_insn))
25566 cost += 1;
25567 }
25568 else if (ix86_agi_dependent (dep_insn, insn))
25569 cost += 1;
25570
25571 /* ??? Compares pair with jump/setcc. */
25572 if (ix86_flags_dependent (insn, dep_insn, insn_type))
25573 cost = 0;
25574
25575 /* Floating point stores require value to be ready one cycle earlier. */
25576 if (insn_type == TYPE_FMOV
25577 && get_attr_memory (insn) == MEMORY_STORE
25578 && !ix86_agi_dependent (dep_insn, insn))
25579 cost += 1;
25580 break;
25581
25582 case PROCESSOR_PENTIUMPRO:
25583 /* INT->FP conversion is expensive. */
25584 if (get_attr_fp_int_src (dep_insn))
25585 cost += 5;
25586
25587 /* There is one cycle extra latency between an FP op and a store. */
25588 if (insn_type == TYPE_FMOV
25589 && (set = single_set (dep_insn)) != NULL_RTX
25590 && (set2 = single_set (insn)) != NULL_RTX
25591 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
25592 && MEM_P (SET_DEST (set2)))
25593 cost += 1;
25594
25595 memory = get_attr_memory (insn);
25596
25597 /* Model the ability of the reorder buffer to hide the latency of a load by
25598 executing it in parallel with the previous instruction when the previous
25599 instruction is not needed to compute the address. */
25600 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
25601 && !ix86_agi_dependent (dep_insn, insn))
25602 {
25603 /* Claim that moves take one cycle, as the core can issue one load
25604 at a time and the next load can start a cycle later. */
25605 if (dep_insn_type == TYPE_IMOV
25606 || dep_insn_type == TYPE_FMOV)
25607 cost = 1;
25608 else if (cost > 1)
25609 cost--;
25610 }
25611 break;
25612
25613 case PROCESSOR_K6:
25614 /* The esp dependency is resolved before
25615 the instruction is really finished. */
25616 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
25617 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
25618 return 1;
25619
25620 /* INT->FP conversion is expensive. */
25621 if (get_attr_fp_int_src (dep_insn))
25622 cost += 5;
25623
25624 memory = get_attr_memory (insn);
25625
25626 /* Model the ability of the reorder buffer to hide the latency of a load by
25627 executing it in parallel with the previous instruction when the previous
25628 instruction is not needed to compute the address. */
25629 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
25630 && !ix86_agi_dependent (dep_insn, insn))
25631 {
25632 /* Claim that moves take one cycle, as the core can issue one load
25633 at a time and the next load can start a cycle later. */
25634 if (dep_insn_type == TYPE_IMOV
25635 || dep_insn_type == TYPE_FMOV)
25636 cost = 1;
25637 else if (cost > 2)
25638 cost -= 2;
25639 else
25640 cost = 1;
25641 }
25642 break;
25643
25644 case PROCESSOR_AMDFAM10:
25645 case PROCESSOR_BDVER1:
25646 case PROCESSOR_BDVER2:
25647 case PROCESSOR_BDVER3:
25648 case PROCESSOR_BDVER4:
25649 case PROCESSOR_BTVER1:
25650 case PROCESSOR_BTVER2:
25651 case PROCESSOR_GENERIC:
25652 /* The stack engine allows push and pop instructions to execute in parallel. */
25653 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
25654 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
25655 return 0;
25656 /* FALLTHRU */
25657
25658 case PROCESSOR_ATHLON:
25659 case PROCESSOR_K8:
25660 memory = get_attr_memory (insn);
25661
25662 /* Model the ability of the reorder buffer to hide the latency of a load by
25663 executing it in parallel with the previous instruction when the previous
25664 instruction is not needed to compute the address. */
25665 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
25666 && !ix86_agi_dependent (dep_insn, insn))
25667 {
25668 enum attr_unit unit = get_attr_unit (insn);
25669 int loadcost = 3;
25670
25671 /* Because of the difference between the length of integer and
25672 floating unit pipeline preparation stages, the memory operands
25673 for floating point are cheaper.
25674
25675 ??? For Athlon the difference is most probably 2. */
25676 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
25677 loadcost = 3;
25678 else
25679 loadcost = TARGET_ATHLON ? 2 : 0;
25680
25681 if (cost >= loadcost)
25682 cost -= loadcost;
25683 else
25684 cost = 0;
25685 }
25686 break;
25687
25688 case PROCESSOR_CORE2:
25689 case PROCESSOR_NEHALEM:
25690 case PROCESSOR_SANDYBRIDGE:
25691 case PROCESSOR_HASWELL:
25692 /* The stack engine allows push and pop instructions to execute in parallel. */
25693 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
25694 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
25695 return 0;
25696
25697 memory = get_attr_memory (insn);
25698
25699 /* Model the ability of the reorder buffer to hide the latency of a load by
25700 executing it in parallel with the previous instruction when the previous
25701 instruction is not needed to compute the address. */
25702 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
25703 && !ix86_agi_dependent (dep_insn, insn))
25704 {
25705 if (cost >= 4)
25706 cost -= 4;
25707 else
25708 cost = 0;
25709 }
25710 break;
25711
25712 case PROCESSOR_SILVERMONT:
25713 case PROCESSOR_INTEL:
25714 if (!reload_completed)
25715 return cost;
25716
25717 /* Increase cost of integer loads. */
25718 memory = get_attr_memory (dep_insn);
25719 if (memory == MEMORY_LOAD || memory == MEMORY_BOTH)
25720 {
25721 enum attr_unit unit = get_attr_unit (dep_insn);
25722 if (unit == UNIT_INTEGER && cost == 1)
25723 {
25724 if (memory == MEMORY_LOAD)
25725 cost = 3;
25726 else
25727 {
25728 /* Increase cost of ld/st for short int types only
25729 because of store forwarding issue. */
25730 rtx set = single_set (dep_insn);
25731 if (set && (GET_MODE (SET_DEST (set)) == QImode
25732 || GET_MODE (SET_DEST (set)) == HImode))
25733 {
25734 /* Increase cost of store/load insn if exact
25735 dependence exists and it is load insn. */
25736 enum attr_memory insn_memory = get_attr_memory (insn);
25737 if (insn_memory == MEMORY_LOAD
25738 && exact_store_load_dependency (dep_insn, insn))
25739 cost = 3;
25740 }
25741 }
25742 }
25743 }
25744
25745 default:
25746 break;
25747 }
25748
25749 return cost;
25750 }
25751
25752 /* How many alternative schedules to try. This should be as wide as the
25753 scheduling freedom in the DFA, but no wider. Making this value too
25754 large results in extra work for the scheduler. */
25755
25756 static int
25757 ia32_multipass_dfa_lookahead (void)
25758 {
25759 switch (ix86_tune)
25760 {
25761 case PROCESSOR_PENTIUM:
25762 return 2;
25763
25764 case PROCESSOR_PENTIUMPRO:
25765 case PROCESSOR_K6:
25766 return 1;
25767
25768 case PROCESSOR_BDVER1:
25769 case PROCESSOR_BDVER2:
25770 case PROCESSOR_BDVER3:
25771 case PROCESSOR_BDVER4:
25772 /* We use lookahead value 4 for BD both before and after reload
25773 schedules. Plan is to have value 8 included for O3. */
25774 return 4;
25775
25776 case PROCESSOR_CORE2:
25777 case PROCESSOR_NEHALEM:
25778 case PROCESSOR_SANDYBRIDGE:
25779 case PROCESSOR_HASWELL:
25780 case PROCESSOR_BONNELL:
25781 case PROCESSOR_SILVERMONT:
25782 case PROCESSOR_INTEL:
25783 /* Generally, we want haifa-sched:max_issue() to look ahead as far as the
25784 number of instructions that can be executed in one cycle, i.e.,
25785 issue_rate. I wonder why tuning for many CPUs does not do this. */
25786 if (reload_completed)
25787 return ix86_issue_rate ();
25788 /* Don't use lookahead for pre-reload schedule to save compile time. */
25789 return 0;
25790
25791 default:
25792 return 0;
25793 }
25794 }
25795
25796 /* Return true if target platform supports macro-fusion. */
25797
25798 static bool
25799 ix86_macro_fusion_p ()
25800 {
25801 return TARGET_FUSE_CMP_AND_BRANCH;
25802 }
25803
25804 /* Check whether the current microarchitecture supports macro fusion
25805 for insn pair "CONDGEN + CONDJMP". Refer to
25806 "Intel Architectures Optimization Reference Manual". */
25807
25808 static bool
25809 ix86_macro_fusion_pair_p (rtx condgen, rtx condjmp)
25810 {
25811 rtx src, dest;
25812 rtx single_set = single_set (condgen);
25813 enum rtx_code ccode;
25814 rtx compare_set = NULL_RTX, test_if, cond;
25815 rtx alu_set = NULL_RTX, addr = NULL_RTX;
25816
25817 if (!any_condjump_p (condjmp))
25818 return false;
25819
25820 if (get_attr_type (condgen) != TYPE_TEST
25821 && get_attr_type (condgen) != TYPE_ICMP
25822 && get_attr_type (condgen) != TYPE_INCDEC
25823 && get_attr_type (condgen) != TYPE_ALU)
25824 return false;
25825
25826 if (single_set == NULL_RTX
25827 && !TARGET_FUSE_ALU_AND_BRANCH)
25828 return false;
25829
25830 if (single_set != NULL_RTX)
25831 compare_set = single_set;
25832 else
25833 {
25834 int i;
25835 rtx pat = PATTERN (condgen);
25836 for (i = 0; i < XVECLEN (pat, 0); i++)
25837 if (GET_CODE (XVECEXP (pat, 0, i)) == SET)
25838 {
25839 rtx set_src = SET_SRC (XVECEXP (pat, 0, i));
25840 if (GET_CODE (set_src) == COMPARE)
25841 compare_set = XVECEXP (pat, 0, i);
25842 else
25843 alu_set = XVECEXP (pat, 0, i);
25844 }
25845 }
25846 if (compare_set == NULL_RTX)
25847 return false;
25848 src = SET_SRC (compare_set);
25849 if (GET_CODE (src) != COMPARE)
25850 return false;
25851
25852 /* Macro-fusion for cmp/test MEM-IMM + conditional jmp is not
25853 supported. */
25854 if ((MEM_P (XEXP (src, 0))
25855 && CONST_INT_P (XEXP (src, 1)))
25856 || (MEM_P (XEXP (src, 1))
25857 && CONST_INT_P (XEXP (src, 0))))
25858 return false;
25859
25860 /* No fusion for RIP-relative address. */
25861 if (MEM_P (XEXP (src, 0)))
25862 addr = XEXP (XEXP (src, 0), 0);
25863 else if (MEM_P (XEXP (src, 1)))
25864 addr = XEXP (XEXP (src, 1), 0);
25865
25866 if (addr)
25867 {
25868 ix86_address parts;
25869 int ok = ix86_decompose_address (addr, &parts);
25870 gcc_assert (ok);
25871 if (rip_relative_addr_p (&parts))
25872 return false;
25873 }
25874
25875 test_if = SET_SRC (pc_set (condjmp));
25876 cond = XEXP (test_if, 0);
25877 ccode = GET_CODE (cond);
25878 /* Check whether the conditional jump uses the Sign or Overflow flags. */
25879 if (!TARGET_FUSE_CMP_AND_BRANCH_SOFLAGS
25880 && (ccode == GE
25881 || ccode == GT
25882 || ccode == LE
25883 || ccode == LT))
25884 return false;
25885
25886 /* Return true for TYPE_TEST and TYPE_ICMP. */
25887 if (get_attr_type (condgen) == TYPE_TEST
25888 || get_attr_type (condgen) == TYPE_ICMP)
25889 return true;
25890
25891 /* What follows handles the case of macro-fusion for alu + jmp. */
25892 if (!TARGET_FUSE_ALU_AND_BRANCH || !alu_set)
25893 return false;
25894
25895 /* No fusion for alu op with memory destination operand. */
25896 dest = SET_DEST (alu_set);
25897 if (MEM_P (dest))
25898 return false;
25899
25900 /* Macro-fusion for inc/dec + unsigned conditional jump is not
25901 supported. */
25902 if (get_attr_type (condgen) == TYPE_INCDEC
25903 && (ccode == GEU
25904 || ccode == GTU
25905 || ccode == LEU
25906 || ccode == LTU))
25907 return false;
25908
25909 return true;
25910 }
25911
25912 /* Try to reorder ready list to take advantage of Atom pipelined IMUL
25913 execution. It is applied if
25914 (1) IMUL instruction is on the top of list;
25915 (2) There is exactly one producer of an independent IMUL instruction in the
25916 ready list.
25917 Return index of IMUL producer if it was found and -1 otherwise. */
25918 static int
25919 do_reorder_for_imul (rtx *ready, int n_ready)
25920 {
25921 rtx insn, set, insn1, insn2;
25922 sd_iterator_def sd_it;
25923 dep_t dep;
25924 int index = -1;
25925 int i;
25926
25927 if (!TARGET_BONNELL)
25928 return index;
25929
25930 /* Check that IMUL instruction is on the top of ready list. */
25931 insn = ready[n_ready - 1];
25932 set = single_set (insn);
25933 if (!set)
25934 return index;
25935 if (!(GET_CODE (SET_SRC (set)) == MULT
25936 && GET_MODE (SET_SRC (set)) == SImode))
25937 return index;
25938
25939 /* Search for producer of independent IMUL instruction. */
25940 for (i = n_ready - 2; i >= 0; i--)
25941 {
25942 insn = ready[i];
25943 if (!NONDEBUG_INSN_P (insn))
25944 continue;
25945 /* Skip IMUL instruction. */
25946 insn2 = PATTERN (insn);
25947 if (GET_CODE (insn2) == PARALLEL)
25948 insn2 = XVECEXP (insn2, 0, 0);
25949 if (GET_CODE (insn2) == SET
25950 && GET_CODE (SET_SRC (insn2)) == MULT
25951 && GET_MODE (SET_SRC (insn2)) == SImode)
25952 continue;
25953
25954 FOR_EACH_DEP (insn, SD_LIST_FORW, sd_it, dep)
25955 {
25956 rtx con;
25957 con = DEP_CON (dep);
25958 if (!NONDEBUG_INSN_P (con))
25959 continue;
25960 insn1 = PATTERN (con);
25961 if (GET_CODE (insn1) == PARALLEL)
25962 insn1 = XVECEXP (insn1, 0, 0);
25963
25964 if (GET_CODE (insn1) == SET
25965 && GET_CODE (SET_SRC (insn1)) == MULT
25966 && GET_MODE (SET_SRC (insn1)) == SImode)
25967 {
25968 sd_iterator_def sd_it1;
25969 dep_t dep1;
25970 /* Check if there is no other dependee for IMUL. */
25971 index = i;
25972 FOR_EACH_DEP (con, SD_LIST_BACK, sd_it1, dep1)
25973 {
25974 rtx pro;
25975 pro = DEP_PRO (dep1);
25976 if (!NONDEBUG_INSN_P (pro))
25977 continue;
25978 if (pro != insn)
25979 index = -1;
25980 }
25981 if (index >= 0)
25982 break;
25983 }
25984 }
25985 if (index >= 0)
25986 break;
25987 }
25988 return index;
25989 }
25990
25991 /* Try to find the best candidate on the top of ready list if two insns
25992 have the same priority - candidate is best if its dependees were
25993 scheduled earlier. Applied for Silvermont only.
25994 Return true if top 2 insns must be interchanged. */
25995 static bool
25996 swap_top_of_ready_list (rtx *ready, int n_ready)
25997 {
25998 rtx top = ready[n_ready - 1];
25999 rtx next = ready[n_ready - 2];
26000 rtx set;
26001 sd_iterator_def sd_it;
26002 dep_t dep;
26003 int clock1 = -1;
26004 int clock2 = -1;
26005 #define INSN_TICK(INSN) (HID (INSN)->tick)
26006
26007 if (!TARGET_SILVERMONT && !TARGET_INTEL)
26008 return false;
26009
26010 if (!NONDEBUG_INSN_P (top))
26011 return false;
26012 if (!NONJUMP_INSN_P (top))
26013 return false;
26014 if (!NONDEBUG_INSN_P (next))
26015 return false;
26016 if (!NONJUMP_INSN_P (next))
26017 return false;
26018 set = single_set (top);
26019 if (!set)
26020 return false;
26021 set = single_set (next);
26022 if (!set)
26023 return false;
26024
26025 if (INSN_PRIORITY_KNOWN (top) && INSN_PRIORITY_KNOWN (next))
26026 {
26027 if (INSN_PRIORITY (top) != INSN_PRIORITY (next))
26028 return false;
26029 /* Determine the winner more precisely. */
26030 FOR_EACH_DEP (top, SD_LIST_RES_BACK, sd_it, dep)
26031 {
26032 rtx pro;
26033 pro = DEP_PRO (dep);
26034 if (!NONDEBUG_INSN_P (pro))
26035 continue;
26036 if (INSN_TICK (pro) > clock1)
26037 clock1 = INSN_TICK (pro);
26038 }
26039 FOR_EACH_DEP (next, SD_LIST_RES_BACK, sd_it, dep)
26040 {
26041 rtx pro;
26042 pro = DEP_PRO (dep);
26043 if (!NONDEBUG_INSN_P (pro))
26044 continue;
26045 if (INSN_TICK (pro) > clock2)
26046 clock2 = INSN_TICK (pro);
26047 }
26048
26049 if (clock1 == clock2)
26050 {
26051 /* Determine winner - load must win. */
26052 enum attr_memory memory1, memory2;
26053 memory1 = get_attr_memory (top);
26054 memory2 = get_attr_memory (next);
26055 if (memory2 == MEMORY_LOAD && memory1 != MEMORY_LOAD)
26056 return true;
26057 }
26058 return (bool) (clock2 < clock1);
26059 }
26060 return false;
26061 #undef INSN_TICK
26062 }
26063
26064 /* Perform possible reordering of the ready list for Atom/Silvermont only.
26065 Return issue rate. */
26066 static int
26067 ix86_sched_reorder (FILE *dump, int sched_verbose, rtx *ready, int *pn_ready,
26068 int clock_var)
26069 {
26070 int issue_rate = -1;
26071 int n_ready = *pn_ready;
26072 int i;
26073 rtx insn;
26074 int index = -1;
26075
26076 /* Set up issue rate. */
26077 issue_rate = ix86_issue_rate ();
26078
26079 /* Do reordering for BONNELL/SILVERMONT only. */
26080 if (!TARGET_BONNELL && !TARGET_SILVERMONT && !TARGET_INTEL)
26081 return issue_rate;
26082
26083 /* Nothing to do if ready list contains only 1 instruction. */
26084 if (n_ready <= 1)
26085 return issue_rate;
26086
26087 /* Do reordering for the post-reload scheduler only. */
26088 if (!reload_completed)
26089 return issue_rate;
26090
26091 if ((index = do_reorder_for_imul (ready, n_ready)) >= 0)
26092 {
26093 if (sched_verbose > 1)
26094 fprintf (dump, ";;\tatom sched_reorder: put %d insn on top\n",
26095 INSN_UID (ready[index]));
26096
26097 /* Put IMUL producer (ready[index]) at the top of ready list. */
26098 insn = ready[index];
26099 for (i = index; i < n_ready - 1; i++)
26100 ready[i] = ready[i + 1];
26101 ready[n_ready - 1] = insn;
26102 return issue_rate;
26103 }
26104 if (clock_var != 0 && swap_top_of_ready_list (ready, n_ready))
26105 {
26106 if (sched_verbose > 1)
26107 fprintf (dump, ";;\tslm sched_reorder: swap %d and %d insns\n",
26108 INSN_UID (ready[n_ready - 1]), INSN_UID (ready[n_ready - 2]));
26109 /* Swap 2 top elements of ready list. */
26110 insn = ready[n_ready - 1];
26111 ready[n_ready - 1] = ready[n_ready - 2];
26112 ready[n_ready - 2] = insn;
26113 }
26114 return issue_rate;
26115 }
26116
26117 static bool
26118 ix86_class_likely_spilled_p (reg_class_t);
26119
26120 /* Return true if the lhs of INSN is a hard function-argument register, and
26121 set *IS_SPILLED to true if it is a likely-spilled hard register. */
26122 static bool
26123 insn_is_function_arg (rtx insn, bool* is_spilled)
26124 {
26125 rtx dst;
26126
26127 if (!NONDEBUG_INSN_P (insn))
26128 return false;
26129 /* Call instructions are not movable; ignore them. */
26130 if (CALL_P (insn))
26131 return false;
26132 insn = PATTERN (insn);
26133 if (GET_CODE (insn) == PARALLEL)
26134 insn = XVECEXP (insn, 0, 0);
26135 if (GET_CODE (insn) != SET)
26136 return false;
26137 dst = SET_DEST (insn);
26138 if (REG_P (dst) && HARD_REGISTER_P (dst)
26139 && ix86_function_arg_regno_p (REGNO (dst)))
26140 {
26141 /* Is it likely spilled HW register? */
26142 if (!TEST_HARD_REG_BIT (fixed_reg_set, REGNO (dst))
26143 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst))))
26144 *is_spilled = true;
26145 return true;
26146 }
26147 return false;
26148 }
26149
26150 /* Add output dependencies for a chain of adjacent function arguments, but
26151 only if there is a move to a likely-spilled HW register. Return the first
26152 argument if at least one dependence was added, or NULL otherwise. */
26153 static rtx
26154 add_parameter_dependencies (rtx call, rtx head)
26155 {
26156 rtx insn;
26157 rtx last = call;
26158 rtx first_arg = NULL;
26159 bool is_spilled = false;
26160
26161 head = PREV_INSN (head);
26162
26163 /* Find the argument-passing instruction nearest to the call. */
26164 while (true)
26165 {
26166 last = PREV_INSN (last);
26167 if (last == head)
26168 return NULL;
26169 if (!NONDEBUG_INSN_P (last))
26170 continue;
26171 if (insn_is_function_arg (last, &is_spilled))
26172 break;
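/* The nearest real insn before the call does not set up an argument
   register, so there is no argument chain to protect.  */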
26173 return NULL;
26174 }
26175
26176 first_arg = last;
26177 while (true)
26178 {
26179 insn = PREV_INSN (last);
26180 if (!INSN_P (insn))
26181 break;
26182 if (insn == head)
26183 break;
26184 if (!NONDEBUG_INSN_P (insn))
26185 {
26186 last = insn;
26187 continue;
26188 }
26189 if (insn_is_function_arg (insn, &is_spilled))
26190 {
26191 /* Add an output dependence between two function arguments if the chain
26192 of output arguments contains likely-spilled HW registers. */
26193 if (is_spilled)
26194 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
26195 first_arg = last = insn;
26196 }
26197 else
26198 break;
26199 }
26200 if (!is_spilled)
26201 return NULL;
26202 return first_arg;
26203 }
26204
26205 /* Add output or anti dependency from insn to first_arg to restrict its code
26206 motion. */
26207 static void
26208 avoid_func_arg_motion (rtx first_arg, rtx insn)
26209 {
26210 rtx set;
26211 rtx tmp;
26212
26213 set = single_set (insn);
26214 if (!set)
26215 return;
26216 tmp = SET_DEST (set);
26217 if (REG_P (tmp))
26218 {
26219 /* Add output dependency to the first function argument. */
26220 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
26221 return;
26222 }
26223 /* Add anti dependency. */
26224 add_dependence (first_arg, insn, REG_DEP_ANTI);
26225 }
26226
26227 /* Avoid cross-block motion of a function argument by adding a dependency
26228 from the first non-jump instruction in BB. */
26229 static void
26230 add_dependee_for_func_arg (rtx arg, basic_block bb)
26231 {
26232 rtx insn = BB_END (bb);
26233
26234 while (insn)
26235 {
26236 if (NONDEBUG_INSN_P (insn) && NONJUMP_INSN_P (insn))
26237 {
26238 rtx set = single_set (insn);
26239 if (set)
26240 {
26241 avoid_func_arg_motion (arg, insn);
26242 return;
26243 }
26244 }
26245 if (insn == BB_HEAD (bb))
26246 return;
26247 insn = PREV_INSN (insn);
26248 }
26249 }
26250
26251 /* Hook for pre-reload schedule - avoid motion of function arguments
26252 passed in likely spilled HW registers. */
26253 static void
26254 ix86_dependencies_evaluation_hook (rtx head, rtx tail)
26255 {
26256 rtx insn;
26257 rtx first_arg = NULL;
26258 if (reload_completed)
26259 return;
26260 while (head != tail && DEBUG_INSN_P (head))
26261 head = NEXT_INSN (head);
26262 for (insn = tail; insn != head; insn = PREV_INSN (insn))
26263 if (INSN_P (insn) && CALL_P (insn))
26264 {
26265 first_arg = add_parameter_dependencies (insn, head);
26266 if (first_arg)
26267 {
26268 /* Add a dependee for the first argument to predecessors, but only if the
26269 region contains more than one block. */
26270 basic_block bb = BLOCK_FOR_INSN (insn);
26271 int rgn = CONTAINING_RGN (bb->index);
26272 int nr_blks = RGN_NR_BLOCKS (rgn);
26273 /* Skip trivial regions and region head blocks that can have
26274 predecessors outside of region. */
26275 if (nr_blks > 1 && BLOCK_TO_BB (bb->index) != 0)
26276 {
26277 edge e;
26278 edge_iterator ei;
26279
26280 /* Regions are SCCs with the exception of selective
26281 scheduling with pipelining of outer blocks enabled.
26282 So also check that immediate predecessors of a non-head
26283 block are in the same region. */
26284 FOR_EACH_EDGE (e, ei, bb->preds)
26285 {
26286 /* Avoid creating loop-carried dependencies by using the
26287 topological ordering of the region. */
26288 if (rgn == CONTAINING_RGN (e->src->index)
26289 && BLOCK_TO_BB (bb->index) > BLOCK_TO_BB (e->src->index))
26290 add_dependee_for_func_arg (first_arg, e->src);
26291 }
26292 }
26293 insn = first_arg;
26294 if (insn == head)
26295 break;
26296 }
26297 }
26298 else if (first_arg)
26299 avoid_func_arg_motion (first_arg, insn);
26300 }
26301
26302 /* Hook for pre-reload schedule - set priority of moves from likely spilled
26303 HW registers to maximum, to schedule them as soon as possible. These are
26304 moves from function argument registers at the top of the function entry
26305 and moves from function return value registers after call. */
26306 static int
26307 ix86_adjust_priority (rtx insn, int priority)
26308 {
26309 rtx set;
26310
26311 if (reload_completed)
26312 return priority;
26313
26314 if (!NONDEBUG_INSN_P (insn))
26315 return priority;
26316
26317 set = single_set (insn);
26318 if (set)
26319 {
26320 rtx tmp = SET_SRC (set);
26321 if (REG_P (tmp)
26322 && HARD_REGISTER_P (tmp)
26323 && !TEST_HARD_REG_BIT (fixed_reg_set, REGNO (tmp))
26324 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (tmp))))
26325 return current_sched_info->sched_max_insns_priority;
26326 }
26327
26328 return priority;
26329 }
26330
26331 /* Model decoder of Core 2/i7.
26332 Below hooks for multipass scheduling (see haifa-sched.c:max_issue)
26333 track the instruction fetch block boundaries and make sure that long
26334 (9+ bytes) instructions are assigned to D0. */
26335
26336 /* Maximum length of an insn that can be handled by
26337 a secondary decoder unit. '8' for Core 2/i7. */
26338 static int core2i7_secondary_decoder_max_insn_size;
26339
26340 /* Ifetch block size, i.e., number of bytes decoder reads per cycle.
26341 '16' for Core 2/i7. */
26342 static int core2i7_ifetch_block_size;
26343
26344 /* Maximum number of instructions decoder can handle per cycle.
26345 '6' for Core 2/i7. */
26346 static int core2i7_ifetch_block_max_insns;
26347
26348 typedef struct ix86_first_cycle_multipass_data_ *
26349 ix86_first_cycle_multipass_data_t;
26350 typedef const struct ix86_first_cycle_multipass_data_ *
26351 const_ix86_first_cycle_multipass_data_t;
26352
26353 /* A variable to store target state across calls to max_issue within
26354 one cycle. */
26355 static struct ix86_first_cycle_multipass_data_ _ix86_first_cycle_multipass_data,
26356 *ix86_first_cycle_multipass_data = &_ix86_first_cycle_multipass_data;
26357
26358 /* Initialize DATA. */
26359 static void
26360 core2i7_first_cycle_multipass_init (void *_data)
26361 {
26362 ix86_first_cycle_multipass_data_t data
26363 = (ix86_first_cycle_multipass_data_t) _data;
26364
26365 data->ifetch_block_len = 0;
26366 data->ifetch_block_n_insns = 0;
26367 data->ready_try_change = NULL;
26368 data->ready_try_change_size = 0;
26369 }
26370
26371 /* Advancing the cycle; reset ifetch block counts. */
26372 static void
26373 core2i7_dfa_post_advance_cycle (void)
26374 {
26375 ix86_first_cycle_multipass_data_t data = ix86_first_cycle_multipass_data;
26376
26377 gcc_assert (data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
26378
26379 data->ifetch_block_len = 0;
26380 data->ifetch_block_n_insns = 0;
26381 }
26382
26383 static int min_insn_size (rtx);
26384
26385 /* Filter out insns from ready_try that the core will not be able to issue
26386 on current cycle due to decoder. */
26387 static void
26388 core2i7_first_cycle_multipass_filter_ready_try
26389 (const_ix86_first_cycle_multipass_data_t data,
26390 signed char *ready_try, int n_ready, bool first_cycle_insn_p)
26391 {
26392 while (n_ready--)
26393 {
26394 rtx insn;
26395 int insn_size;
26396
26397 if (ready_try[n_ready])
26398 continue;
26399
26400 insn = get_ready_element (n_ready);
26401 insn_size = min_insn_size (insn);
26402
26403 if (/* If this is too long an insn for a secondary decoder ... */
26404 (!first_cycle_insn_p
26405 && insn_size > core2i7_secondary_decoder_max_insn_size)
26406 /* ... or it would not fit into the ifetch block ... */
26407 || data->ifetch_block_len + insn_size > core2i7_ifetch_block_size
26408 /* ... or the decoder is full already ... */
26409 || data->ifetch_block_n_insns + 1 > core2i7_ifetch_block_max_insns)
26410 /* ... mask the insn out. */
26411 {
26412 ready_try[n_ready] = 1;
26413
26414 if (data->ready_try_change)
26415 bitmap_set_bit (data->ready_try_change, n_ready);
26416 }
26417 }
26418 }
26419
26420 /* Prepare for a new round of multipass lookahead scheduling. */
26421 static void
26422 core2i7_first_cycle_multipass_begin (void *_data,
26423 signed char *ready_try, int n_ready,
26424 bool first_cycle_insn_p)
26425 {
26426 ix86_first_cycle_multipass_data_t data
26427 = (ix86_first_cycle_multipass_data_t) _data;
26428 const_ix86_first_cycle_multipass_data_t prev_data
26429 = ix86_first_cycle_multipass_data;
26430
26431 /* Restore the state from the end of the previous round. */
26432 data->ifetch_block_len = prev_data->ifetch_block_len;
26433 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns;
26434
26435 /* Filter instructions that cannot be issued on current cycle due to
26436 decoder restrictions. */
26437 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
26438 first_cycle_insn_p);
26439 }
26440
26441 /* INSN is being issued in current solution. Account for its impact on
26442 the decoder model. */
26443 static void
26444 core2i7_first_cycle_multipass_issue (void *_data,
26445 signed char *ready_try, int n_ready,
26446 rtx insn, const void *_prev_data)
26447 {
26448 ix86_first_cycle_multipass_data_t data
26449 = (ix86_first_cycle_multipass_data_t) _data;
26450 const_ix86_first_cycle_multipass_data_t prev_data
26451 = (const_ix86_first_cycle_multipass_data_t) _prev_data;
26452
26453 int insn_size = min_insn_size (insn);
26454
26455 data->ifetch_block_len = prev_data->ifetch_block_len + insn_size;
26456 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns + 1;
26457 gcc_assert (data->ifetch_block_len <= core2i7_ifetch_block_size
26458 && data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
26459
26460 /* Allocate or resize the bitmap for storing INSN's effect on ready_try. */
26461 if (!data->ready_try_change)
26462 {
26463 data->ready_try_change = sbitmap_alloc (n_ready);
26464 data->ready_try_change_size = n_ready;
26465 }
26466 else if (data->ready_try_change_size < n_ready)
26467 {
26468 data->ready_try_change = sbitmap_resize (data->ready_try_change,
26469 n_ready, 0);
26470 data->ready_try_change_size = n_ready;
26471 }
26472 bitmap_clear (data->ready_try_change);
26473
26474 /* Filter out insns from ready_try that the core will not be able to issue
26475 on current cycle due to decoder. */
26476 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
26477 false);
26478 }
26479
26480 /* Revert the effect on ready_try. */
26481 static void
26482 core2i7_first_cycle_multipass_backtrack (const void *_data,
26483 signed char *ready_try,
26484 int n_ready ATTRIBUTE_UNUSED)
26485 {
26486 const_ix86_first_cycle_multipass_data_t data
26487 = (const_ix86_first_cycle_multipass_data_t) _data;
26488 unsigned int i = 0;
26489 sbitmap_iterator sbi;
26490
26491 gcc_assert (bitmap_last_set_bit (data->ready_try_change) < n_ready);
26492 EXECUTE_IF_SET_IN_BITMAP (data->ready_try_change, 0, i, sbi)
26493 {
26494 ready_try[i] = 0;
26495 }
26496 }
26497
26498 /* Save the result of multipass lookahead scheduling for the next round. */
26499 static void
26500 core2i7_first_cycle_multipass_end (const void *_data)
26501 {
26502 const_ix86_first_cycle_multipass_data_t data
26503 = (const_ix86_first_cycle_multipass_data_t) _data;
26504 ix86_first_cycle_multipass_data_t next_data
26505 = ix86_first_cycle_multipass_data;
26506
26507 if (data != NULL)
26508 {
26509 next_data->ifetch_block_len = data->ifetch_block_len;
26510 next_data->ifetch_block_n_insns = data->ifetch_block_n_insns;
26511 }
26512 }
26513
26514 /* Deallocate target data. */
26515 static void
26516 core2i7_first_cycle_multipass_fini (void *_data)
26517 {
26518 ix86_first_cycle_multipass_data_t data
26519 = (ix86_first_cycle_multipass_data_t) _data;
26520
26521 if (data->ready_try_change)
26522 {
26523 sbitmap_free (data->ready_try_change);
26524 data->ready_try_change = NULL;
26525 data->ready_try_change_size = 0;
26526 }
26527 }
26528
26529 /* Prepare for scheduling pass. */
26530 static void
26531 ix86_sched_init_global (FILE *, int, int)
26532 {
26533 /* Install scheduling hooks for current CPU. Some of these hooks are used
26534 in time-critical parts of the scheduler, so we only set them up when
26535 they are actually used. */
26536 switch (ix86_tune)
26537 {
26538 case PROCESSOR_CORE2:
26539 case PROCESSOR_NEHALEM:
26540 case PROCESSOR_SANDYBRIDGE:
26541 case PROCESSOR_HASWELL:
26542 /* Do not perform multipass scheduling for pre-reload schedule
26543 to save compile time. */
26544 if (reload_completed)
26545 {
26546 targetm.sched.dfa_post_advance_cycle
26547 = core2i7_dfa_post_advance_cycle;
26548 targetm.sched.first_cycle_multipass_init
26549 = core2i7_first_cycle_multipass_init;
26550 targetm.sched.first_cycle_multipass_begin
26551 = core2i7_first_cycle_multipass_begin;
26552 targetm.sched.first_cycle_multipass_issue
26553 = core2i7_first_cycle_multipass_issue;
26554 targetm.sched.first_cycle_multipass_backtrack
26555 = core2i7_first_cycle_multipass_backtrack;
26556 targetm.sched.first_cycle_multipass_end
26557 = core2i7_first_cycle_multipass_end;
26558 targetm.sched.first_cycle_multipass_fini
26559 = core2i7_first_cycle_multipass_fini;
26560
26561 /* Set decoder parameters. */
26562 core2i7_secondary_decoder_max_insn_size = 8;
26563 core2i7_ifetch_block_size = 16;
26564 core2i7_ifetch_block_max_insns = 6;
26565 break;
26566 }
26567 /* ... Fall through ... */
26568 default:
26569 targetm.sched.dfa_post_advance_cycle = NULL;
26570 targetm.sched.first_cycle_multipass_init = NULL;
26571 targetm.sched.first_cycle_multipass_begin = NULL;
26572 targetm.sched.first_cycle_multipass_issue = NULL;
26573 targetm.sched.first_cycle_multipass_backtrack = NULL;
26574 targetm.sched.first_cycle_multipass_end = NULL;
26575 targetm.sched.first_cycle_multipass_fini = NULL;
26576 break;
26577 }
26578 }
26579
26580 \f
26581 /* Compute the alignment given to a constant that is being placed in memory.
26582 EXP is the constant and ALIGN is the alignment that the object would
26583 ordinarily have.
26584 The value of this function is used instead of that alignment to align
26585 the object. */
26586
26587 int
26588 ix86_constant_alignment (tree exp, int align)
26589 {
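/* Scalar floating-point, vector, and integer constants are given at least
   their natural alignment (64 bits for DFmode, 128 bits for 128-bit modes)
   when the requested ALIGN is smaller.  */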
26590 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
26591 || TREE_CODE (exp) == INTEGER_CST)
26592 {
26593 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
26594 return 64;
26595 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
26596 return 128;
26597 }
26598 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
26599 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
26600 return BITS_PER_WORD;
26601
26602 return align;
26603 }
26604
26605 /* Compute the alignment for a static variable.
26606 TYPE is the data type, and ALIGN is the alignment that
26607 the object would ordinarily have. The value of this function is used
26608 instead of that alignment to align the object. */
26609
26610 int
26611 ix86_data_alignment (tree type, int align, bool opt)
26612 {
26613 /* GCC 4.8 and earlier used to incorrectly assume this alignment even
26614 for symbols from other compilation units or symbols that don't need
26615 to bind locally. In order to preserve some ABI compatibility with
26616 those compilers, ensure we don't decrease alignment from what we
26617 used to assume. */
26618
26619 int max_align_compat
26620 = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
26621
26622 /* A data structure equal to or greater than the size of a cache line
26623 (64 bytes on the Pentium 4 and other recent Intel processors, including
26624 processors based on the Intel Core microarchitecture) should be aligned
26625 so that its base address is a multiple of the cache line size. */
26626
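/* prefetch_block is measured in bytes; scale by 8 to get bits, the unit
   used for alignment values here.  */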
26627 int max_align
26628 = MIN ((unsigned) ix86_tune_cost->prefetch_block * 8, MAX_OFILE_ALIGNMENT);
26629
26630 if (max_align < BITS_PER_WORD)
26631 max_align = BITS_PER_WORD;
26632
26633 if (opt
26634 && AGGREGATE_TYPE_P (type)
26635 && TYPE_SIZE (type)
26636 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
26637 {
26638 if (wi::geu_p (TYPE_SIZE (type), max_align_compat)
26639 && align < max_align_compat)
26640 align = max_align_compat;
26641 if (wi::geu_p (TYPE_SIZE (type), max_align)
26642 && align < max_align)
26643 align = max_align;
26644 }
26645
26646 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
26647 to a 16-byte boundary. */
26648 if (TARGET_64BIT)
26649 {
26650 if ((opt ? AGGREGATE_TYPE_P (type) : TREE_CODE (type) == ARRAY_TYPE)
26651 && TYPE_SIZE (type)
26652 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
26653 && wi::geu_p (TYPE_SIZE (type), 128)
26654 && align < 128)
26655 return 128;
26656 }
26657
26658 if (!opt)
26659 return align;
26660
26661 if (TREE_CODE (type) == ARRAY_TYPE)
26662 {
26663 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
26664 return 64;
26665 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
26666 return 128;
26667 }
26668 else if (TREE_CODE (type) == COMPLEX_TYPE)
26669 {
26670
26671 if (TYPE_MODE (type) == DCmode && align < 64)
26672 return 64;
26673 if ((TYPE_MODE (type) == XCmode
26674 || TYPE_MODE (type) == TCmode) && align < 128)
26675 return 128;
26676 }
26677 else if ((TREE_CODE (type) == RECORD_TYPE
26678 || TREE_CODE (type) == UNION_TYPE
26679 || TREE_CODE (type) == QUAL_UNION_TYPE)
26680 && TYPE_FIELDS (type))
26681 {
26682 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
26683 return 64;
26684 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
26685 return 128;
26686 }
26687 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
26688 || TREE_CODE (type) == INTEGER_TYPE)
26689 {
26690 if (TYPE_MODE (type) == DFmode && align < 64)
26691 return 64;
26692 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
26693 return 128;
26694 }
26695
26696 return align;
26697 }
26698
26699 /* Compute the alignment for a local variable or a stack slot. EXP is
26700 the data type or decl itself, MODE is the widest mode available and
26701 ALIGN is the alignment that the object would ordinarily have. The
26702 value of this macro is used instead of that alignment to align the
26703 object. */
26704
26705 unsigned int
26706 ix86_local_alignment (tree exp, enum machine_mode mode,
26707 unsigned int align)
26708 {
26709 tree type, decl;
26710
26711 if (exp && DECL_P (exp))
26712 {
26713 type = TREE_TYPE (exp);
26714 decl = exp;
26715 }
26716 else
26717 {
26718 type = exp;
26719 decl = NULL;
26720 }
26721
26722 /* Don't do dynamic stack realignment for long long objects with
26723 -mpreferred-stack-boundary=2. */
26724 if (!TARGET_64BIT
26725 && align == 64
26726 && ix86_preferred_stack_boundary < 64
26727 && (mode == DImode || (type && TYPE_MODE (type) == DImode))
26728 && (!type || !TYPE_USER_ALIGN (type))
26729 && (!decl || !DECL_USER_ALIGN (decl)))
26730 align = 32;
26731
26732 /* If TYPE is NULL, we are allocating a stack slot for caller-save
26733 register in MODE. We will return the largest alignment of XF
26734 and DF. */
26735 if (!type)
26736 {
26737 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
26738 align = GET_MODE_ALIGNMENT (DFmode);
26739 return align;
26740 }
26741
26742 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
26743 to a 16-byte boundary. The exact wording is:
26744
26745 An array uses the same alignment as its elements, except that a local or
26746 global array variable of length at least 16 bytes or
26747 a C99 variable-length array variable always has alignment of at least 16 bytes.
26748
26749 This was added to allow the use of aligned SSE instructions on arrays. The
26750 rule is meant for static storage (where the compiler cannot do the analysis
26751 by itself). We follow it for automatic variables only when convenient.
26752 We fully control everything in the function being compiled, and functions
26753 from other units cannot rely on the alignment.
26754
26755 Exclude the va_list type. It is the common case of a local array where
26756 we cannot benefit from the alignment.
26757
26758 TODO: Probably one should optimize for size only when var is not escaping. */
26759 if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
26760 && TARGET_SSE)
26761 {
26762 if (AGGREGATE_TYPE_P (type)
26763 && (va_list_type_node == NULL_TREE
26764 || (TYPE_MAIN_VARIANT (type)
26765 != TYPE_MAIN_VARIANT (va_list_type_node)))
26766 && TYPE_SIZE (type)
26767 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
26768 && wi::geu_p (TYPE_SIZE (type), 16)
26769 && align < 128)
26770 return 128;
26771 }
26772 if (TREE_CODE (type) == ARRAY_TYPE)
26773 {
26774 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
26775 return 64;
26776 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
26777 return 128;
26778 }
26779 else if (TREE_CODE (type) == COMPLEX_TYPE)
26780 {
26781 if (TYPE_MODE (type) == DCmode && align < 64)
26782 return 64;
26783 if ((TYPE_MODE (type) == XCmode
26784 || TYPE_MODE (type) == TCmode) && align < 128)
26785 return 128;
26786 }
26787 else if ((TREE_CODE (type) == RECORD_TYPE
26788 || TREE_CODE (type) == UNION_TYPE
26789 || TREE_CODE (type) == QUAL_UNION_TYPE)
26790 && TYPE_FIELDS (type))
26791 {
26792 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
26793 return 64;
26794 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
26795 return 128;
26796 }
26797 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
26798 || TREE_CODE (type) == INTEGER_TYPE)
26799 {
26800
26801 if (TYPE_MODE (type) == DFmode && align < 64)
26802 return 64;
26803 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
26804 return 128;
26805 }
26806 return align;
26807 }
26808
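/* Illustrative sketch, not part of GCC: with -O2 -msse on x86-64, the rules
   above raise the alignment of sufficiently large local aggregates so that
   aligned SSE accesses can be used.  The function and variable names below
   are hypothetical; the exact alignment still depends on TYPE_USER_ALIGN,
   DECL_USER_ALIGN and the active target flags.  */
#if 0
void
example_local_alignment (void)
{
  char buf[32];   /* Aggregate of at least 16 bytes: alignment raised to 128.  */
  double d[2];    /* Array of DFmode elements: at least 64-bit alignment.      */
  long long ll;   /* With -m32 -mpreferred-stack-boundary=2, left at 32 bits.  */

  __builtin_printf ("%zu %zu %zu\n",
		    __alignof__ (buf), __alignof__ (d), __alignof__ (ll));
}
#endif
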
26809 /* Compute the minimum required alignment for dynamic stack realignment
26810 purposes for a local variable, parameter or a stack slot. EXP is
26811 the data type or decl itself, MODE is its mode and ALIGN is the
26812 alignment that the object would ordinarily have. */
26813
26814 unsigned int
26815 ix86_minimum_alignment (tree exp, enum machine_mode mode,
26816 unsigned int align)
26817 {
26818 tree type, decl;
26819
26820 if (exp && DECL_P (exp))
26821 {
26822 type = TREE_TYPE (exp);
26823 decl = exp;
26824 }
26825 else
26826 {
26827 type = exp;
26828 decl = NULL;
26829 }
26830
26831 if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
26832 return align;
26833
26834 /* Don't do dynamic stack realignment for long long objects with
26835 -mpreferred-stack-boundary=2. */
26836 if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
26837 && (!type || !TYPE_USER_ALIGN (type))
26838 && (!decl || !DECL_USER_ALIGN (decl)))
26839 return 32;
26840
26841 return align;
26842 }
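
/* Illustrative sketch, not part of GCC: under -m32 -mpreferred-stack-boundary=2
   a plain long long is not treated as requiring 64-bit stack alignment, so it
   does not force dynamic realignment, whereas an explicit user alignment is
   still honored because of the DECL_USER_ALIGN check above.  The names below
   are hypothetical.  */
#if 0
void
example_minimum_alignment (void)
{
  long long plain = 0;                                 /* Minimum alignment 32.  */
  long long forced __attribute__ ((aligned (8))) = 0;  /* User alignment kept.   */
  (void) plain;
  (void) forced;
}
#endif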
26843 \f
26844 /* Find a location for the static chain incoming to a nested function.
26845 This is a register, unless all free registers are used by arguments. */
26846
26847 static rtx
26848 ix86_static_chain (const_tree fndecl, bool incoming_p)
26849 {
26850 unsigned regno;
26851
26852 if (!DECL_STATIC_CHAIN (fndecl))
26853 return NULL;
26854
26855 if (TARGET_64BIT)
26856 {
26857 /* We always use R10 in 64-bit mode. */
26858 regno = R10_REG;
26859 }
26860 else
26861 {
26862 tree fntype;
26863 unsigned int ccvt;
26864
26865 /* By default in 32-bit mode we use ECX to pass the static chain. */
26866 regno = CX_REG;
26867
26868 fntype = TREE_TYPE (fndecl);
26869 ccvt = ix86_get_callcvt (fntype);
26870 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
26871 {
26872 /* Fastcall functions use ecx/edx for arguments, which leaves
26873 us with EAX for the static chain.
26874 Thiscall functions use ecx for arguments, which also
26875 leaves us with EAX for the static chain. */
26876 regno = AX_REG;
26877 }
26878 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
26879 {
26880 	 /* Thiscall functions use ecx for arguments, which leaves
26881 	 us with EAX and EDX for the static chain.
26882 	 We use EAX for ABI compatibility.  */
26883 regno = AX_REG;
26884 }
26885 else if (ix86_function_regparm (fntype, fndecl) == 3)
26886 {
26887 /* For regparm 3, we have no free call-clobbered registers in
26888 which to store the static chain. In order to implement this,
26889 we have the trampoline push the static chain to the stack.
26890 However, we can't push a value below the return address when
26891 we call the nested function directly, so we have to use an
26892 alternate entry point. For this we use ESI, and have the
26893 alternate entry point push ESI, so that things appear the
26894 same once we're executing the nested function. */
26895 if (incoming_p)
26896 {
26897 if (fndecl == current_function_decl)
26898 ix86_static_chain_on_stack = true;
26899 return gen_frame_mem (SImode,
26900 plus_constant (Pmode,
26901 arg_pointer_rtx, -8));
26902 }
26903 regno = SI_REG;
26904 }
26905 }
26906
26907 return gen_rtx_REG (Pmode, regno);
26908 }
26909
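/* Illustrative sketch, not part of GCC: a GNU C nested function that refers
   to a local of its enclosing function needs the static chain described
   above; in 32-bit mode the register chosen depends on the calling
   convention (ECX by default, EAX for fastcall/thiscall, or a stack slot
   for regparm(3)).  The names below are hypothetical.  */
#if 0
int
example_static_chain (int x)
{
  int nested (int y) { return x + y; }   /* Uses the static chain to reach X.  */
  return nested (1);
}
#endif
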
26910 /* Emit RTL insns to initialize the variable parts of a trampoline.
26911 FNDECL is the decl of the target address; M_TRAMP is a MEM for
26912 the trampoline, and CHAIN_VALUE is an RTX for the static chain
26913 to be passed to the target function. */
26914
26915 static void
26916 ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
26917 {
26918 rtx mem, fnaddr;
26919 int opcode;
26920 int offset = 0;
26921
26922 fnaddr = XEXP (DECL_RTL (fndecl), 0);
26923
26924 if (TARGET_64BIT)
26925 {
26926 int size;
26927
26928 	 /* Load the function address into r11.  Try to load the address
26929 	 using the shorter movl instead of movabs.  We may want to support
26930 	 movq for kernel mode, but the kernel does not use trampolines at
26931 	 the moment.  FNADDR is a 32-bit address and may not be in
26932 	 DImode when ptr_mode == SImode.  Always use movl in this
26933 	 case.  */
26934 if (ptr_mode == SImode
26935 || x86_64_zext_immediate_operand (fnaddr, VOIDmode))
26936 {
26937 fnaddr = copy_addr_to_reg (fnaddr);
26938
26939 mem = adjust_address (m_tramp, HImode, offset);
26940 emit_move_insn (mem, gen_int_mode (0xbb41, HImode));
26941
26942 mem = adjust_address (m_tramp, SImode, offset + 2);
26943 emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
26944 offset += 6;
26945 }
26946 else
26947 {
26948 mem = adjust_address (m_tramp, HImode, offset);
26949 emit_move_insn (mem, gen_int_mode (0xbb49, HImode));
26950
26951 mem = adjust_address (m_tramp, DImode, offset + 2);
26952 emit_move_insn (mem, fnaddr);
26953 offset += 10;
26954 }
26955
26956 	 /* Load the static chain into r10 using movabs.  Use the shorter movl
26957 	 instead of movabs when ptr_mode == SImode.  */
26958 if (ptr_mode == SImode)
26959 {
26960 opcode = 0xba41;
26961 size = 6;
26962 }
26963 else
26964 {
26965 opcode = 0xba49;
26966 size = 10;
26967 }
26968
26969 mem = adjust_address (m_tramp, HImode, offset);
26970 emit_move_insn (mem, gen_int_mode (opcode, HImode));
26971
26972 mem = adjust_address (m_tramp, ptr_mode, offset + 2);
26973 emit_move_insn (mem, chain_value);
26974 offset += size;
26975
26976 /* Jump to r11; the last (unused) byte is a nop, only there to
26977 pad the write out to a single 32-bit store. */
26978 mem = adjust_address (m_tramp, SImode, offset);
26979 emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
26980 offset += 4;
26981 }
26982 else
26983 {
26984 rtx disp, chain;
26985
26986 /* Depending on the static chain location, either load a register
26987 with a constant, or push the constant to the stack. All of the
26988 instructions are the same size. */
26989 chain = ix86_static_chain (fndecl, true);
26990 if (REG_P (chain))
26991 {
26992 switch (REGNO (chain))
26993 {
26994 case AX_REG:
26995 opcode = 0xb8; break;
26996 case CX_REG:
26997 opcode = 0xb9; break;
26998 default:
26999 gcc_unreachable ();
27000 }
27001 }
27002 else
27003 opcode = 0x68;
27004
27005 mem = adjust_address (m_tramp, QImode, offset);
27006 emit_move_insn (mem, gen_int_mode (opcode, QImode));
27007
27008 mem = adjust_address (m_tramp, SImode, offset + 1);
27009 emit_move_insn (mem, chain_value);
27010 offset += 5;
27011
27012 mem = adjust_address (m_tramp, QImode, offset);
27013 emit_move_insn (mem, gen_int_mode (0xe9, QImode));
27014
27015 mem = adjust_address (m_tramp, SImode, offset + 1);
27016
27017 	 /* Compute the offset from the end of the jmp to the target function.
27018 	 When the trampoline stores the static chain on the stack, we need
27019 	 to skip the first insn, which pushes the (call-saved) register
27020 	 static chain; this push is 1 byte.  */
27021 offset += 5;
27022 disp = expand_binop (SImode, sub_optab, fnaddr,
27023 plus_constant (Pmode, XEXP (m_tramp, 0),
27024 offset - (MEM_P (chain) ? 1 : 0)),
27025 NULL_RTX, 1, OPTAB_DIRECT);
27026 emit_move_insn (mem, disp);
27027 }
27028
27029 gcc_assert (offset <= TRAMPOLINE_SIZE);
27030
27031 #ifdef HAVE_ENABLE_EXECUTE_STACK
27032 #ifdef CHECK_EXECUTE_STACK_ENABLED
27033 if (CHECK_EXECUTE_STACK_ENABLED)
27034 #endif
27035 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
27036 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
27037 #endif
27038 }
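
/* For reference, a sketch (not normative, derived from the stores above) of
   the 64-bit trampoline bytes when movabs is used for both the function
   address and the static chain (ptr_mode == DImode):

     offset  bytes                  instruction
       0     49 bb <8-byte fnaddr>  movabs $fnaddr, %r11
      10     49 ba <8-byte chain>   movabs $chain,  %r10
      20     49 ff e3               rex.WB jmp *%r11
      23     90                     nop (pads the final 32-bit store)

   With ptr_mode == SImode the movabs forms are replaced by the shorter movl
   encodings (41 bb / 41 ba followed by 4-byte immediates).  */
#if 0
static const unsigned char x86_64_trampoline_sketch[24] = {
  0x49, 0xbb, 0, 0, 0, 0, 0, 0, 0, 0,   /* movabs $fnaddr, %r11  */
  0x49, 0xba, 0, 0, 0, 0, 0, 0, 0, 0,   /* movabs $chain,  %r10  */
  0x49, 0xff, 0xe3,                     /* rex.WB jmp *%r11      */
  0x90                                  /* nop (padding)         */
};
#endif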
27039 \f
27040 /* The following file contains several enumerations and data structures
27041 built from the definitions in i386-builtin-types.def. */
27042
27043 #include "i386-builtin-types.inc"
27044
27045 /* Table for the ix86 builtin non-function types. */
27046 static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1];
27047
27048 /* Retrieve an element from the above table, building some of
27049 the types lazily. */
27050
27051 static tree
27052 ix86_get_builtin_type (enum ix86_builtin_type tcode)
27053 {
27054 unsigned int index;
27055 tree type, itype;
27056
27057 gcc_assert ((unsigned)tcode < ARRAY_SIZE(ix86_builtin_type_tab));
27058
27059 type = ix86_builtin_type_tab[(int) tcode];
27060 if (type != NULL)
27061 return type;
27062
27063 gcc_assert (tcode > IX86_BT_LAST_PRIM);
27064 if (tcode <= IX86_BT_LAST_VECT)
27065 {
27066 enum machine_mode mode;
27067
27068 index = tcode - IX86_BT_LAST_PRIM - 1;
27069 itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]);
27070 mode = ix86_builtin_type_vect_mode[index];
27071
27072 type = build_vector_type_for_mode (itype, mode);
27073 }
27074 else
27075 {
27076 int quals;
27077
27078 index = tcode - IX86_BT_LAST_VECT - 1;
27079 if (tcode <= IX86_BT_LAST_PTR)
27080 quals = TYPE_UNQUALIFIED;
27081 else
27082 quals = TYPE_QUAL_CONST;
27083
27084 itype = ix86_get_builtin_type (ix86_builtin_type_ptr_base[index]);
27085 if (quals != TYPE_UNQUALIFIED)
27086 itype = build_qualified_type (itype, quals);
27087
27088 type = build_pointer_type (itype);
27089 }
27090
27091 ix86_builtin_type_tab[(int) tcode] = type;
27092 return type;
27093 }
27094
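/* Illustrative usage sketch, not part of GCC: the table above acts as a memo
   cache, so repeated requests for the same type code return the same tree and
   only the first call builds it.  The enumerator name below is taken from
   i386-builtin-types.inc and is assumed here for illustration only.  */
#if 0
static void
example_builtin_type_lookup (void)
{
  tree a = ix86_get_builtin_type (IX86_BT_V4SF);   /* Built lazily.            */
  tree b = ix86_get_builtin_type (IX86_BT_V4SF);   /* Returned from the table. */
  gcc_assert (a == b);
}
#endif
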
27095 /* Table for the ix86 builtin function types. */
27096 static GTY(()) tree ix86_builtin_func_type_tab[(int) IX86_BT_LAST_ALIAS + 1];
27097
27098 /* Retrieve an element from the above table, building some of
27099 the types lazily. */
27100
27101 static tree
27102 ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode)
27103 {
27104 tree type;
27105
27106 gcc_assert ((unsigned)tcode < ARRAY_SIZE (ix86_builtin_func_type_tab));
27107
27108 type = ix86_builtin_func_type_tab[(int) tcode];
27109 if (type != NULL)
27110 return type;
27111
27112 if (tcode <= IX86_BT_LAST_FUNC)
27113 {
27114 unsigned start = ix86_builtin_func_start[(int) tcode];
27115 unsigned after = ix86_builtin_func_start[(int) tcode + 1];
27116 tree rtype, atype, args = void_list_node;
27117 unsigned i;
27118
27119 rtype = ix86_get_builtin_type (ix86_builtin_func_args[start]);
27120 for (i = after - 1; i > start; --i)
27121 {
27122 atype = ix86_get_builtin_type (ix86_builtin_func_args[i]);
27123 args = tree_cons (NULL, atype, args);
27124 }
27125
27126 type = build_function_type (rtype, args);
27127 }
27128 else
27129 {
27130 unsigned index = tcode - IX86_BT_LAST_FUNC - 1;
27131 enum ix86_builtin_func_type icode;
27132
27133 icode = ix86_builtin_func_alias_base[index];
27134 type = ix86_get_builtin_func_type (icode);
27135 }
27136
27137 ix86_builtin_func_type_tab[(int) tcode] = type;
27138 return type;
27139 }
27140
27141
27142 /* Codes for all the SSE/MMX builtins. */
27143 enum ix86_builtins
27144 {
27145 IX86_BUILTIN_ADDPS,
27146 IX86_BUILTIN_ADDSS,
27147 IX86_BUILTIN_DIVPS,
27148 IX86_BUILTIN_DIVSS,
27149 IX86_BUILTIN_MULPS,
27150 IX86_BUILTIN_MULSS,
27151 IX86_BUILTIN_SUBPS,
27152 IX86_BUILTIN_SUBSS,
27153
27154 IX86_BUILTIN_CMPEQPS,
27155 IX86_BUILTIN_CMPLTPS,
27156 IX86_BUILTIN_CMPLEPS,
27157 IX86_BUILTIN_CMPGTPS,
27158 IX86_BUILTIN_CMPGEPS,
27159 IX86_BUILTIN_CMPNEQPS,
27160 IX86_BUILTIN_CMPNLTPS,
27161 IX86_BUILTIN_CMPNLEPS,
27162 IX86_BUILTIN_CMPNGTPS,
27163 IX86_BUILTIN_CMPNGEPS,
27164 IX86_BUILTIN_CMPORDPS,
27165 IX86_BUILTIN_CMPUNORDPS,
27166 IX86_BUILTIN_CMPEQSS,
27167 IX86_BUILTIN_CMPLTSS,
27168 IX86_BUILTIN_CMPLESS,
27169 IX86_BUILTIN_CMPNEQSS,
27170 IX86_BUILTIN_CMPNLTSS,
27171 IX86_BUILTIN_CMPNLESS,
27172 IX86_BUILTIN_CMPORDSS,
27173 IX86_BUILTIN_CMPUNORDSS,
27174
27175 IX86_BUILTIN_COMIEQSS,
27176 IX86_BUILTIN_COMILTSS,
27177 IX86_BUILTIN_COMILESS,
27178 IX86_BUILTIN_COMIGTSS,
27179 IX86_BUILTIN_COMIGESS,
27180 IX86_BUILTIN_COMINEQSS,
27181 IX86_BUILTIN_UCOMIEQSS,
27182 IX86_BUILTIN_UCOMILTSS,
27183 IX86_BUILTIN_UCOMILESS,
27184 IX86_BUILTIN_UCOMIGTSS,
27185 IX86_BUILTIN_UCOMIGESS,
27186 IX86_BUILTIN_UCOMINEQSS,
27187
27188 IX86_BUILTIN_CVTPI2PS,
27189 IX86_BUILTIN_CVTPS2PI,
27190 IX86_BUILTIN_CVTSI2SS,
27191 IX86_BUILTIN_CVTSI642SS,
27192 IX86_BUILTIN_CVTSS2SI,
27193 IX86_BUILTIN_CVTSS2SI64,
27194 IX86_BUILTIN_CVTTPS2PI,
27195 IX86_BUILTIN_CVTTSS2SI,
27196 IX86_BUILTIN_CVTTSS2SI64,
27197
27198 IX86_BUILTIN_MAXPS,
27199 IX86_BUILTIN_MAXSS,
27200 IX86_BUILTIN_MINPS,
27201 IX86_BUILTIN_MINSS,
27202
27203 IX86_BUILTIN_LOADUPS,
27204 IX86_BUILTIN_STOREUPS,
27205 IX86_BUILTIN_MOVSS,
27206
27207 IX86_BUILTIN_MOVHLPS,
27208 IX86_BUILTIN_MOVLHPS,
27209 IX86_BUILTIN_LOADHPS,
27210 IX86_BUILTIN_LOADLPS,
27211 IX86_BUILTIN_STOREHPS,
27212 IX86_BUILTIN_STORELPS,
27213
27214 IX86_BUILTIN_MASKMOVQ,
27215 IX86_BUILTIN_MOVMSKPS,
27216 IX86_BUILTIN_PMOVMSKB,
27217
27218 IX86_BUILTIN_MOVNTPS,
27219 IX86_BUILTIN_MOVNTQ,
27220
27221 IX86_BUILTIN_LOADDQU,
27222 IX86_BUILTIN_STOREDQU,
27223
27224 IX86_BUILTIN_PACKSSWB,
27225 IX86_BUILTIN_PACKSSDW,
27226 IX86_BUILTIN_PACKUSWB,
27227
27228 IX86_BUILTIN_PADDB,
27229 IX86_BUILTIN_PADDW,
27230 IX86_BUILTIN_PADDD,
27231 IX86_BUILTIN_PADDQ,
27232 IX86_BUILTIN_PADDSB,
27233 IX86_BUILTIN_PADDSW,
27234 IX86_BUILTIN_PADDUSB,
27235 IX86_BUILTIN_PADDUSW,
27236 IX86_BUILTIN_PSUBB,
27237 IX86_BUILTIN_PSUBW,
27238 IX86_BUILTIN_PSUBD,
27239 IX86_BUILTIN_PSUBQ,
27240 IX86_BUILTIN_PSUBSB,
27241 IX86_BUILTIN_PSUBSW,
27242 IX86_BUILTIN_PSUBUSB,
27243 IX86_BUILTIN_PSUBUSW,
27244
27245 IX86_BUILTIN_PAND,
27246 IX86_BUILTIN_PANDN,
27247 IX86_BUILTIN_POR,
27248 IX86_BUILTIN_PXOR,
27249
27250 IX86_BUILTIN_PAVGB,
27251 IX86_BUILTIN_PAVGW,
27252
27253 IX86_BUILTIN_PCMPEQB,
27254 IX86_BUILTIN_PCMPEQW,
27255 IX86_BUILTIN_PCMPEQD,
27256 IX86_BUILTIN_PCMPGTB,
27257 IX86_BUILTIN_PCMPGTW,
27258 IX86_BUILTIN_PCMPGTD,
27259
27260 IX86_BUILTIN_PMADDWD,
27261
27262 IX86_BUILTIN_PMAXSW,
27263 IX86_BUILTIN_PMAXUB,
27264 IX86_BUILTIN_PMINSW,
27265 IX86_BUILTIN_PMINUB,
27266
27267 IX86_BUILTIN_PMULHUW,
27268 IX86_BUILTIN_PMULHW,
27269 IX86_BUILTIN_PMULLW,
27270
27271 IX86_BUILTIN_PSADBW,
27272 IX86_BUILTIN_PSHUFW,
27273
27274 IX86_BUILTIN_PSLLW,
27275 IX86_BUILTIN_PSLLD,
27276 IX86_BUILTIN_PSLLQ,
27277 IX86_BUILTIN_PSRAW,
27278 IX86_BUILTIN_PSRAD,
27279 IX86_BUILTIN_PSRLW,
27280 IX86_BUILTIN_PSRLD,
27281 IX86_BUILTIN_PSRLQ,
27282 IX86_BUILTIN_PSLLWI,
27283 IX86_BUILTIN_PSLLDI,
27284 IX86_BUILTIN_PSLLQI,
27285 IX86_BUILTIN_PSRAWI,
27286 IX86_BUILTIN_PSRADI,
27287 IX86_BUILTIN_PSRLWI,
27288 IX86_BUILTIN_PSRLDI,
27289 IX86_BUILTIN_PSRLQI,
27290
27291 IX86_BUILTIN_PUNPCKHBW,
27292 IX86_BUILTIN_PUNPCKHWD,
27293 IX86_BUILTIN_PUNPCKHDQ,
27294 IX86_BUILTIN_PUNPCKLBW,
27295 IX86_BUILTIN_PUNPCKLWD,
27296 IX86_BUILTIN_PUNPCKLDQ,
27297
27298 IX86_BUILTIN_SHUFPS,
27299
27300 IX86_BUILTIN_RCPPS,
27301 IX86_BUILTIN_RCPSS,
27302 IX86_BUILTIN_RSQRTPS,
27303 IX86_BUILTIN_RSQRTPS_NR,
27304 IX86_BUILTIN_RSQRTSS,
27305 IX86_BUILTIN_RSQRTF,
27306 IX86_BUILTIN_SQRTPS,
27307 IX86_BUILTIN_SQRTPS_NR,
27308 IX86_BUILTIN_SQRTSS,
27309
27310 IX86_BUILTIN_UNPCKHPS,
27311 IX86_BUILTIN_UNPCKLPS,
27312
27313 IX86_BUILTIN_ANDPS,
27314 IX86_BUILTIN_ANDNPS,
27315 IX86_BUILTIN_ORPS,
27316 IX86_BUILTIN_XORPS,
27317
27318 IX86_BUILTIN_EMMS,
27319 IX86_BUILTIN_LDMXCSR,
27320 IX86_BUILTIN_STMXCSR,
27321 IX86_BUILTIN_SFENCE,
27322
27323 IX86_BUILTIN_FXSAVE,
27324 IX86_BUILTIN_FXRSTOR,
27325 IX86_BUILTIN_FXSAVE64,
27326 IX86_BUILTIN_FXRSTOR64,
27327
27328 IX86_BUILTIN_XSAVE,
27329 IX86_BUILTIN_XRSTOR,
27330 IX86_BUILTIN_XSAVE64,
27331 IX86_BUILTIN_XRSTOR64,
27332
27333 IX86_BUILTIN_XSAVEOPT,
27334 IX86_BUILTIN_XSAVEOPT64,
27335
27336 IX86_BUILTIN_XSAVEC,
27337 IX86_BUILTIN_XSAVEC64,
27338
27339 IX86_BUILTIN_XSAVES,
27340 IX86_BUILTIN_XRSTORS,
27341 IX86_BUILTIN_XSAVES64,
27342 IX86_BUILTIN_XRSTORS64,
27343
27344 /* 3DNow! Original */
27345 IX86_BUILTIN_FEMMS,
27346 IX86_BUILTIN_PAVGUSB,
27347 IX86_BUILTIN_PF2ID,
27348 IX86_BUILTIN_PFACC,
27349 IX86_BUILTIN_PFADD,
27350 IX86_BUILTIN_PFCMPEQ,
27351 IX86_BUILTIN_PFCMPGE,
27352 IX86_BUILTIN_PFCMPGT,
27353 IX86_BUILTIN_PFMAX,
27354 IX86_BUILTIN_PFMIN,
27355 IX86_BUILTIN_PFMUL,
27356 IX86_BUILTIN_PFRCP,
27357 IX86_BUILTIN_PFRCPIT1,
27358 IX86_BUILTIN_PFRCPIT2,
27359 IX86_BUILTIN_PFRSQIT1,
27360 IX86_BUILTIN_PFRSQRT,
27361 IX86_BUILTIN_PFSUB,
27362 IX86_BUILTIN_PFSUBR,
27363 IX86_BUILTIN_PI2FD,
27364 IX86_BUILTIN_PMULHRW,
27365
27366 /* 3DNow! Athlon Extensions */
27367 IX86_BUILTIN_PF2IW,
27368 IX86_BUILTIN_PFNACC,
27369 IX86_BUILTIN_PFPNACC,
27370 IX86_BUILTIN_PI2FW,
27371 IX86_BUILTIN_PSWAPDSI,
27372 IX86_BUILTIN_PSWAPDSF,
27373
27374 /* SSE2 */
27375 IX86_BUILTIN_ADDPD,
27376 IX86_BUILTIN_ADDSD,
27377 IX86_BUILTIN_DIVPD,
27378 IX86_BUILTIN_DIVSD,
27379 IX86_BUILTIN_MULPD,
27380 IX86_BUILTIN_MULSD,
27381 IX86_BUILTIN_SUBPD,
27382 IX86_BUILTIN_SUBSD,
27383
27384 IX86_BUILTIN_CMPEQPD,
27385 IX86_BUILTIN_CMPLTPD,
27386 IX86_BUILTIN_CMPLEPD,
27387 IX86_BUILTIN_CMPGTPD,
27388 IX86_BUILTIN_CMPGEPD,
27389 IX86_BUILTIN_CMPNEQPD,
27390 IX86_BUILTIN_CMPNLTPD,
27391 IX86_BUILTIN_CMPNLEPD,
27392 IX86_BUILTIN_CMPNGTPD,
27393 IX86_BUILTIN_CMPNGEPD,
27394 IX86_BUILTIN_CMPORDPD,
27395 IX86_BUILTIN_CMPUNORDPD,
27396 IX86_BUILTIN_CMPEQSD,
27397 IX86_BUILTIN_CMPLTSD,
27398 IX86_BUILTIN_CMPLESD,
27399 IX86_BUILTIN_CMPNEQSD,
27400 IX86_BUILTIN_CMPNLTSD,
27401 IX86_BUILTIN_CMPNLESD,
27402 IX86_BUILTIN_CMPORDSD,
27403 IX86_BUILTIN_CMPUNORDSD,
27404
27405 IX86_BUILTIN_COMIEQSD,
27406 IX86_BUILTIN_COMILTSD,
27407 IX86_BUILTIN_COMILESD,
27408 IX86_BUILTIN_COMIGTSD,
27409 IX86_BUILTIN_COMIGESD,
27410 IX86_BUILTIN_COMINEQSD,
27411 IX86_BUILTIN_UCOMIEQSD,
27412 IX86_BUILTIN_UCOMILTSD,
27413 IX86_BUILTIN_UCOMILESD,
27414 IX86_BUILTIN_UCOMIGTSD,
27415 IX86_BUILTIN_UCOMIGESD,
27416 IX86_BUILTIN_UCOMINEQSD,
27417
27418 IX86_BUILTIN_MAXPD,
27419 IX86_BUILTIN_MAXSD,
27420 IX86_BUILTIN_MINPD,
27421 IX86_BUILTIN_MINSD,
27422
27423 IX86_BUILTIN_ANDPD,
27424 IX86_BUILTIN_ANDNPD,
27425 IX86_BUILTIN_ORPD,
27426 IX86_BUILTIN_XORPD,
27427
27428 IX86_BUILTIN_SQRTPD,
27429 IX86_BUILTIN_SQRTSD,
27430
27431 IX86_BUILTIN_UNPCKHPD,
27432 IX86_BUILTIN_UNPCKLPD,
27433
27434 IX86_BUILTIN_SHUFPD,
27435
27436 IX86_BUILTIN_LOADUPD,
27437 IX86_BUILTIN_STOREUPD,
27438 IX86_BUILTIN_MOVSD,
27439
27440 IX86_BUILTIN_LOADHPD,
27441 IX86_BUILTIN_LOADLPD,
27442
27443 IX86_BUILTIN_CVTDQ2PD,
27444 IX86_BUILTIN_CVTDQ2PS,
27445
27446 IX86_BUILTIN_CVTPD2DQ,
27447 IX86_BUILTIN_CVTPD2PI,
27448 IX86_BUILTIN_CVTPD2PS,
27449 IX86_BUILTIN_CVTTPD2DQ,
27450 IX86_BUILTIN_CVTTPD2PI,
27451
27452 IX86_BUILTIN_CVTPI2PD,
27453 IX86_BUILTIN_CVTSI2SD,
27454 IX86_BUILTIN_CVTSI642SD,
27455
27456 IX86_BUILTIN_CVTSD2SI,
27457 IX86_BUILTIN_CVTSD2SI64,
27458 IX86_BUILTIN_CVTSD2SS,
27459 IX86_BUILTIN_CVTSS2SD,
27460 IX86_BUILTIN_CVTTSD2SI,
27461 IX86_BUILTIN_CVTTSD2SI64,
27462
27463 IX86_BUILTIN_CVTPS2DQ,
27464 IX86_BUILTIN_CVTPS2PD,
27465 IX86_BUILTIN_CVTTPS2DQ,
27466
27467 IX86_BUILTIN_MOVNTI,
27468 IX86_BUILTIN_MOVNTI64,
27469 IX86_BUILTIN_MOVNTPD,
27470 IX86_BUILTIN_MOVNTDQ,
27471
27472 IX86_BUILTIN_MOVQ128,
27473
27474 /* SSE2 MMX */
27475 IX86_BUILTIN_MASKMOVDQU,
27476 IX86_BUILTIN_MOVMSKPD,
27477 IX86_BUILTIN_PMOVMSKB128,
27478
27479 IX86_BUILTIN_PACKSSWB128,
27480 IX86_BUILTIN_PACKSSDW128,
27481 IX86_BUILTIN_PACKUSWB128,
27482
27483 IX86_BUILTIN_PADDB128,
27484 IX86_BUILTIN_PADDW128,
27485 IX86_BUILTIN_PADDD128,
27486 IX86_BUILTIN_PADDQ128,
27487 IX86_BUILTIN_PADDSB128,
27488 IX86_BUILTIN_PADDSW128,
27489 IX86_BUILTIN_PADDUSB128,
27490 IX86_BUILTIN_PADDUSW128,
27491 IX86_BUILTIN_PSUBB128,
27492 IX86_BUILTIN_PSUBW128,
27493 IX86_BUILTIN_PSUBD128,
27494 IX86_BUILTIN_PSUBQ128,
27495 IX86_BUILTIN_PSUBSB128,
27496 IX86_BUILTIN_PSUBSW128,
27497 IX86_BUILTIN_PSUBUSB128,
27498 IX86_BUILTIN_PSUBUSW128,
27499
27500 IX86_BUILTIN_PAND128,
27501 IX86_BUILTIN_PANDN128,
27502 IX86_BUILTIN_POR128,
27503 IX86_BUILTIN_PXOR128,
27504
27505 IX86_BUILTIN_PAVGB128,
27506 IX86_BUILTIN_PAVGW128,
27507
27508 IX86_BUILTIN_PCMPEQB128,
27509 IX86_BUILTIN_PCMPEQW128,
27510 IX86_BUILTIN_PCMPEQD128,
27511 IX86_BUILTIN_PCMPGTB128,
27512 IX86_BUILTIN_PCMPGTW128,
27513 IX86_BUILTIN_PCMPGTD128,
27514
27515 IX86_BUILTIN_PMADDWD128,
27516
27517 IX86_BUILTIN_PMAXSW128,
27518 IX86_BUILTIN_PMAXUB128,
27519 IX86_BUILTIN_PMINSW128,
27520 IX86_BUILTIN_PMINUB128,
27521
27522 IX86_BUILTIN_PMULUDQ,
27523 IX86_BUILTIN_PMULUDQ128,
27524 IX86_BUILTIN_PMULHUW128,
27525 IX86_BUILTIN_PMULHW128,
27526 IX86_BUILTIN_PMULLW128,
27527
27528 IX86_BUILTIN_PSADBW128,
27529 IX86_BUILTIN_PSHUFHW,
27530 IX86_BUILTIN_PSHUFLW,
27531 IX86_BUILTIN_PSHUFD,
27532
27533 IX86_BUILTIN_PSLLDQI128,
27534 IX86_BUILTIN_PSLLWI128,
27535 IX86_BUILTIN_PSLLDI128,
27536 IX86_BUILTIN_PSLLQI128,
27537 IX86_BUILTIN_PSRAWI128,
27538 IX86_BUILTIN_PSRADI128,
27539 IX86_BUILTIN_PSRLDQI128,
27540 IX86_BUILTIN_PSRLWI128,
27541 IX86_BUILTIN_PSRLDI128,
27542 IX86_BUILTIN_PSRLQI128,
27543
27544 IX86_BUILTIN_PSLLDQ128,
27545 IX86_BUILTIN_PSLLW128,
27546 IX86_BUILTIN_PSLLD128,
27547 IX86_BUILTIN_PSLLQ128,
27548 IX86_BUILTIN_PSRAW128,
27549 IX86_BUILTIN_PSRAD128,
27550 IX86_BUILTIN_PSRLW128,
27551 IX86_BUILTIN_PSRLD128,
27552 IX86_BUILTIN_PSRLQ128,
27553
27554 IX86_BUILTIN_PUNPCKHBW128,
27555 IX86_BUILTIN_PUNPCKHWD128,
27556 IX86_BUILTIN_PUNPCKHDQ128,
27557 IX86_BUILTIN_PUNPCKHQDQ128,
27558 IX86_BUILTIN_PUNPCKLBW128,
27559 IX86_BUILTIN_PUNPCKLWD128,
27560 IX86_BUILTIN_PUNPCKLDQ128,
27561 IX86_BUILTIN_PUNPCKLQDQ128,
27562
27563 IX86_BUILTIN_CLFLUSH,
27564 IX86_BUILTIN_MFENCE,
27565 IX86_BUILTIN_LFENCE,
27566 IX86_BUILTIN_PAUSE,
27567
27568 IX86_BUILTIN_FNSTENV,
27569 IX86_BUILTIN_FLDENV,
27570 IX86_BUILTIN_FNSTSW,
27571 IX86_BUILTIN_FNCLEX,
27572
27573 IX86_BUILTIN_BSRSI,
27574 IX86_BUILTIN_BSRDI,
27575 IX86_BUILTIN_RDPMC,
27576 IX86_BUILTIN_RDTSC,
27577 IX86_BUILTIN_RDTSCP,
27578 IX86_BUILTIN_ROLQI,
27579 IX86_BUILTIN_ROLHI,
27580 IX86_BUILTIN_RORQI,
27581 IX86_BUILTIN_RORHI,
27582
27583 /* SSE3. */
27584 IX86_BUILTIN_ADDSUBPS,
27585 IX86_BUILTIN_HADDPS,
27586 IX86_BUILTIN_HSUBPS,
27587 IX86_BUILTIN_MOVSHDUP,
27588 IX86_BUILTIN_MOVSLDUP,
27589 IX86_BUILTIN_ADDSUBPD,
27590 IX86_BUILTIN_HADDPD,
27591 IX86_BUILTIN_HSUBPD,
27592 IX86_BUILTIN_LDDQU,
27593
27594 IX86_BUILTIN_MONITOR,
27595 IX86_BUILTIN_MWAIT,
27596
27597 /* SSSE3. */
27598 IX86_BUILTIN_PHADDW,
27599 IX86_BUILTIN_PHADDD,
27600 IX86_BUILTIN_PHADDSW,
27601 IX86_BUILTIN_PHSUBW,
27602 IX86_BUILTIN_PHSUBD,
27603 IX86_BUILTIN_PHSUBSW,
27604 IX86_BUILTIN_PMADDUBSW,
27605 IX86_BUILTIN_PMULHRSW,
27606 IX86_BUILTIN_PSHUFB,
27607 IX86_BUILTIN_PSIGNB,
27608 IX86_BUILTIN_PSIGNW,
27609 IX86_BUILTIN_PSIGND,
27610 IX86_BUILTIN_PALIGNR,
27611 IX86_BUILTIN_PABSB,
27612 IX86_BUILTIN_PABSW,
27613 IX86_BUILTIN_PABSD,
27614
27615 IX86_BUILTIN_PHADDW128,
27616 IX86_BUILTIN_PHADDD128,
27617 IX86_BUILTIN_PHADDSW128,
27618 IX86_BUILTIN_PHSUBW128,
27619 IX86_BUILTIN_PHSUBD128,
27620 IX86_BUILTIN_PHSUBSW128,
27621 IX86_BUILTIN_PMADDUBSW128,
27622 IX86_BUILTIN_PMULHRSW128,
27623 IX86_BUILTIN_PSHUFB128,
27624 IX86_BUILTIN_PSIGNB128,
27625 IX86_BUILTIN_PSIGNW128,
27626 IX86_BUILTIN_PSIGND128,
27627 IX86_BUILTIN_PALIGNR128,
27628 IX86_BUILTIN_PABSB128,
27629 IX86_BUILTIN_PABSW128,
27630 IX86_BUILTIN_PABSD128,
27631
27632 /* AMDFAM10 - SSE4A New Instructions. */
27633 IX86_BUILTIN_MOVNTSD,
27634 IX86_BUILTIN_MOVNTSS,
27635 IX86_BUILTIN_EXTRQI,
27636 IX86_BUILTIN_EXTRQ,
27637 IX86_BUILTIN_INSERTQI,
27638 IX86_BUILTIN_INSERTQ,
27639
27640 /* SSE4.1. */
27641 IX86_BUILTIN_BLENDPD,
27642 IX86_BUILTIN_BLENDPS,
27643 IX86_BUILTIN_BLENDVPD,
27644 IX86_BUILTIN_BLENDVPS,
27645 IX86_BUILTIN_PBLENDVB128,
27646 IX86_BUILTIN_PBLENDW128,
27647
27648 IX86_BUILTIN_DPPD,
27649 IX86_BUILTIN_DPPS,
27650
27651 IX86_BUILTIN_INSERTPS128,
27652
27653 IX86_BUILTIN_MOVNTDQA,
27654 IX86_BUILTIN_MPSADBW128,
27655 IX86_BUILTIN_PACKUSDW128,
27656 IX86_BUILTIN_PCMPEQQ,
27657 IX86_BUILTIN_PHMINPOSUW128,
27658
27659 IX86_BUILTIN_PMAXSB128,
27660 IX86_BUILTIN_PMAXSD128,
27661 IX86_BUILTIN_PMAXUD128,
27662 IX86_BUILTIN_PMAXUW128,
27663
27664 IX86_BUILTIN_PMINSB128,
27665 IX86_BUILTIN_PMINSD128,
27666 IX86_BUILTIN_PMINUD128,
27667 IX86_BUILTIN_PMINUW128,
27668
27669 IX86_BUILTIN_PMOVSXBW128,
27670 IX86_BUILTIN_PMOVSXBD128,
27671 IX86_BUILTIN_PMOVSXBQ128,
27672 IX86_BUILTIN_PMOVSXWD128,
27673 IX86_BUILTIN_PMOVSXWQ128,
27674 IX86_BUILTIN_PMOVSXDQ128,
27675
27676 IX86_BUILTIN_PMOVZXBW128,
27677 IX86_BUILTIN_PMOVZXBD128,
27678 IX86_BUILTIN_PMOVZXBQ128,
27679 IX86_BUILTIN_PMOVZXWD128,
27680 IX86_BUILTIN_PMOVZXWQ128,
27681 IX86_BUILTIN_PMOVZXDQ128,
27682
27683 IX86_BUILTIN_PMULDQ128,
27684 IX86_BUILTIN_PMULLD128,
27685
27686 IX86_BUILTIN_ROUNDSD,
27687 IX86_BUILTIN_ROUNDSS,
27688
27689 IX86_BUILTIN_ROUNDPD,
27690 IX86_BUILTIN_ROUNDPS,
27691
27692 IX86_BUILTIN_FLOORPD,
27693 IX86_BUILTIN_CEILPD,
27694 IX86_BUILTIN_TRUNCPD,
27695 IX86_BUILTIN_RINTPD,
27696 IX86_BUILTIN_ROUNDPD_AZ,
27697
27698 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX,
27699 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX,
27700 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX,
27701
27702 IX86_BUILTIN_FLOORPS,
27703 IX86_BUILTIN_CEILPS,
27704 IX86_BUILTIN_TRUNCPS,
27705 IX86_BUILTIN_RINTPS,
27706 IX86_BUILTIN_ROUNDPS_AZ,
27707
27708 IX86_BUILTIN_FLOORPS_SFIX,
27709 IX86_BUILTIN_CEILPS_SFIX,
27710 IX86_BUILTIN_ROUNDPS_AZ_SFIX,
27711
27712 IX86_BUILTIN_PTESTZ,
27713 IX86_BUILTIN_PTESTC,
27714 IX86_BUILTIN_PTESTNZC,
27715
27716 IX86_BUILTIN_VEC_INIT_V2SI,
27717 IX86_BUILTIN_VEC_INIT_V4HI,
27718 IX86_BUILTIN_VEC_INIT_V8QI,
27719 IX86_BUILTIN_VEC_EXT_V2DF,
27720 IX86_BUILTIN_VEC_EXT_V2DI,
27721 IX86_BUILTIN_VEC_EXT_V4SF,
27722 IX86_BUILTIN_VEC_EXT_V4SI,
27723 IX86_BUILTIN_VEC_EXT_V8HI,
27724 IX86_BUILTIN_VEC_EXT_V2SI,
27725 IX86_BUILTIN_VEC_EXT_V4HI,
27726 IX86_BUILTIN_VEC_EXT_V16QI,
27727 IX86_BUILTIN_VEC_SET_V2DI,
27728 IX86_BUILTIN_VEC_SET_V4SF,
27729 IX86_BUILTIN_VEC_SET_V4SI,
27730 IX86_BUILTIN_VEC_SET_V8HI,
27731 IX86_BUILTIN_VEC_SET_V4HI,
27732 IX86_BUILTIN_VEC_SET_V16QI,
27733
27734 IX86_BUILTIN_VEC_PACK_SFIX,
27735 IX86_BUILTIN_VEC_PACK_SFIX256,
27736
27737 /* SSE4.2. */
27738 IX86_BUILTIN_CRC32QI,
27739 IX86_BUILTIN_CRC32HI,
27740 IX86_BUILTIN_CRC32SI,
27741 IX86_BUILTIN_CRC32DI,
27742
27743 IX86_BUILTIN_PCMPESTRI128,
27744 IX86_BUILTIN_PCMPESTRM128,
27745 IX86_BUILTIN_PCMPESTRA128,
27746 IX86_BUILTIN_PCMPESTRC128,
27747 IX86_BUILTIN_PCMPESTRO128,
27748 IX86_BUILTIN_PCMPESTRS128,
27749 IX86_BUILTIN_PCMPESTRZ128,
27750 IX86_BUILTIN_PCMPISTRI128,
27751 IX86_BUILTIN_PCMPISTRM128,
27752 IX86_BUILTIN_PCMPISTRA128,
27753 IX86_BUILTIN_PCMPISTRC128,
27754 IX86_BUILTIN_PCMPISTRO128,
27755 IX86_BUILTIN_PCMPISTRS128,
27756 IX86_BUILTIN_PCMPISTRZ128,
27757
27758 IX86_BUILTIN_PCMPGTQ,
27759
27760 /* AES instructions */
27761 IX86_BUILTIN_AESENC128,
27762 IX86_BUILTIN_AESENCLAST128,
27763 IX86_BUILTIN_AESDEC128,
27764 IX86_BUILTIN_AESDECLAST128,
27765 IX86_BUILTIN_AESIMC128,
27766 IX86_BUILTIN_AESKEYGENASSIST128,
27767
27768 /* PCLMUL instruction */
27769 IX86_BUILTIN_PCLMULQDQ128,
27770
27771 /* AVX */
27772 IX86_BUILTIN_ADDPD256,
27773 IX86_BUILTIN_ADDPS256,
27774 IX86_BUILTIN_ADDSUBPD256,
27775 IX86_BUILTIN_ADDSUBPS256,
27776 IX86_BUILTIN_ANDPD256,
27777 IX86_BUILTIN_ANDPS256,
27778 IX86_BUILTIN_ANDNPD256,
27779 IX86_BUILTIN_ANDNPS256,
27780 IX86_BUILTIN_BLENDPD256,
27781 IX86_BUILTIN_BLENDPS256,
27782 IX86_BUILTIN_BLENDVPD256,
27783 IX86_BUILTIN_BLENDVPS256,
27784 IX86_BUILTIN_DIVPD256,
27785 IX86_BUILTIN_DIVPS256,
27786 IX86_BUILTIN_DPPS256,
27787 IX86_BUILTIN_HADDPD256,
27788 IX86_BUILTIN_HADDPS256,
27789 IX86_BUILTIN_HSUBPD256,
27790 IX86_BUILTIN_HSUBPS256,
27791 IX86_BUILTIN_MAXPD256,
27792 IX86_BUILTIN_MAXPS256,
27793 IX86_BUILTIN_MINPD256,
27794 IX86_BUILTIN_MINPS256,
27795 IX86_BUILTIN_MULPD256,
27796 IX86_BUILTIN_MULPS256,
27797 IX86_BUILTIN_ORPD256,
27798 IX86_BUILTIN_ORPS256,
27799 IX86_BUILTIN_SHUFPD256,
27800 IX86_BUILTIN_SHUFPS256,
27801 IX86_BUILTIN_SUBPD256,
27802 IX86_BUILTIN_SUBPS256,
27803 IX86_BUILTIN_XORPD256,
27804 IX86_BUILTIN_XORPS256,
27805 IX86_BUILTIN_CMPSD,
27806 IX86_BUILTIN_CMPSS,
27807 IX86_BUILTIN_CMPPD,
27808 IX86_BUILTIN_CMPPS,
27809 IX86_BUILTIN_CMPPD256,
27810 IX86_BUILTIN_CMPPS256,
27811 IX86_BUILTIN_CVTDQ2PD256,
27812 IX86_BUILTIN_CVTDQ2PS256,
27813 IX86_BUILTIN_CVTPD2PS256,
27814 IX86_BUILTIN_CVTPS2DQ256,
27815 IX86_BUILTIN_CVTPS2PD256,
27816 IX86_BUILTIN_CVTTPD2DQ256,
27817 IX86_BUILTIN_CVTPD2DQ256,
27818 IX86_BUILTIN_CVTTPS2DQ256,
27819 IX86_BUILTIN_EXTRACTF128PD256,
27820 IX86_BUILTIN_EXTRACTF128PS256,
27821 IX86_BUILTIN_EXTRACTF128SI256,
27822 IX86_BUILTIN_VZEROALL,
27823 IX86_BUILTIN_VZEROUPPER,
27824 IX86_BUILTIN_VPERMILVARPD,
27825 IX86_BUILTIN_VPERMILVARPS,
27826 IX86_BUILTIN_VPERMILVARPD256,
27827 IX86_BUILTIN_VPERMILVARPS256,
27828 IX86_BUILTIN_VPERMILPD,
27829 IX86_BUILTIN_VPERMILPS,
27830 IX86_BUILTIN_VPERMILPD256,
27831 IX86_BUILTIN_VPERMILPS256,
27832 IX86_BUILTIN_VPERMIL2PD,
27833 IX86_BUILTIN_VPERMIL2PS,
27834 IX86_BUILTIN_VPERMIL2PD256,
27835 IX86_BUILTIN_VPERMIL2PS256,
27836 IX86_BUILTIN_VPERM2F128PD256,
27837 IX86_BUILTIN_VPERM2F128PS256,
27838 IX86_BUILTIN_VPERM2F128SI256,
27839 IX86_BUILTIN_VBROADCASTSS,
27840 IX86_BUILTIN_VBROADCASTSD256,
27841 IX86_BUILTIN_VBROADCASTSS256,
27842 IX86_BUILTIN_VBROADCASTPD256,
27843 IX86_BUILTIN_VBROADCASTPS256,
27844 IX86_BUILTIN_VINSERTF128PD256,
27845 IX86_BUILTIN_VINSERTF128PS256,
27846 IX86_BUILTIN_VINSERTF128SI256,
27847 IX86_BUILTIN_LOADUPD256,
27848 IX86_BUILTIN_LOADUPS256,
27849 IX86_BUILTIN_STOREUPD256,
27850 IX86_BUILTIN_STOREUPS256,
27851 IX86_BUILTIN_LDDQU256,
27852 IX86_BUILTIN_MOVNTDQ256,
27853 IX86_BUILTIN_MOVNTPD256,
27854 IX86_BUILTIN_MOVNTPS256,
27855 IX86_BUILTIN_LOADDQU256,
27856 IX86_BUILTIN_STOREDQU256,
27857 IX86_BUILTIN_MASKLOADPD,
27858 IX86_BUILTIN_MASKLOADPS,
27859 IX86_BUILTIN_MASKSTOREPD,
27860 IX86_BUILTIN_MASKSTOREPS,
27861 IX86_BUILTIN_MASKLOADPD256,
27862 IX86_BUILTIN_MASKLOADPS256,
27863 IX86_BUILTIN_MASKSTOREPD256,
27864 IX86_BUILTIN_MASKSTOREPS256,
27865 IX86_BUILTIN_MOVSHDUP256,
27866 IX86_BUILTIN_MOVSLDUP256,
27867 IX86_BUILTIN_MOVDDUP256,
27868
27869 IX86_BUILTIN_SQRTPD256,
27870 IX86_BUILTIN_SQRTPS256,
27871 IX86_BUILTIN_SQRTPS_NR256,
27872 IX86_BUILTIN_RSQRTPS256,
27873 IX86_BUILTIN_RSQRTPS_NR256,
27874
27875 IX86_BUILTIN_RCPPS256,
27876
27877 IX86_BUILTIN_ROUNDPD256,
27878 IX86_BUILTIN_ROUNDPS256,
27879
27880 IX86_BUILTIN_FLOORPD256,
27881 IX86_BUILTIN_CEILPD256,
27882 IX86_BUILTIN_TRUNCPD256,
27883 IX86_BUILTIN_RINTPD256,
27884 IX86_BUILTIN_ROUNDPD_AZ256,
27885
27886 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256,
27887 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256,
27888 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256,
27889
27890 IX86_BUILTIN_FLOORPS256,
27891 IX86_BUILTIN_CEILPS256,
27892 IX86_BUILTIN_TRUNCPS256,
27893 IX86_BUILTIN_RINTPS256,
27894 IX86_BUILTIN_ROUNDPS_AZ256,
27895
27896 IX86_BUILTIN_FLOORPS_SFIX256,
27897 IX86_BUILTIN_CEILPS_SFIX256,
27898 IX86_BUILTIN_ROUNDPS_AZ_SFIX256,
27899
27900 IX86_BUILTIN_UNPCKHPD256,
27901 IX86_BUILTIN_UNPCKLPD256,
27902 IX86_BUILTIN_UNPCKHPS256,
27903 IX86_BUILTIN_UNPCKLPS256,
27904
27905 IX86_BUILTIN_SI256_SI,
27906 IX86_BUILTIN_PS256_PS,
27907 IX86_BUILTIN_PD256_PD,
27908 IX86_BUILTIN_SI_SI256,
27909 IX86_BUILTIN_PS_PS256,
27910 IX86_BUILTIN_PD_PD256,
27911
27912 IX86_BUILTIN_VTESTZPD,
27913 IX86_BUILTIN_VTESTCPD,
27914 IX86_BUILTIN_VTESTNZCPD,
27915 IX86_BUILTIN_VTESTZPS,
27916 IX86_BUILTIN_VTESTCPS,
27917 IX86_BUILTIN_VTESTNZCPS,
27918 IX86_BUILTIN_VTESTZPD256,
27919 IX86_BUILTIN_VTESTCPD256,
27920 IX86_BUILTIN_VTESTNZCPD256,
27921 IX86_BUILTIN_VTESTZPS256,
27922 IX86_BUILTIN_VTESTCPS256,
27923 IX86_BUILTIN_VTESTNZCPS256,
27924 IX86_BUILTIN_PTESTZ256,
27925 IX86_BUILTIN_PTESTC256,
27926 IX86_BUILTIN_PTESTNZC256,
27927
27928 IX86_BUILTIN_MOVMSKPD256,
27929 IX86_BUILTIN_MOVMSKPS256,
27930
27931 /* AVX2 */
27932 IX86_BUILTIN_MPSADBW256,
27933 IX86_BUILTIN_PABSB256,
27934 IX86_BUILTIN_PABSW256,
27935 IX86_BUILTIN_PABSD256,
27936 IX86_BUILTIN_PACKSSDW256,
27937 IX86_BUILTIN_PACKSSWB256,
27938 IX86_BUILTIN_PACKUSDW256,
27939 IX86_BUILTIN_PACKUSWB256,
27940 IX86_BUILTIN_PADDB256,
27941 IX86_BUILTIN_PADDW256,
27942 IX86_BUILTIN_PADDD256,
27943 IX86_BUILTIN_PADDQ256,
27944 IX86_BUILTIN_PADDSB256,
27945 IX86_BUILTIN_PADDSW256,
27946 IX86_BUILTIN_PADDUSB256,
27947 IX86_BUILTIN_PADDUSW256,
27948 IX86_BUILTIN_PALIGNR256,
27949 IX86_BUILTIN_AND256I,
27950 IX86_BUILTIN_ANDNOT256I,
27951 IX86_BUILTIN_PAVGB256,
27952 IX86_BUILTIN_PAVGW256,
27953 IX86_BUILTIN_PBLENDVB256,
27954 IX86_BUILTIN_PBLENDVW256,
27955 IX86_BUILTIN_PCMPEQB256,
27956 IX86_BUILTIN_PCMPEQW256,
27957 IX86_BUILTIN_PCMPEQD256,
27958 IX86_BUILTIN_PCMPEQQ256,
27959 IX86_BUILTIN_PCMPGTB256,
27960 IX86_BUILTIN_PCMPGTW256,
27961 IX86_BUILTIN_PCMPGTD256,
27962 IX86_BUILTIN_PCMPGTQ256,
27963 IX86_BUILTIN_PHADDW256,
27964 IX86_BUILTIN_PHADDD256,
27965 IX86_BUILTIN_PHADDSW256,
27966 IX86_BUILTIN_PHSUBW256,
27967 IX86_BUILTIN_PHSUBD256,
27968 IX86_BUILTIN_PHSUBSW256,
27969 IX86_BUILTIN_PMADDUBSW256,
27970 IX86_BUILTIN_PMADDWD256,
27971 IX86_BUILTIN_PMAXSB256,
27972 IX86_BUILTIN_PMAXSW256,
27973 IX86_BUILTIN_PMAXSD256,
27974 IX86_BUILTIN_PMAXUB256,
27975 IX86_BUILTIN_PMAXUW256,
27976 IX86_BUILTIN_PMAXUD256,
27977 IX86_BUILTIN_PMINSB256,
27978 IX86_BUILTIN_PMINSW256,
27979 IX86_BUILTIN_PMINSD256,
27980 IX86_BUILTIN_PMINUB256,
27981 IX86_BUILTIN_PMINUW256,
27982 IX86_BUILTIN_PMINUD256,
27983 IX86_BUILTIN_PMOVMSKB256,
27984 IX86_BUILTIN_PMOVSXBW256,
27985 IX86_BUILTIN_PMOVSXBD256,
27986 IX86_BUILTIN_PMOVSXBQ256,
27987 IX86_BUILTIN_PMOVSXWD256,
27988 IX86_BUILTIN_PMOVSXWQ256,
27989 IX86_BUILTIN_PMOVSXDQ256,
27990 IX86_BUILTIN_PMOVZXBW256,
27991 IX86_BUILTIN_PMOVZXBD256,
27992 IX86_BUILTIN_PMOVZXBQ256,
27993 IX86_BUILTIN_PMOVZXWD256,
27994 IX86_BUILTIN_PMOVZXWQ256,
27995 IX86_BUILTIN_PMOVZXDQ256,
27996 IX86_BUILTIN_PMULDQ256,
27997 IX86_BUILTIN_PMULHRSW256,
27998 IX86_BUILTIN_PMULHUW256,
27999 IX86_BUILTIN_PMULHW256,
28000 IX86_BUILTIN_PMULLW256,
28001 IX86_BUILTIN_PMULLD256,
28002 IX86_BUILTIN_PMULUDQ256,
28003 IX86_BUILTIN_POR256,
28004 IX86_BUILTIN_PSADBW256,
28005 IX86_BUILTIN_PSHUFB256,
28006 IX86_BUILTIN_PSHUFD256,
28007 IX86_BUILTIN_PSHUFHW256,
28008 IX86_BUILTIN_PSHUFLW256,
28009 IX86_BUILTIN_PSIGNB256,
28010 IX86_BUILTIN_PSIGNW256,
28011 IX86_BUILTIN_PSIGND256,
28012 IX86_BUILTIN_PSLLDQI256,
28013 IX86_BUILTIN_PSLLWI256,
28014 IX86_BUILTIN_PSLLW256,
28015 IX86_BUILTIN_PSLLDI256,
28016 IX86_BUILTIN_PSLLD256,
28017 IX86_BUILTIN_PSLLQI256,
28018 IX86_BUILTIN_PSLLQ256,
28019 IX86_BUILTIN_PSRAWI256,
28020 IX86_BUILTIN_PSRAW256,
28021 IX86_BUILTIN_PSRADI256,
28022 IX86_BUILTIN_PSRAD256,
28023 IX86_BUILTIN_PSRLDQI256,
28024 IX86_BUILTIN_PSRLWI256,
28025 IX86_BUILTIN_PSRLW256,
28026 IX86_BUILTIN_PSRLDI256,
28027 IX86_BUILTIN_PSRLD256,
28028 IX86_BUILTIN_PSRLQI256,
28029 IX86_BUILTIN_PSRLQ256,
28030 IX86_BUILTIN_PSUBB256,
28031 IX86_BUILTIN_PSUBW256,
28032 IX86_BUILTIN_PSUBD256,
28033 IX86_BUILTIN_PSUBQ256,
28034 IX86_BUILTIN_PSUBSB256,
28035 IX86_BUILTIN_PSUBSW256,
28036 IX86_BUILTIN_PSUBUSB256,
28037 IX86_BUILTIN_PSUBUSW256,
28038 IX86_BUILTIN_PUNPCKHBW256,
28039 IX86_BUILTIN_PUNPCKHWD256,
28040 IX86_BUILTIN_PUNPCKHDQ256,
28041 IX86_BUILTIN_PUNPCKHQDQ256,
28042 IX86_BUILTIN_PUNPCKLBW256,
28043 IX86_BUILTIN_PUNPCKLWD256,
28044 IX86_BUILTIN_PUNPCKLDQ256,
28045 IX86_BUILTIN_PUNPCKLQDQ256,
28046 IX86_BUILTIN_PXOR256,
28047 IX86_BUILTIN_MOVNTDQA256,
28048 IX86_BUILTIN_VBROADCASTSS_PS,
28049 IX86_BUILTIN_VBROADCASTSS_PS256,
28050 IX86_BUILTIN_VBROADCASTSD_PD256,
28051 IX86_BUILTIN_VBROADCASTSI256,
28052 IX86_BUILTIN_PBLENDD256,
28053 IX86_BUILTIN_PBLENDD128,
28054 IX86_BUILTIN_PBROADCASTB256,
28055 IX86_BUILTIN_PBROADCASTW256,
28056 IX86_BUILTIN_PBROADCASTD256,
28057 IX86_BUILTIN_PBROADCASTQ256,
28058 IX86_BUILTIN_PBROADCASTB128,
28059 IX86_BUILTIN_PBROADCASTW128,
28060 IX86_BUILTIN_PBROADCASTD128,
28061 IX86_BUILTIN_PBROADCASTQ128,
28062 IX86_BUILTIN_VPERMVARSI256,
28063 IX86_BUILTIN_VPERMDF256,
28064 IX86_BUILTIN_VPERMVARSF256,
28065 IX86_BUILTIN_VPERMDI256,
28066 IX86_BUILTIN_VPERMTI256,
28067 IX86_BUILTIN_VEXTRACT128I256,
28068 IX86_BUILTIN_VINSERT128I256,
28069 IX86_BUILTIN_MASKLOADD,
28070 IX86_BUILTIN_MASKLOADQ,
28071 IX86_BUILTIN_MASKLOADD256,
28072 IX86_BUILTIN_MASKLOADQ256,
28073 IX86_BUILTIN_MASKSTORED,
28074 IX86_BUILTIN_MASKSTOREQ,
28075 IX86_BUILTIN_MASKSTORED256,
28076 IX86_BUILTIN_MASKSTOREQ256,
28077 IX86_BUILTIN_PSLLVV4DI,
28078 IX86_BUILTIN_PSLLVV2DI,
28079 IX86_BUILTIN_PSLLVV8SI,
28080 IX86_BUILTIN_PSLLVV4SI,
28081 IX86_BUILTIN_PSRAVV8SI,
28082 IX86_BUILTIN_PSRAVV4SI,
28083 IX86_BUILTIN_PSRLVV4DI,
28084 IX86_BUILTIN_PSRLVV2DI,
28085 IX86_BUILTIN_PSRLVV8SI,
28086 IX86_BUILTIN_PSRLVV4SI,
28087
28088 IX86_BUILTIN_GATHERSIV2DF,
28089 IX86_BUILTIN_GATHERSIV4DF,
28090 IX86_BUILTIN_GATHERDIV2DF,
28091 IX86_BUILTIN_GATHERDIV4DF,
28092 IX86_BUILTIN_GATHERSIV4SF,
28093 IX86_BUILTIN_GATHERSIV8SF,
28094 IX86_BUILTIN_GATHERDIV4SF,
28095 IX86_BUILTIN_GATHERDIV8SF,
28096 IX86_BUILTIN_GATHERSIV2DI,
28097 IX86_BUILTIN_GATHERSIV4DI,
28098 IX86_BUILTIN_GATHERDIV2DI,
28099 IX86_BUILTIN_GATHERDIV4DI,
28100 IX86_BUILTIN_GATHERSIV4SI,
28101 IX86_BUILTIN_GATHERSIV8SI,
28102 IX86_BUILTIN_GATHERDIV4SI,
28103 IX86_BUILTIN_GATHERDIV8SI,
28104
28105 /* AVX512F */
28106 IX86_BUILTIN_ADDPD512,
28107 IX86_BUILTIN_ADDPS512,
28108 IX86_BUILTIN_ADDSD_ROUND,
28109 IX86_BUILTIN_ADDSS_ROUND,
28110 IX86_BUILTIN_ALIGND512,
28111 IX86_BUILTIN_ALIGNQ512,
28112 IX86_BUILTIN_BLENDMD512,
28113 IX86_BUILTIN_BLENDMPD512,
28114 IX86_BUILTIN_BLENDMPS512,
28115 IX86_BUILTIN_BLENDMQ512,
28116 IX86_BUILTIN_BROADCASTF32X4_512,
28117 IX86_BUILTIN_BROADCASTF64X4_512,
28118 IX86_BUILTIN_BROADCASTI32X4_512,
28119 IX86_BUILTIN_BROADCASTI64X4_512,
28120 IX86_BUILTIN_BROADCASTSD512,
28121 IX86_BUILTIN_BROADCASTSS512,
28122 IX86_BUILTIN_CMPD512,
28123 IX86_BUILTIN_CMPPD512,
28124 IX86_BUILTIN_CMPPS512,
28125 IX86_BUILTIN_CMPQ512,
28126 IX86_BUILTIN_CMPSD_MASK,
28127 IX86_BUILTIN_CMPSS_MASK,
28128 IX86_BUILTIN_COMIDF,
28129 IX86_BUILTIN_COMISF,
28130 IX86_BUILTIN_COMPRESSPD512,
28131 IX86_BUILTIN_COMPRESSPDSTORE512,
28132 IX86_BUILTIN_COMPRESSPS512,
28133 IX86_BUILTIN_COMPRESSPSSTORE512,
28134 IX86_BUILTIN_CVTDQ2PD512,
28135 IX86_BUILTIN_CVTDQ2PS512,
28136 IX86_BUILTIN_CVTPD2DQ512,
28137 IX86_BUILTIN_CVTPD2PS512,
28138 IX86_BUILTIN_CVTPD2UDQ512,
28139 IX86_BUILTIN_CVTPH2PS512,
28140 IX86_BUILTIN_CVTPS2DQ512,
28141 IX86_BUILTIN_CVTPS2PD512,
28142 IX86_BUILTIN_CVTPS2PH512,
28143 IX86_BUILTIN_CVTPS2UDQ512,
28144 IX86_BUILTIN_CVTSD2SS_ROUND,
28145 IX86_BUILTIN_CVTSI2SD64,
28146 IX86_BUILTIN_CVTSI2SS32,
28147 IX86_BUILTIN_CVTSI2SS64,
28148 IX86_BUILTIN_CVTSS2SD_ROUND,
28149 IX86_BUILTIN_CVTTPD2DQ512,
28150 IX86_BUILTIN_CVTTPD2UDQ512,
28151 IX86_BUILTIN_CVTTPS2DQ512,
28152 IX86_BUILTIN_CVTTPS2UDQ512,
28153 IX86_BUILTIN_CVTUDQ2PD512,
28154 IX86_BUILTIN_CVTUDQ2PS512,
28155 IX86_BUILTIN_CVTUSI2SD32,
28156 IX86_BUILTIN_CVTUSI2SD64,
28157 IX86_BUILTIN_CVTUSI2SS32,
28158 IX86_BUILTIN_CVTUSI2SS64,
28159 IX86_BUILTIN_DIVPD512,
28160 IX86_BUILTIN_DIVPS512,
28161 IX86_BUILTIN_DIVSD_ROUND,
28162 IX86_BUILTIN_DIVSS_ROUND,
28163 IX86_BUILTIN_EXPANDPD512,
28164 IX86_BUILTIN_EXPANDPD512Z,
28165 IX86_BUILTIN_EXPANDPDLOAD512,
28166 IX86_BUILTIN_EXPANDPDLOAD512Z,
28167 IX86_BUILTIN_EXPANDPS512,
28168 IX86_BUILTIN_EXPANDPS512Z,
28169 IX86_BUILTIN_EXPANDPSLOAD512,
28170 IX86_BUILTIN_EXPANDPSLOAD512Z,
28171 IX86_BUILTIN_EXTRACTF32X4,
28172 IX86_BUILTIN_EXTRACTF64X4,
28173 IX86_BUILTIN_EXTRACTI32X4,
28174 IX86_BUILTIN_EXTRACTI64X4,
28175 IX86_BUILTIN_FIXUPIMMPD512_MASK,
28176 IX86_BUILTIN_FIXUPIMMPD512_MASKZ,
28177 IX86_BUILTIN_FIXUPIMMPS512_MASK,
28178 IX86_BUILTIN_FIXUPIMMPS512_MASKZ,
28179 IX86_BUILTIN_FIXUPIMMSD128_MASK,
28180 IX86_BUILTIN_FIXUPIMMSD128_MASKZ,
28181 IX86_BUILTIN_FIXUPIMMSS128_MASK,
28182 IX86_BUILTIN_FIXUPIMMSS128_MASKZ,
28183 IX86_BUILTIN_GETEXPPD512,
28184 IX86_BUILTIN_GETEXPPS512,
28185 IX86_BUILTIN_GETEXPSD128,
28186 IX86_BUILTIN_GETEXPSS128,
28187 IX86_BUILTIN_GETMANTPD512,
28188 IX86_BUILTIN_GETMANTPS512,
28189 IX86_BUILTIN_GETMANTSD128,
28190 IX86_BUILTIN_GETMANTSS128,
28191 IX86_BUILTIN_INSERTF32X4,
28192 IX86_BUILTIN_INSERTF64X4,
28193 IX86_BUILTIN_INSERTI32X4,
28194 IX86_BUILTIN_INSERTI64X4,
28195 IX86_BUILTIN_LOADAPD512,
28196 IX86_BUILTIN_LOADAPS512,
28197 IX86_BUILTIN_LOADDQUDI512,
28198 IX86_BUILTIN_LOADDQUSI512,
28199 IX86_BUILTIN_LOADUPD512,
28200 IX86_BUILTIN_LOADUPS512,
28201 IX86_BUILTIN_MAXPD512,
28202 IX86_BUILTIN_MAXPS512,
28203 IX86_BUILTIN_MAXSD_ROUND,
28204 IX86_BUILTIN_MAXSS_ROUND,
28205 IX86_BUILTIN_MINPD512,
28206 IX86_BUILTIN_MINPS512,
28207 IX86_BUILTIN_MINSD_ROUND,
28208 IX86_BUILTIN_MINSS_ROUND,
28209 IX86_BUILTIN_MOVAPD512,
28210 IX86_BUILTIN_MOVAPS512,
28211 IX86_BUILTIN_MOVDDUP512,
28212 IX86_BUILTIN_MOVDQA32LOAD512,
28213 IX86_BUILTIN_MOVDQA32STORE512,
28214 IX86_BUILTIN_MOVDQA32_512,
28215 IX86_BUILTIN_MOVDQA64LOAD512,
28216 IX86_BUILTIN_MOVDQA64STORE512,
28217 IX86_BUILTIN_MOVDQA64_512,
28218 IX86_BUILTIN_MOVNTDQ512,
28219 IX86_BUILTIN_MOVNTDQA512,
28220 IX86_BUILTIN_MOVNTPD512,
28221 IX86_BUILTIN_MOVNTPS512,
28222 IX86_BUILTIN_MOVSHDUP512,
28223 IX86_BUILTIN_MOVSLDUP512,
28224 IX86_BUILTIN_MULPD512,
28225 IX86_BUILTIN_MULPS512,
28226 IX86_BUILTIN_MULSD_ROUND,
28227 IX86_BUILTIN_MULSS_ROUND,
28228 IX86_BUILTIN_PABSD512,
28229 IX86_BUILTIN_PABSQ512,
28230 IX86_BUILTIN_PADDD512,
28231 IX86_BUILTIN_PADDQ512,
28232 IX86_BUILTIN_PANDD512,
28233 IX86_BUILTIN_PANDND512,
28234 IX86_BUILTIN_PANDNQ512,
28235 IX86_BUILTIN_PANDQ512,
28236 IX86_BUILTIN_PBROADCASTD512,
28237 IX86_BUILTIN_PBROADCASTD512_GPR,
28238 IX86_BUILTIN_PBROADCASTMB512,
28239 IX86_BUILTIN_PBROADCASTMW512,
28240 IX86_BUILTIN_PBROADCASTQ512,
28241 IX86_BUILTIN_PBROADCASTQ512_GPR,
28242 IX86_BUILTIN_PBROADCASTQ512_MEM,
28243 IX86_BUILTIN_PCMPEQD512_MASK,
28244 IX86_BUILTIN_PCMPEQQ512_MASK,
28245 IX86_BUILTIN_PCMPGTD512_MASK,
28246 IX86_BUILTIN_PCMPGTQ512_MASK,
28247 IX86_BUILTIN_PCOMPRESSD512,
28248 IX86_BUILTIN_PCOMPRESSDSTORE512,
28249 IX86_BUILTIN_PCOMPRESSQ512,
28250 IX86_BUILTIN_PCOMPRESSQSTORE512,
28251 IX86_BUILTIN_PEXPANDD512,
28252 IX86_BUILTIN_PEXPANDD512Z,
28253 IX86_BUILTIN_PEXPANDDLOAD512,
28254 IX86_BUILTIN_PEXPANDDLOAD512Z,
28255 IX86_BUILTIN_PEXPANDQ512,
28256 IX86_BUILTIN_PEXPANDQ512Z,
28257 IX86_BUILTIN_PEXPANDQLOAD512,
28258 IX86_BUILTIN_PEXPANDQLOAD512Z,
28259 IX86_BUILTIN_PMAXSD512,
28260 IX86_BUILTIN_PMAXSQ512,
28261 IX86_BUILTIN_PMAXUD512,
28262 IX86_BUILTIN_PMAXUQ512,
28263 IX86_BUILTIN_PMINSD512,
28264 IX86_BUILTIN_PMINSQ512,
28265 IX86_BUILTIN_PMINUD512,
28266 IX86_BUILTIN_PMINUQ512,
28267 IX86_BUILTIN_PMOVDB512,
28268 IX86_BUILTIN_PMOVDB512_MEM,
28269 IX86_BUILTIN_PMOVDW512,
28270 IX86_BUILTIN_PMOVDW512_MEM,
28271 IX86_BUILTIN_PMOVQB512,
28272 IX86_BUILTIN_PMOVQB512_MEM,
28273 IX86_BUILTIN_PMOVQD512,
28274 IX86_BUILTIN_PMOVQD512_MEM,
28275 IX86_BUILTIN_PMOVQW512,
28276 IX86_BUILTIN_PMOVQW512_MEM,
28277 IX86_BUILTIN_PMOVSDB512,
28278 IX86_BUILTIN_PMOVSDB512_MEM,
28279 IX86_BUILTIN_PMOVSDW512,
28280 IX86_BUILTIN_PMOVSDW512_MEM,
28281 IX86_BUILTIN_PMOVSQB512,
28282 IX86_BUILTIN_PMOVSQB512_MEM,
28283 IX86_BUILTIN_PMOVSQD512,
28284 IX86_BUILTIN_PMOVSQD512_MEM,
28285 IX86_BUILTIN_PMOVSQW512,
28286 IX86_BUILTIN_PMOVSQW512_MEM,
28287 IX86_BUILTIN_PMOVSXBD512,
28288 IX86_BUILTIN_PMOVSXBQ512,
28289 IX86_BUILTIN_PMOVSXDQ512,
28290 IX86_BUILTIN_PMOVSXWD512,
28291 IX86_BUILTIN_PMOVSXWQ512,
28292 IX86_BUILTIN_PMOVUSDB512,
28293 IX86_BUILTIN_PMOVUSDB512_MEM,
28294 IX86_BUILTIN_PMOVUSDW512,
28295 IX86_BUILTIN_PMOVUSDW512_MEM,
28296 IX86_BUILTIN_PMOVUSQB512,
28297 IX86_BUILTIN_PMOVUSQB512_MEM,
28298 IX86_BUILTIN_PMOVUSQD512,
28299 IX86_BUILTIN_PMOVUSQD512_MEM,
28300 IX86_BUILTIN_PMOVUSQW512,
28301 IX86_BUILTIN_PMOVUSQW512_MEM,
28302 IX86_BUILTIN_PMOVZXBD512,
28303 IX86_BUILTIN_PMOVZXBQ512,
28304 IX86_BUILTIN_PMOVZXDQ512,
28305 IX86_BUILTIN_PMOVZXWD512,
28306 IX86_BUILTIN_PMOVZXWQ512,
28307 IX86_BUILTIN_PMULDQ512,
28308 IX86_BUILTIN_PMULLD512,
28309 IX86_BUILTIN_PMULUDQ512,
28310 IX86_BUILTIN_PORD512,
28311 IX86_BUILTIN_PORQ512,
28312 IX86_BUILTIN_PROLD512,
28313 IX86_BUILTIN_PROLQ512,
28314 IX86_BUILTIN_PROLVD512,
28315 IX86_BUILTIN_PROLVQ512,
28316 IX86_BUILTIN_PRORD512,
28317 IX86_BUILTIN_PRORQ512,
28318 IX86_BUILTIN_PRORVD512,
28319 IX86_BUILTIN_PRORVQ512,
28320 IX86_BUILTIN_PSHUFD512,
28321 IX86_BUILTIN_PSLLD512,
28322 IX86_BUILTIN_PSLLDI512,
28323 IX86_BUILTIN_PSLLQ512,
28324 IX86_BUILTIN_PSLLQI512,
28325 IX86_BUILTIN_PSLLVV16SI,
28326 IX86_BUILTIN_PSLLVV8DI,
28327 IX86_BUILTIN_PSRAD512,
28328 IX86_BUILTIN_PSRADI512,
28329 IX86_BUILTIN_PSRAQ512,
28330 IX86_BUILTIN_PSRAQI512,
28331 IX86_BUILTIN_PSRAVV16SI,
28332 IX86_BUILTIN_PSRAVV8DI,
28333 IX86_BUILTIN_PSRLD512,
28334 IX86_BUILTIN_PSRLDI512,
28335 IX86_BUILTIN_PSRLQ512,
28336 IX86_BUILTIN_PSRLQI512,
28337 IX86_BUILTIN_PSRLVV16SI,
28338 IX86_BUILTIN_PSRLVV8DI,
28339 IX86_BUILTIN_PSUBD512,
28340 IX86_BUILTIN_PSUBQ512,
28341 IX86_BUILTIN_PTESTMD512,
28342 IX86_BUILTIN_PTESTMQ512,
28343 IX86_BUILTIN_PTESTNMD512,
28344 IX86_BUILTIN_PTESTNMQ512,
28345 IX86_BUILTIN_PUNPCKHDQ512,
28346 IX86_BUILTIN_PUNPCKHQDQ512,
28347 IX86_BUILTIN_PUNPCKLDQ512,
28348 IX86_BUILTIN_PUNPCKLQDQ512,
28349 IX86_BUILTIN_PXORD512,
28350 IX86_BUILTIN_PXORQ512,
28351 IX86_BUILTIN_RCP14PD512,
28352 IX86_BUILTIN_RCP14PS512,
28353 IX86_BUILTIN_RCP14SD,
28354 IX86_BUILTIN_RCP14SS,
28355 IX86_BUILTIN_RNDSCALEPD,
28356 IX86_BUILTIN_RNDSCALEPS,
28357 IX86_BUILTIN_RNDSCALESD,
28358 IX86_BUILTIN_RNDSCALESS,
28359 IX86_BUILTIN_RSQRT14PD512,
28360 IX86_BUILTIN_RSQRT14PS512,
28361 IX86_BUILTIN_RSQRT14SD,
28362 IX86_BUILTIN_RSQRT14SS,
28363 IX86_BUILTIN_SCALEFPD512,
28364 IX86_BUILTIN_SCALEFPS512,
28365 IX86_BUILTIN_SCALEFSD,
28366 IX86_BUILTIN_SCALEFSS,
28367 IX86_BUILTIN_SHUFPD512,
28368 IX86_BUILTIN_SHUFPS512,
28369 IX86_BUILTIN_SHUF_F32x4,
28370 IX86_BUILTIN_SHUF_F64x2,
28371 IX86_BUILTIN_SHUF_I32x4,
28372 IX86_BUILTIN_SHUF_I64x2,
28373 IX86_BUILTIN_SQRTPD512,
28374 IX86_BUILTIN_SQRTPD512_MASK,
28375 IX86_BUILTIN_SQRTPS512_MASK,
28376 IX86_BUILTIN_SQRTPS_NR512,
28377 IX86_BUILTIN_SQRTSD_ROUND,
28378 IX86_BUILTIN_SQRTSS_ROUND,
28379 IX86_BUILTIN_STOREAPD512,
28380 IX86_BUILTIN_STOREAPS512,
28381 IX86_BUILTIN_STOREDQUDI512,
28382 IX86_BUILTIN_STOREDQUSI512,
28383 IX86_BUILTIN_STOREUPD512,
28384 IX86_BUILTIN_STOREUPS512,
28385 IX86_BUILTIN_SUBPD512,
28386 IX86_BUILTIN_SUBPS512,
28387 IX86_BUILTIN_SUBSD_ROUND,
28388 IX86_BUILTIN_SUBSS_ROUND,
28389 IX86_BUILTIN_UCMPD512,
28390 IX86_BUILTIN_UCMPQ512,
28391 IX86_BUILTIN_UNPCKHPD512,
28392 IX86_BUILTIN_UNPCKHPS512,
28393 IX86_BUILTIN_UNPCKLPD512,
28394 IX86_BUILTIN_UNPCKLPS512,
28395 IX86_BUILTIN_VCVTSD2SI32,
28396 IX86_BUILTIN_VCVTSD2SI64,
28397 IX86_BUILTIN_VCVTSD2USI32,
28398 IX86_BUILTIN_VCVTSD2USI64,
28399 IX86_BUILTIN_VCVTSS2SI32,
28400 IX86_BUILTIN_VCVTSS2SI64,
28401 IX86_BUILTIN_VCVTSS2USI32,
28402 IX86_BUILTIN_VCVTSS2USI64,
28403 IX86_BUILTIN_VCVTTSD2SI32,
28404 IX86_BUILTIN_VCVTTSD2SI64,
28405 IX86_BUILTIN_VCVTTSD2USI32,
28406 IX86_BUILTIN_VCVTTSD2USI64,
28407 IX86_BUILTIN_VCVTTSS2SI32,
28408 IX86_BUILTIN_VCVTTSS2SI64,
28409 IX86_BUILTIN_VCVTTSS2USI32,
28410 IX86_BUILTIN_VCVTTSS2USI64,
28411 IX86_BUILTIN_VFMADDPD512_MASK,
28412 IX86_BUILTIN_VFMADDPD512_MASK3,
28413 IX86_BUILTIN_VFMADDPD512_MASKZ,
28414 IX86_BUILTIN_VFMADDPS512_MASK,
28415 IX86_BUILTIN_VFMADDPS512_MASK3,
28416 IX86_BUILTIN_VFMADDPS512_MASKZ,
28417 IX86_BUILTIN_VFMADDSD3_ROUND,
28418 IX86_BUILTIN_VFMADDSS3_ROUND,
28419 IX86_BUILTIN_VFMADDSUBPD512_MASK,
28420 IX86_BUILTIN_VFMADDSUBPD512_MASK3,
28421 IX86_BUILTIN_VFMADDSUBPD512_MASKZ,
28422 IX86_BUILTIN_VFMADDSUBPS512_MASK,
28423 IX86_BUILTIN_VFMADDSUBPS512_MASK3,
28424 IX86_BUILTIN_VFMADDSUBPS512_MASKZ,
28425 IX86_BUILTIN_VFMSUBADDPD512_MASK3,
28426 IX86_BUILTIN_VFMSUBADDPS512_MASK3,
28427 IX86_BUILTIN_VFMSUBPD512_MASK3,
28428 IX86_BUILTIN_VFMSUBPS512_MASK3,
28429 IX86_BUILTIN_VFMSUBSD3_MASK3,
28430 IX86_BUILTIN_VFMSUBSS3_MASK3,
28431 IX86_BUILTIN_VFNMADDPD512_MASK,
28432 IX86_BUILTIN_VFNMADDPS512_MASK,
28433 IX86_BUILTIN_VFNMSUBPD512_MASK,
28434 IX86_BUILTIN_VFNMSUBPD512_MASK3,
28435 IX86_BUILTIN_VFNMSUBPS512_MASK,
28436 IX86_BUILTIN_VFNMSUBPS512_MASK3,
28437 IX86_BUILTIN_VPCLZCNTD512,
28438 IX86_BUILTIN_VPCLZCNTQ512,
28439 IX86_BUILTIN_VPCONFLICTD512,
28440 IX86_BUILTIN_VPCONFLICTQ512,
28441 IX86_BUILTIN_VPERMDF512,
28442 IX86_BUILTIN_VPERMDI512,
28443 IX86_BUILTIN_VPERMI2VARD512,
28444 IX86_BUILTIN_VPERMI2VARPD512,
28445 IX86_BUILTIN_VPERMI2VARPS512,
28446 IX86_BUILTIN_VPERMI2VARQ512,
28447 IX86_BUILTIN_VPERMILPD512,
28448 IX86_BUILTIN_VPERMILPS512,
28449 IX86_BUILTIN_VPERMILVARPD512,
28450 IX86_BUILTIN_VPERMILVARPS512,
28451 IX86_BUILTIN_VPERMT2VARD512,
28452 IX86_BUILTIN_VPERMT2VARD512_MASKZ,
28453 IX86_BUILTIN_VPERMT2VARPD512,
28454 IX86_BUILTIN_VPERMT2VARPD512_MASKZ,
28455 IX86_BUILTIN_VPERMT2VARPS512,
28456 IX86_BUILTIN_VPERMT2VARPS512_MASKZ,
28457 IX86_BUILTIN_VPERMT2VARQ512,
28458 IX86_BUILTIN_VPERMT2VARQ512_MASKZ,
28459 IX86_BUILTIN_VPERMVARDF512,
28460 IX86_BUILTIN_VPERMVARDI512,
28461 IX86_BUILTIN_VPERMVARSF512,
28462 IX86_BUILTIN_VPERMVARSI512,
28463 IX86_BUILTIN_VTERNLOGD512_MASK,
28464 IX86_BUILTIN_VTERNLOGD512_MASKZ,
28465 IX86_BUILTIN_VTERNLOGQ512_MASK,
28466 IX86_BUILTIN_VTERNLOGQ512_MASKZ,
28467
28468 /* Mask arithmetic operations */
28469 IX86_BUILTIN_KAND16,
28470 IX86_BUILTIN_KANDN16,
28471 IX86_BUILTIN_KNOT16,
28472 IX86_BUILTIN_KOR16,
28473 IX86_BUILTIN_KORTESTC16,
28474 IX86_BUILTIN_KORTESTZ16,
28475 IX86_BUILTIN_KUNPCKBW,
28476 IX86_BUILTIN_KXNOR16,
28477 IX86_BUILTIN_KXOR16,
28478 IX86_BUILTIN_KMOV16,
28479
28480 	 /* Alternate 4- and 8-element gather/scatter for the vectorizer,
28481 	 where all operands are 32 or 64 bytes wide, respectively.  */
28482 IX86_BUILTIN_GATHERALTSIV4DF,
28483 IX86_BUILTIN_GATHERALTDIV8SF,
28484 IX86_BUILTIN_GATHERALTSIV4DI,
28485 IX86_BUILTIN_GATHERALTDIV8SI,
28486 IX86_BUILTIN_GATHER3ALTDIV16SF,
28487 IX86_BUILTIN_GATHER3ALTDIV16SI,
28488 IX86_BUILTIN_GATHER3ALTSIV8DF,
28489 IX86_BUILTIN_GATHER3ALTSIV8DI,
28490 IX86_BUILTIN_GATHER3DIV16SF,
28491 IX86_BUILTIN_GATHER3DIV16SI,
28492 IX86_BUILTIN_GATHER3DIV8DF,
28493 IX86_BUILTIN_GATHER3DIV8DI,
28494 IX86_BUILTIN_GATHER3SIV16SF,
28495 IX86_BUILTIN_GATHER3SIV16SI,
28496 IX86_BUILTIN_GATHER3SIV8DF,
28497 IX86_BUILTIN_GATHER3SIV8DI,
28498 IX86_BUILTIN_SCATTERDIV16SF,
28499 IX86_BUILTIN_SCATTERDIV16SI,
28500 IX86_BUILTIN_SCATTERDIV8DF,
28501 IX86_BUILTIN_SCATTERDIV8DI,
28502 IX86_BUILTIN_SCATTERSIV16SF,
28503 IX86_BUILTIN_SCATTERSIV16SI,
28504 IX86_BUILTIN_SCATTERSIV8DF,
28505 IX86_BUILTIN_SCATTERSIV8DI,
28506
28507 /* AVX512PF */
28508 IX86_BUILTIN_GATHERPFQPD,
28509 IX86_BUILTIN_GATHERPFDPS,
28510 IX86_BUILTIN_GATHERPFDPD,
28511 IX86_BUILTIN_GATHERPFQPS,
28512 IX86_BUILTIN_SCATTERPFDPD,
28513 IX86_BUILTIN_SCATTERPFDPS,
28514 IX86_BUILTIN_SCATTERPFQPD,
28515 IX86_BUILTIN_SCATTERPFQPS,
28516
28517 /* AVX-512ER */
28518 IX86_BUILTIN_EXP2PD_MASK,
28519 IX86_BUILTIN_EXP2PS_MASK,
28520 IX86_BUILTIN_EXP2PS,
28521 IX86_BUILTIN_RCP28PD,
28522 IX86_BUILTIN_RCP28PS,
28523 IX86_BUILTIN_RCP28SD,
28524 IX86_BUILTIN_RCP28SS,
28525 IX86_BUILTIN_RSQRT28PD,
28526 IX86_BUILTIN_RSQRT28PS,
28527 IX86_BUILTIN_RSQRT28SD,
28528 IX86_BUILTIN_RSQRT28SS,
28529
28530 /* SHA builtins. */
28531 IX86_BUILTIN_SHA1MSG1,
28532 IX86_BUILTIN_SHA1MSG2,
28533 IX86_BUILTIN_SHA1NEXTE,
28534 IX86_BUILTIN_SHA1RNDS4,
28535 IX86_BUILTIN_SHA256MSG1,
28536 IX86_BUILTIN_SHA256MSG2,
28537 IX86_BUILTIN_SHA256RNDS2,
28538
28539 /* CLFLUSHOPT instructions. */
28540 IX86_BUILTIN_CLFLUSHOPT,
28541
28542 /* TFmode support builtins. */
28543 IX86_BUILTIN_INFQ,
28544 IX86_BUILTIN_HUGE_VALQ,
28545 IX86_BUILTIN_FABSQ,
28546 IX86_BUILTIN_COPYSIGNQ,
28547
28548 /* Vectorizer support builtins. */
28549 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512,
28550 IX86_BUILTIN_CPYSGNPS,
28551 IX86_BUILTIN_CPYSGNPD,
28552 IX86_BUILTIN_CPYSGNPS256,
28553 IX86_BUILTIN_CPYSGNPS512,
28554 IX86_BUILTIN_CPYSGNPD256,
28555 IX86_BUILTIN_CPYSGNPD512,
28556 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512,
28557 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512,
28558
28559
28560 /* FMA4 instructions. */
28561 IX86_BUILTIN_VFMADDSS,
28562 IX86_BUILTIN_VFMADDSD,
28563 IX86_BUILTIN_VFMADDPS,
28564 IX86_BUILTIN_VFMADDPD,
28565 IX86_BUILTIN_VFMADDPS256,
28566 IX86_BUILTIN_VFMADDPD256,
28567 IX86_BUILTIN_VFMADDSUBPS,
28568 IX86_BUILTIN_VFMADDSUBPD,
28569 IX86_BUILTIN_VFMADDSUBPS256,
28570 IX86_BUILTIN_VFMADDSUBPD256,
28571
28572 /* FMA3 instructions. */
28573 IX86_BUILTIN_VFMADDSS3,
28574 IX86_BUILTIN_VFMADDSD3,
28575
28576 /* XOP instructions. */
28577 IX86_BUILTIN_VPCMOV,
28578 IX86_BUILTIN_VPCMOV_V2DI,
28579 IX86_BUILTIN_VPCMOV_V4SI,
28580 IX86_BUILTIN_VPCMOV_V8HI,
28581 IX86_BUILTIN_VPCMOV_V16QI,
28582 IX86_BUILTIN_VPCMOV_V4SF,
28583 IX86_BUILTIN_VPCMOV_V2DF,
28584 IX86_BUILTIN_VPCMOV256,
28585 IX86_BUILTIN_VPCMOV_V4DI256,
28586 IX86_BUILTIN_VPCMOV_V8SI256,
28587 IX86_BUILTIN_VPCMOV_V16HI256,
28588 IX86_BUILTIN_VPCMOV_V32QI256,
28589 IX86_BUILTIN_VPCMOV_V8SF256,
28590 IX86_BUILTIN_VPCMOV_V4DF256,
28591
28592 IX86_BUILTIN_VPPERM,
28593
28594 IX86_BUILTIN_VPMACSSWW,
28595 IX86_BUILTIN_VPMACSWW,
28596 IX86_BUILTIN_VPMACSSWD,
28597 IX86_BUILTIN_VPMACSWD,
28598 IX86_BUILTIN_VPMACSSDD,
28599 IX86_BUILTIN_VPMACSDD,
28600 IX86_BUILTIN_VPMACSSDQL,
28601 IX86_BUILTIN_VPMACSSDQH,
28602 IX86_BUILTIN_VPMACSDQL,
28603 IX86_BUILTIN_VPMACSDQH,
28604 IX86_BUILTIN_VPMADCSSWD,
28605 IX86_BUILTIN_VPMADCSWD,
28606
28607 IX86_BUILTIN_VPHADDBW,
28608 IX86_BUILTIN_VPHADDBD,
28609 IX86_BUILTIN_VPHADDBQ,
28610 IX86_BUILTIN_VPHADDWD,
28611 IX86_BUILTIN_VPHADDWQ,
28612 IX86_BUILTIN_VPHADDDQ,
28613 IX86_BUILTIN_VPHADDUBW,
28614 IX86_BUILTIN_VPHADDUBD,
28615 IX86_BUILTIN_VPHADDUBQ,
28616 IX86_BUILTIN_VPHADDUWD,
28617 IX86_BUILTIN_VPHADDUWQ,
28618 IX86_BUILTIN_VPHADDUDQ,
28619 IX86_BUILTIN_VPHSUBBW,
28620 IX86_BUILTIN_VPHSUBWD,
28621 IX86_BUILTIN_VPHSUBDQ,
28622
28623 IX86_BUILTIN_VPROTB,
28624 IX86_BUILTIN_VPROTW,
28625 IX86_BUILTIN_VPROTD,
28626 IX86_BUILTIN_VPROTQ,
28627 IX86_BUILTIN_VPROTB_IMM,
28628 IX86_BUILTIN_VPROTW_IMM,
28629 IX86_BUILTIN_VPROTD_IMM,
28630 IX86_BUILTIN_VPROTQ_IMM,
28631
28632 IX86_BUILTIN_VPSHLB,
28633 IX86_BUILTIN_VPSHLW,
28634 IX86_BUILTIN_VPSHLD,
28635 IX86_BUILTIN_VPSHLQ,
28636 IX86_BUILTIN_VPSHAB,
28637 IX86_BUILTIN_VPSHAW,
28638 IX86_BUILTIN_VPSHAD,
28639 IX86_BUILTIN_VPSHAQ,
28640
28641 IX86_BUILTIN_VFRCZSS,
28642 IX86_BUILTIN_VFRCZSD,
28643 IX86_BUILTIN_VFRCZPS,
28644 IX86_BUILTIN_VFRCZPD,
28645 IX86_BUILTIN_VFRCZPS256,
28646 IX86_BUILTIN_VFRCZPD256,
28647
28648 IX86_BUILTIN_VPCOMEQUB,
28649 IX86_BUILTIN_VPCOMNEUB,
28650 IX86_BUILTIN_VPCOMLTUB,
28651 IX86_BUILTIN_VPCOMLEUB,
28652 IX86_BUILTIN_VPCOMGTUB,
28653 IX86_BUILTIN_VPCOMGEUB,
28654 IX86_BUILTIN_VPCOMFALSEUB,
28655 IX86_BUILTIN_VPCOMTRUEUB,
28656
28657 IX86_BUILTIN_VPCOMEQUW,
28658 IX86_BUILTIN_VPCOMNEUW,
28659 IX86_BUILTIN_VPCOMLTUW,
28660 IX86_BUILTIN_VPCOMLEUW,
28661 IX86_BUILTIN_VPCOMGTUW,
28662 IX86_BUILTIN_VPCOMGEUW,
28663 IX86_BUILTIN_VPCOMFALSEUW,
28664 IX86_BUILTIN_VPCOMTRUEUW,
28665
28666 IX86_BUILTIN_VPCOMEQUD,
28667 IX86_BUILTIN_VPCOMNEUD,
28668 IX86_BUILTIN_VPCOMLTUD,
28669 IX86_BUILTIN_VPCOMLEUD,
28670 IX86_BUILTIN_VPCOMGTUD,
28671 IX86_BUILTIN_VPCOMGEUD,
28672 IX86_BUILTIN_VPCOMFALSEUD,
28673 IX86_BUILTIN_VPCOMTRUEUD,
28674
28675 IX86_BUILTIN_VPCOMEQUQ,
28676 IX86_BUILTIN_VPCOMNEUQ,
28677 IX86_BUILTIN_VPCOMLTUQ,
28678 IX86_BUILTIN_VPCOMLEUQ,
28679 IX86_BUILTIN_VPCOMGTUQ,
28680 IX86_BUILTIN_VPCOMGEUQ,
28681 IX86_BUILTIN_VPCOMFALSEUQ,
28682 IX86_BUILTIN_VPCOMTRUEUQ,
28683
28684 IX86_BUILTIN_VPCOMEQB,
28685 IX86_BUILTIN_VPCOMNEB,
28686 IX86_BUILTIN_VPCOMLTB,
28687 IX86_BUILTIN_VPCOMLEB,
28688 IX86_BUILTIN_VPCOMGTB,
28689 IX86_BUILTIN_VPCOMGEB,
28690 IX86_BUILTIN_VPCOMFALSEB,
28691 IX86_BUILTIN_VPCOMTRUEB,
28692
28693 IX86_BUILTIN_VPCOMEQW,
28694 IX86_BUILTIN_VPCOMNEW,
28695 IX86_BUILTIN_VPCOMLTW,
28696 IX86_BUILTIN_VPCOMLEW,
28697 IX86_BUILTIN_VPCOMGTW,
28698 IX86_BUILTIN_VPCOMGEW,
28699 IX86_BUILTIN_VPCOMFALSEW,
28700 IX86_BUILTIN_VPCOMTRUEW,
28701
28702 IX86_BUILTIN_VPCOMEQD,
28703 IX86_BUILTIN_VPCOMNED,
28704 IX86_BUILTIN_VPCOMLTD,
28705 IX86_BUILTIN_VPCOMLED,
28706 IX86_BUILTIN_VPCOMGTD,
28707 IX86_BUILTIN_VPCOMGED,
28708 IX86_BUILTIN_VPCOMFALSED,
28709 IX86_BUILTIN_VPCOMTRUED,
28710
28711 IX86_BUILTIN_VPCOMEQQ,
28712 IX86_BUILTIN_VPCOMNEQ,
28713 IX86_BUILTIN_VPCOMLTQ,
28714 IX86_BUILTIN_VPCOMLEQ,
28715 IX86_BUILTIN_VPCOMGTQ,
28716 IX86_BUILTIN_VPCOMGEQ,
28717 IX86_BUILTIN_VPCOMFALSEQ,
28718 IX86_BUILTIN_VPCOMTRUEQ,
28719
28720 /* LWP instructions. */
28721 IX86_BUILTIN_LLWPCB,
28722 IX86_BUILTIN_SLWPCB,
28723 IX86_BUILTIN_LWPVAL32,
28724 IX86_BUILTIN_LWPVAL64,
28725 IX86_BUILTIN_LWPINS32,
28726 IX86_BUILTIN_LWPINS64,
28727
28728 IX86_BUILTIN_CLZS,
28729
28730 /* RTM */
28731 IX86_BUILTIN_XBEGIN,
28732 IX86_BUILTIN_XEND,
28733 IX86_BUILTIN_XABORT,
28734 IX86_BUILTIN_XTEST,
28735
28736 /* BMI instructions. */
28737 IX86_BUILTIN_BEXTR32,
28738 IX86_BUILTIN_BEXTR64,
28739 IX86_BUILTIN_CTZS,
28740
28741 /* TBM instructions. */
28742 IX86_BUILTIN_BEXTRI32,
28743 IX86_BUILTIN_BEXTRI64,
28744
28745 /* BMI2 instructions. */
28746 IX86_BUILTIN_BZHI32,
28747 IX86_BUILTIN_BZHI64,
28748 IX86_BUILTIN_PDEP32,
28749 IX86_BUILTIN_PDEP64,
28750 IX86_BUILTIN_PEXT32,
28751 IX86_BUILTIN_PEXT64,
28752
28753 /* ADX instructions. */
28754 IX86_BUILTIN_ADDCARRYX32,
28755 IX86_BUILTIN_ADDCARRYX64,
28756
28757 /* FSGSBASE instructions. */
28758 IX86_BUILTIN_RDFSBASE32,
28759 IX86_BUILTIN_RDFSBASE64,
28760 IX86_BUILTIN_RDGSBASE32,
28761 IX86_BUILTIN_RDGSBASE64,
28762 IX86_BUILTIN_WRFSBASE32,
28763 IX86_BUILTIN_WRFSBASE64,
28764 IX86_BUILTIN_WRGSBASE32,
28765 IX86_BUILTIN_WRGSBASE64,
28766
28767 /* RDRND instructions. */
28768 IX86_BUILTIN_RDRAND16_STEP,
28769 IX86_BUILTIN_RDRAND32_STEP,
28770 IX86_BUILTIN_RDRAND64_STEP,
28771
28772 /* RDSEED instructions. */
28773 IX86_BUILTIN_RDSEED16_STEP,
28774 IX86_BUILTIN_RDSEED32_STEP,
28775 IX86_BUILTIN_RDSEED64_STEP,
28776
28777 /* F16C instructions. */
28778 IX86_BUILTIN_CVTPH2PS,
28779 IX86_BUILTIN_CVTPH2PS256,
28780 IX86_BUILTIN_CVTPS2PH,
28781 IX86_BUILTIN_CVTPS2PH256,
28782
28783 /* CFString built-in for Darwin. */
28784 IX86_BUILTIN_CFSTRING,
28785
28786 /* Builtins to get CPU type and supported features. */
28787 IX86_BUILTIN_CPU_INIT,
28788 IX86_BUILTIN_CPU_IS,
28789 IX86_BUILTIN_CPU_SUPPORTS,
28790
28791 /* Read/write FLAGS register built-ins. */
28792 IX86_BUILTIN_READ_FLAGS,
28793 IX86_BUILTIN_WRITE_FLAGS,
28794
28795 IX86_BUILTIN_MAX
28796 };
28797
28798 /* Table for the ix86 builtin decls. */
28799 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
28800
28801 /* Table of all of the builtin functions that are possible with different ISAs,
28802 but are waiting to be built until a function is declared to use that
28803 ISA. */
28804 struct builtin_isa {
28805 const char *name; /* function name */
28806 enum ix86_builtin_func_type tcode; /* type to use in the declaration */
28807 HOST_WIDE_INT isa; /* isa_flags this builtin is defined for */
28808 bool const_p; /* true if the declaration is constant */
28809 bool set_and_not_built_p;	/* true if the builtin was deferred and its decl is not yet built */
28810 };
28811
28812 static struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
28813
28814
28815 /* Add an ix86 target builtin function with CODE, NAME and TCODE. Save the MASK
28816    of isa_flags it requires in the ix86_builtins_isa array. Stores the
28817 function decl in the ix86_builtins array. Returns the function decl or
28818 NULL_TREE, if the builtin was not added.
28819
28820 If the front end has a special hook for builtin functions, delay adding
28821 builtin functions that aren't in the current ISA until the ISA is changed
28822 with function specific optimization. Doing so can save about 300K for the
28823 default compiler. When the builtin is expanded, check at that time whether
28824 it is valid.
28825
28826 If the front end doesn't have a special hook, record all builtins, even if
28827 they aren't in the current ISA, in case the user uses
28828 function specific options for a different ISA, so that we don't get scope
28829 errors if a builtin is added in the middle of a function scope. */
28830
28831 static inline tree
28832 def_builtin (HOST_WIDE_INT mask, const char *name,
28833 enum ix86_builtin_func_type tcode,
28834 enum ix86_builtins code)
28835 {
28836 tree decl = NULL_TREE;
28837
28838 if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
28839 {
28840 ix86_builtins_isa[(int) code].isa = mask;
28841
28842 mask &= ~OPTION_MASK_ISA_64BIT;
28843 if (mask == 0
28844 || (mask & ix86_isa_flags) != 0
28845 || (lang_hooks.builtin_function
28846 == lang_hooks.builtin_function_ext_scope))
28847
28848 {
28849 tree type = ix86_get_builtin_func_type (tcode);
28850 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
28851 NULL, NULL_TREE);
28852 ix86_builtins[(int) code] = decl;
28853 ix86_builtins_isa[(int) code].set_and_not_built_p = false;
28854 }
28855 else
28856 {
28857 ix86_builtins[(int) code] = NULL_TREE;
28858 ix86_builtins_isa[(int) code].tcode = tcode;
28859 ix86_builtins_isa[(int) code].name = name;
28860 ix86_builtins_isa[(int) code].const_p = false;
28861 ix86_builtins_isa[(int) code].set_and_not_built_p = true;
28862 }
28863 }
28864
28865 return decl;
28866 }
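/* Illustrative sketch only, not part of the build: the description tables
   below are walked by the builtin-initialization code, and for an entry
   such as the MMX "__builtin_ia32_emms" record in bdesc_special_args the
   net effect is a call along the lines of

     def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_emms",
                  VOID_FTYPE_VOID, IX86_BUILTIN_EMMS);

   If MMX is already enabled in ix86_isa_flags (or the mask is empty, or the
   front end's builtin_function hook is the ext-scope hook), the decl is
   built immediately; otherwise only the ix86_builtins_isa entry is recorded
   and the decl is built lazily by ix86_add_new_builtins once a function
   enables that ISA.  */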
28867
28868 /* Like def_builtin, but also marks the function decl "const". */
28869
28870 static inline tree
28871 def_builtin_const (HOST_WIDE_INT mask, const char *name,
28872 enum ix86_builtin_func_type tcode, enum ix86_builtins code)
28873 {
28874 tree decl = def_builtin (mask, name, tcode, code);
28875 if (decl)
28876 TREE_READONLY (decl) = 1;
28877 else
28878 ix86_builtins_isa[(int) code].const_p = true;
28879
28880 return decl;
28881 }
28882
28883 /* Add any new builtin functions for a given ISA that may not have been
28884 declared. This saves a bit of space compared to adding all of the
28885    declarations to the tree, even when they are never used. */
28886
28887 static void
28888 ix86_add_new_builtins (HOST_WIDE_INT isa)
28889 {
28890 int i;
28891
28892 for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
28893 {
28894 if ((ix86_builtins_isa[i].isa & isa) != 0
28895 && ix86_builtins_isa[i].set_and_not_built_p)
28896 {
28897 tree decl, type;
28898
28899 /* Don't define the builtin again. */
28900 ix86_builtins_isa[i].set_and_not_built_p = false;
28901
28902 type = ix86_get_builtin_func_type (ix86_builtins_isa[i].tcode);
28903 decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
28904 type, i, BUILT_IN_MD, NULL,
28905 NULL_TREE);
28906
28907 ix86_builtins[i] = decl;
28908 if (ix86_builtins_isa[i].const_p)
28909 TREE_READONLY (decl) = 1;
28910 }
28911 }
28912 }
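/* Illustrative sketch only: this deferral is what makes the target
   attribute usable with builtins that are outside the command-line ISA.
   For example, in a unit compiled without -mavx, something like

     __attribute__ ((target ("avx")))
     void
     flush_upper_halves (void)            /* hypothetical function name */
     {
       __builtin_ia32_vzeroupper ();
     }

   relies on ix86_add_new_builtins being called with the AVX bits of
   isa_flags when the target attribute is processed, so that the deferred
   decl for IX86_BUILTIN_VZEROUPPER (see bdesc_special_args below) exists
   by the time the body is compiled.  */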
28913
28914 /* Bits for builtin_description.flag. */
28915
28916 /* Set when we don't support the comparison natively, and should
28917    swap the comparison operands in order to support it. */
28918 #define BUILTIN_DESC_SWAP_OPERANDS 1
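/* Illustrative note only: if, say, only an LT pattern is available, a GT
   comparison can still be described by recording LT and setting
   BUILTIN_DESC_SWAP_OPERANDS, since (GT a b) is equivalent to (LT b a);
   the expander can then exchange the two operands before emitting the
   insn.  */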
28919
28920 struct builtin_description
28921 {
28922 const HOST_WIDE_INT mask;
28923 const enum insn_code icode;
28924 const char *const name;
28925 const enum ix86_builtins code;
28926 const enum rtx_code comparison;
28927 const int flag;
28928 };
28929
28930 static const struct builtin_description bdesc_comi[] =
28931 {
28932 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
28933 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
28934 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
28935 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
28936 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
28937 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
28938 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
28939 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
28940 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
28941 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
28942 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
28943 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
28944 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
28945 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
28946 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
28947 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
28948 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
28949 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
28950 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
28951 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
28952 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
28953 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
28954 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
28955 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
28956 };
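/* Illustrative only: these (u)comi builtins are the expansion targets of
   the user-level comparison intrinsics in xmmintrin.h/emmintrin.h.  For
   instance _mm_comieq_ss is essentially

     extern __inline int
     _mm_comieq_ss (__m128 __A, __m128 __B)
     {
       return __builtin_ia32_comieq ((__v4sf) __A, (__v4sf) __B);
     }

   and the UNEQ rtx_code recorded for IX86_BUILTIN_COMIEQSS above selects
   the condition tested on the flags produced by the comiss instruction.  */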
28957
28958 static const struct builtin_description bdesc_pcmpestr[] =
28959 {
28960 /* SSE4.2 */
28961 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
28962 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
28963 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
28964 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
28965 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
28966 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
28967 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
28968 };
28969
28970 static const struct builtin_description bdesc_pcmpistr[] =
28971 {
28972 /* SSE4.2 */
28973 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
28974 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
28975 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
28976 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
28977 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
28978 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
28979 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
28980 };
28981
28982 /* Special builtins with variable number of arguments. */
28983 static const struct builtin_description bdesc_special_args[] =
28984 {
28985 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC, UNKNOWN, (int) UINT64_FTYPE_VOID },
28986 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP, UNKNOWN, (int) UINT64_FTYPE_PUNSIGNED },
28987 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_pause, "__builtin_ia32_pause", IX86_BUILTIN_PAUSE, UNKNOWN, (int) VOID_FTYPE_VOID },
28988
28989 /* 80387 (used internally for atomic compound assignment). */
28990 { 0, CODE_FOR_fnstenv, "__builtin_ia32_fnstenv", IX86_BUILTIN_FNSTENV, UNKNOWN, (int) VOID_FTYPE_PVOID },
28991 { 0, CODE_FOR_fldenv, "__builtin_ia32_fldenv", IX86_BUILTIN_FLDENV, UNKNOWN, (int) VOID_FTYPE_PCVOID },
28992 { 0, CODE_FOR_fnstsw, "__builtin_ia32_fnstsw", IX86_BUILTIN_FNSTSW, UNKNOWN, (int) USHORT_FTYPE_VOID },
28993 { 0, CODE_FOR_fnclex, "__builtin_ia32_fnclex", IX86_BUILTIN_FNCLEX, UNKNOWN, (int) VOID_FTYPE_VOID },
28994
28995 /* MMX */
28996 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
28997
28998 /* 3DNow! */
28999 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
29000
29001 /* FXSR, XSAVE, XSAVEOPT, XSAVEC and XSAVES. */
29002 { OPTION_MASK_ISA_FXSR, CODE_FOR_nothing, "__builtin_ia32_fxsave", IX86_BUILTIN_FXSAVE, UNKNOWN, (int) VOID_FTYPE_PVOID },
29003 { OPTION_MASK_ISA_FXSR, CODE_FOR_nothing, "__builtin_ia32_fxrstor", IX86_BUILTIN_FXRSTOR, UNKNOWN, (int) VOID_FTYPE_PVOID },
29004 { OPTION_MASK_ISA_XSAVE, CODE_FOR_nothing, "__builtin_ia32_xsave", IX86_BUILTIN_XSAVE, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29005 { OPTION_MASK_ISA_XSAVE, CODE_FOR_nothing, "__builtin_ia32_xrstor", IX86_BUILTIN_XRSTOR, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29006 { OPTION_MASK_ISA_XSAVEOPT, CODE_FOR_nothing, "__builtin_ia32_xsaveopt", IX86_BUILTIN_XSAVEOPT, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29007 { OPTION_MASK_ISA_XSAVES, CODE_FOR_nothing, "__builtin_ia32_xsaves", IX86_BUILTIN_XSAVES, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29008 { OPTION_MASK_ISA_XSAVES, CODE_FOR_nothing, "__builtin_ia32_xrstors", IX86_BUILTIN_XRSTORS, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29009 { OPTION_MASK_ISA_XSAVEC, CODE_FOR_nothing, "__builtin_ia32_xsavec", IX86_BUILTIN_XSAVEC, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29010
29011 { OPTION_MASK_ISA_FXSR | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_fxsave64", IX86_BUILTIN_FXSAVE64, UNKNOWN, (int) VOID_FTYPE_PVOID },
29012 { OPTION_MASK_ISA_FXSR | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_fxrstor64", IX86_BUILTIN_FXRSTOR64, UNKNOWN, (int) VOID_FTYPE_PVOID },
29013 { OPTION_MASK_ISA_XSAVE | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsave64", IX86_BUILTIN_XSAVE64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29014 { OPTION_MASK_ISA_XSAVE | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xrstor64", IX86_BUILTIN_XRSTOR64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29015 { OPTION_MASK_ISA_XSAVEOPT | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsaveopt64", IX86_BUILTIN_XSAVEOPT64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29016 { OPTION_MASK_ISA_XSAVES | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsaves64", IX86_BUILTIN_XSAVES64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29017 { OPTION_MASK_ISA_XSAVES | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xrstors64", IX86_BUILTIN_XRSTORS64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29018 { OPTION_MASK_ISA_XSAVEC | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsavec64", IX86_BUILTIN_XSAVEC64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29019
29020 /* SSE */
29021 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storeups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
29022 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
29023 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
29024
29025 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
29026 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
29027 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
29028 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
29029
29030 /* SSE or 3DNow!A */
29031 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
29032 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntq, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PULONGLONG_ULONGLONG },
29033
29034 /* SSE2 */
29035 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
29036 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
29037 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storeupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
29038 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storedquv16qi, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
29039 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
29040 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
29041 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntisi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
29042 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_movntidi, "__builtin_ia32_movnti64", IX86_BUILTIN_MOVNTI64, UNKNOWN, (int) VOID_FTYPE_PLONGLONG_LONGLONG },
29043 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
29044 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loaddquv16qi, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
29045
29046 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
29047 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
29048
29049 /* SSE3 */
29050 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
29051
29052 /* SSE4.1 */
29053 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },
29054
29055 /* SSE4A */
29056 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
29057 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
29058
29059 /* AVX */
29060 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
29061 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },
29062
29063 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4sf, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
29064 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4df, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
29065 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv8sf, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
29066 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v4df, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
29067 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v8sf, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },
29068
29069 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
29070 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
29071 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
29072 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
29073 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loaddquv32qi, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
29074 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storedquv32qi, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
29075 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
29076
29077 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
29078 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
29079 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
29080
29081 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DI },
29082 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SI },
29083 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DI },
29084 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SI },
29085 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DI_V2DF },
29086 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SI_V4SF },
29087 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DI_V4DF },
29088 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SI_V8SF },
29089
29090 /* AVX2 */
29091 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_movntdqa, "__builtin_ia32_movntdqa256", IX86_BUILTIN_MOVNTDQA256, UNKNOWN, (int) V4DI_FTYPE_PV4DI },
29092 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd, "__builtin_ia32_maskloadd", IX86_BUILTIN_MASKLOADD, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI },
29093 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq, "__builtin_ia32_maskloadq", IX86_BUILTIN_MASKLOADQ, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI },
29094 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd256, "__builtin_ia32_maskloadd256", IX86_BUILTIN_MASKLOADD256, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI },
29095 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq256, "__builtin_ia32_maskloadq256", IX86_BUILTIN_MASKLOADQ256, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI },
29096 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored, "__builtin_ia32_maskstored", IX86_BUILTIN_MASKSTORED, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_V4SI },
29097 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq, "__builtin_ia32_maskstoreq", IX86_BUILTIN_MASKSTOREQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_V2DI },
29098 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored256, "__builtin_ia32_maskstored256", IX86_BUILTIN_MASKSTORED256, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_V8SI },
29099 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq256, "__builtin_ia32_maskstoreq256", IX86_BUILTIN_MASKSTOREQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_V4DI },
29100
29101 /* AVX512F */
29102 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev16sf_mask, "__builtin_ia32_compressstoresf512_mask", IX86_BUILTIN_COMPRESSPSSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
29103 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev16si_mask, "__builtin_ia32_compressstoresi512_mask", IX86_BUILTIN_PCOMPRESSDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
29104 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev8df_mask, "__builtin_ia32_compressstoredf512_mask", IX86_BUILTIN_COMPRESSPDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
29105 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev8di_mask, "__builtin_ia32_compressstoredi512_mask", IX86_BUILTIN_PCOMPRESSQSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
29106 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_mask, "__builtin_ia32_expandloadsf512_mask", IX86_BUILTIN_EXPANDPSLOAD512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
29107 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandloadsf512_maskz", IX86_BUILTIN_EXPANDPSLOAD512Z, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
29108 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_mask, "__builtin_ia32_expandloadsi512_mask", IX86_BUILTIN_PEXPANDDLOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
29109 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandloadsi512_maskz", IX86_BUILTIN_PEXPANDDLOAD512Z, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
29110 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_mask, "__builtin_ia32_expandloaddf512_mask", IX86_BUILTIN_EXPANDPDLOAD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
29111 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expandloaddf512_maskz", IX86_BUILTIN_EXPANDPDLOAD512Z, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
29112 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_mask, "__builtin_ia32_expandloaddi512_mask", IX86_BUILTIN_PEXPANDQLOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
29113 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expandloaddi512_maskz", IX86_BUILTIN_PEXPANDQLOAD512Z, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
29114 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loaddquv16si_mask, "__builtin_ia32_loaddqusi512_mask", IX86_BUILTIN_LOADDQUSI512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
29115 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loaddquv8di_mask, "__builtin_ia32_loaddqudi512_mask", IX86_BUILTIN_LOADDQUDI512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
29116 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadupd512_mask, "__builtin_ia32_loadupd512_mask", IX86_BUILTIN_LOADUPD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
29117 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadups512_mask, "__builtin_ia32_loadups512_mask", IX86_BUILTIN_LOADUPS512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
29118 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_loadaps512_mask", IX86_BUILTIN_LOADAPS512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
29119 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32load512_mask", IX86_BUILTIN_MOVDQA32LOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
29120 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_loadapd512_mask", IX86_BUILTIN_LOADAPD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
29121 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64load512_mask", IX86_BUILTIN_MOVDQA64LOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
29122 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv16sf, "__builtin_ia32_movntps512", IX86_BUILTIN_MOVNTPS512, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V16SF },
29123 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv8df, "__builtin_ia32_movntpd512", IX86_BUILTIN_MOVNTPD512, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V8DF },
29124 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv8di, "__builtin_ia32_movntdq512", IX86_BUILTIN_MOVNTDQ512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI },
29125 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntdqa, "__builtin_ia32_movntdqa512", IX86_BUILTIN_MOVNTDQA512, UNKNOWN, (int) V8DI_FTYPE_PV8DI },
29126 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storedquv16si_mask, "__builtin_ia32_storedqusi512_mask", IX86_BUILTIN_STOREDQUSI512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
29127 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storedquv8di_mask, "__builtin_ia32_storedqudi512_mask", IX86_BUILTIN_STOREDQUDI512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
29128 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storeupd512_mask, "__builtin_ia32_storeupd512_mask", IX86_BUILTIN_STOREUPD512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
29129 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8si2_mask_store, "__builtin_ia32_pmovusqd512mem_mask", IX86_BUILTIN_PMOVUSQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
29130 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8si2_mask_store, "__builtin_ia32_pmovsqd512mem_mask", IX86_BUILTIN_PMOVSQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
29131 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8si2_mask_store, "__builtin_ia32_pmovqd512mem_mask", IX86_BUILTIN_PMOVQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
29132 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovusqw512mem_mask", IX86_BUILTIN_PMOVUSQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
29133 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovsqw512mem_mask", IX86_BUILTIN_PMOVSQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
29134 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovqw512mem_mask", IX86_BUILTIN_PMOVQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
29135 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovusdw512mem_mask", IX86_BUILTIN_PMOVUSDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
29136 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovsdw512mem_mask", IX86_BUILTIN_PMOVSDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
29137 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovdw512mem_mask", IX86_BUILTIN_PMOVDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
29138 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovqb512mem_mask", IX86_BUILTIN_PMOVQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
29139 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovusqb512mem_mask", IX86_BUILTIN_PMOVUSQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
29140 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovsqb512mem_mask", IX86_BUILTIN_PMOVSQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
29141 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovusdb512mem_mask", IX86_BUILTIN_PMOVUSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
29142 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovsdb512mem_mask", IX86_BUILTIN_PMOVSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
29143 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovdb512mem_mask", IX86_BUILTIN_PMOVDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
29144 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storeups512_mask, "__builtin_ia32_storeups512_mask", IX86_BUILTIN_STOREUPS512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
29145 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev16sf_mask, "__builtin_ia32_storeaps512_mask", IX86_BUILTIN_STOREAPS512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
29146 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev16si_mask, "__builtin_ia32_movdqa32store512_mask", IX86_BUILTIN_MOVDQA32STORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
29147 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev8df_mask, "__builtin_ia32_storeapd512_mask", IX86_BUILTIN_STOREAPD512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
29148 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev8di_mask, "__builtin_ia32_movdqa64store512_mask", IX86_BUILTIN_MOVDQA64STORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
29149
29150 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcb, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB, UNKNOWN, (int) VOID_FTYPE_PVOID },
29151 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcb, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB, UNKNOWN, (int) PVOID_FTYPE_VOID },
29152 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalsi3, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32, UNKNOWN, (int) VOID_FTYPE_UINT_UINT_UINT },
29153 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvaldi3, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64, UNKNOWN, (int) VOID_FTYPE_UINT64_UINT_UINT },
29154 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinssi3, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32, UNKNOWN, (int) UCHAR_FTYPE_UINT_UINT_UINT },
29155 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinsdi3, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT64_UINT_UINT },
29156
29157 /* FSGSBASE */
29158 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasesi, "__builtin_ia32_rdfsbase32", IX86_BUILTIN_RDFSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
29159 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasedi, "__builtin_ia32_rdfsbase64", IX86_BUILTIN_RDFSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
29160 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasesi, "__builtin_ia32_rdgsbase32", IX86_BUILTIN_RDGSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
29161 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasedi, "__builtin_ia32_rdgsbase64", IX86_BUILTIN_RDGSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
29162 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasesi, "__builtin_ia32_wrfsbase32", IX86_BUILTIN_WRFSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
29163 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasedi, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
29164 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasesi, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
29165 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasedi, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
29166
29167 /* RTM */
29168 { OPTION_MASK_ISA_RTM, CODE_FOR_xbegin, "__builtin_ia32_xbegin", IX86_BUILTIN_XBEGIN, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
29169 { OPTION_MASK_ISA_RTM, CODE_FOR_xend, "__builtin_ia32_xend", IX86_BUILTIN_XEND, UNKNOWN, (int) VOID_FTYPE_VOID },
29170 { OPTION_MASK_ISA_RTM, CODE_FOR_xtest, "__builtin_ia32_xtest", IX86_BUILTIN_XTEST, UNKNOWN, (int) INT_FTYPE_VOID },
29171 };
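/* Illustrative only: the "special" builtins above typically involve memory
   operands or side effects and are expanded through the special-args path
   rather than as plain vector operations.  E.g. the SSE
   "__builtin_ia32_movntps" record (type VOID_FTYPE_PFLOAT_V4SF) is what the
   non-temporal store intrinsic in xmmintrin.h boils down to, roughly

     extern __inline void
     _mm_stream_ps (float *__P, __m128 __A)
     {
       __builtin_ia32_movntps (__P, (__v4sf) __A);
     }

   while the entries in bdesc_args below describe ordinary value-to-value
   builtins.  */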
29172
29173 /* Builtins with variable number of arguments. */
29174 static const struct builtin_description bdesc_args[] =
29175 {
29176 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_bsr, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI, UNKNOWN, (int) INT_FTYPE_INT },
29177 { OPTION_MASK_ISA_64BIT, CODE_FOR_bsr_rex64, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI, UNKNOWN, (int) INT64_FTYPE_INT64 },
29178 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC, UNKNOWN, (int) UINT64_FTYPE_INT },
29179 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlqi3, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
29180 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlhi3, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
29181 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrqi3, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
29182 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrhi3, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
29183
29184 /* MMX */
29185 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29186 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29187 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29188 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29189 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29190 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29191
29192 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29193 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29194 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29195 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29196 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29197 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29198 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29199 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29200
29201 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29202 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29203
29204 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29205 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29206 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29207 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29208
29209 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29210 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29211 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29212 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29213 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29214 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29215
29216 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29217 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29218 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29219 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29220 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29221 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29222
29223 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
29224 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
29225 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
29226
29227 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },
29228
29229 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
29230 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
29231 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
29232 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
29233 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
29234 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
29235
29236 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
29237 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
29238 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
29239 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
29240 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
29241 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
29242
29243 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
29244 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
29245 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
29246 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
29247
29248 /* 3DNow! */
29249 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
29250 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
29251 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
29252 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },
29253
29254 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29255 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29256 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29257 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
29258 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
29259 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
29260 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29261 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29262 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29263 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29264 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29265 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29266 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29267 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29268 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29269
29270 /* 3DNow!A */
29271 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
29272 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
29273 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
29274 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
29275 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29276 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29277
29278 /* SSE */
29279 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
29280 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29281 { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29282 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29283 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29284 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29285 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
29286 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
29287 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
29288 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
29289 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
29290 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
29291
29292 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29293
29294 { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29295 { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29296 { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29297 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29298 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29299 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29300 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29301 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29302
29303 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
29304 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
29305 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
29306 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
29307 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
29308 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
29309 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
29310 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
29311 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
29312 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
29313 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP},
29314 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
29315 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
29316 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
29317 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
29318 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
29319 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
29320 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
29321 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
29322 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
29323
29324 { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29325 { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29326 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29327 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29328
29329 { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29330 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29331 { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29332 { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29333
29334 { OPTION_MASK_ISA_SSE, CODE_FOR_copysignv4sf3, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29335
29336 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29337 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29338 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29339 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_highv4sf, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29340 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_lowv4sf, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29341
29342 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
29343 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
29344 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_DI },
29345
29346 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },
29347
29348 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
29349 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
29350 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
29351
29352 { OPTION_MASK_ISA_SSE, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
29353 { OPTION_MASK_ISA_SSE, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },
29354
29355 /* MMX builtins available with either SSE or 3DNow!A */
29356 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29357 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29358 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29359
29360 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29361 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29362 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29363 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29364
29365 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
29366 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },
29367
29368 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },
29369
29370 /* SSE2 */
29371 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29372
29373 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
29374 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
29375 { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
29376 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
29377 { OPTION_MASK_ISA_SSE2, CODE_FOR_floatv4siv4sf2, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
29378
29379 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
29380 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
29381 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
29382 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
29383 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
29384
29385 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },
29386
29387 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
29388 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
29389 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
29390 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
29391
29392 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_fix_notruncv4sfv4si, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
29393 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
29394 { OPTION_MASK_ISA_SSE2, CODE_FOR_fix_truncv4sfv4si2, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
29395
29396 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29397 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29398 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29399 { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29400 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29401 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29402 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29403 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29404
29405 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
29406 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
29407 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
29408 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
29409 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
29410 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
29411 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
29412 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
29413 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
29414 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
29415 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
29416 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
29417 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
29418 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
29419 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
29420 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
29421 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
29422 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
29423 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
29424 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
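 /* Note on the *_SWAP entries above: cmpgtpd and cmpgepd have no insn of
    their own; they reuse sse2_maskcmpv2df3 with the LT/LE code and the
    operands swapped at expansion time, so a > b is emitted as b < a.
    Illustrative sketch only; the v2df typedef is an assumption mirroring
    the generic vector types in emmintrin.h:

      typedef double v2df __attribute__ ((vector_size (16)));

      v2df
      gt_pd (v2df a, v2df b)
      {
        return __builtin_ia32_cmpgtpd (a, b);
      }

    (Produces the same mask as the cmpltpd entry applied to (b, a).)  */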
29425
29426 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29427 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29428 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29429 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29430
29431 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29432 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29433 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29434 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29435
29436 { OPTION_MASK_ISA_SSE2, CODE_FOR_copysignv2df3, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29437
29438 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29439 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2df, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29440 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2df, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29441
29442 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
29443
29444 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29445 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29446 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29447 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29448 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29449 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29450 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29451 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29452
29453 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29454 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29455 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29456 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29457 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29458 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29459 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29460 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29461
29462 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29463 { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29464
29465 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29466 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29467 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29468 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29469
29470 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29471 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29472
29473 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29474 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29475 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29476 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29477 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29478 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29479
29480 { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29481 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29482 { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29483 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29484
29485 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv16qi, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29486 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv8hi, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29487 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv4si, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29488 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2di, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29489 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv16qi, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29490 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv8hi, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29491 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv4si, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29492 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2di, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29493
29494 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
29495 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
29496 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
29497
29498 { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29499 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },
29500
29501 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
29502 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_widen_umult_even_v4si, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
29503
29504 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },
29505
29506 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
29507 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
29508 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
29509 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },
29510
29511 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlv1ti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
29512 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
29513 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
29514 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
29515 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
29516 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
29517 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
29518
29519 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrv1ti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
29520 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
29521 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
29522 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
29523 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
29524 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
29525 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
29526
29527 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
29528 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
29529 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
29530 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
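 /* The shift builtins above come in two flavours: the *_SI_COUNT entries
    take a scalar (immediate) count, while the *_COUNT entries take the
    count in a vector operand; the pslldqi128/psrldqi128 entries
    (*_INT_CONVERT) shift the whole register through the V1TI patterns, with
    the immediate given in bits (emmintrin.h scales the byte count by 8).
    Hedged sketch, assuming the prototypes implied by the V8HI_FTYPE_*
    codes:

      typedef short v8hi __attribute__ ((vector_size (16)));

      v8hi shl_imm (v8hi x)         { return __builtin_ia32_psllwi128 (x, 3); }
      v8hi shl_vec (v8hi x, v8hi c) { return __builtin_ia32_psllw128 (x, c); }
 */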
29531
29532 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
29533 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
29534 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
29535
29536 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },
29537
29538 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
29539
29540 /* SSE2 MMX */
29541 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
29542 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
29543
29544 /* SSE3 */
29545 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29546 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29547
29548 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29549 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29550 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29551 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29552 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29553 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29554
29555 /* SSSE3 */
29556 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
29557 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
29558 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
29559 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
29560 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
29561 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },
29562
29563 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29564 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29565 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29566 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29567 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29568 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29569 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29570 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29571 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29572 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29573 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29574 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29575 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
29576 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
29577 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29578 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29579 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29580 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29581 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29582 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29583 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29584 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29585 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29586 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29587
29588 /* SSSE3. */
29589 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT },
29590 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT },
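 /* Likewise for the palignr entries just above: the *_INT_CONVERT suffix
    means the V2DI/V1DI operands are converted to the TImode/DImode pattern
    operands, and the shift amount is taken in bits, which is why
    tmmintrin.h's _mm_alignr_epi8 scales its byte count by 8.  Minimal
    sketch; the v2di typedef is an assumption:

      typedef long long v2di __attribute__ ((vector_size (16)));

      v2di
      align4 (v2di x, v2di y)
      {
        return __builtin_ia32_palignr128 (x, y, 4 * 8);
      }

    (A 4-byte alignment shift expressed as 32 bits.)  */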
29591
29592 /* SSE4.1 */
29593 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29594 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29595 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
29596 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
29597 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29598 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29599 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29600 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
29601 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
29602 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },
29603
29604 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
29605 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
29606 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
29607 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
29608 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
29609 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
29610 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
29611 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
29612 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
29613 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
29614 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
29615 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
29616 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
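 /* The pmovsx/pmovzx entries above widen the low elements of their input,
    e.g. pmovsxbw128 sign-extends the low eight bytes of a V16QI into a
    V8HI.  Hedged sketch, assuming the generic vector typedefs:

      typedef char  v16qi __attribute__ ((vector_size (16)));
      typedef short v8hi  __attribute__ ((vector_size (16)));

      v8hi widen_lo (v16qi x) { return __builtin_ia32_pmovsxbw128 (x); }
 */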
29617
29618 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
29619 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29620 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29621 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29622 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29623 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29624 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29625 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29626 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29627 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29628 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
29629 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29630
29631 /* SSE4.1 */
29632 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
29633 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
29634 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29635 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29636
29637 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_floorpd", IX86_BUILTIN_FLOORPD, (enum rtx_code) ROUND_FLOOR, (int) V2DF_FTYPE_V2DF_ROUND },
29638 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_ceilpd", IX86_BUILTIN_CEILPD, (enum rtx_code) ROUND_CEIL, (int) V2DF_FTYPE_V2DF_ROUND },
29639 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_truncpd", IX86_BUILTIN_TRUNCPD, (enum rtx_code) ROUND_TRUNC, (int) V2DF_FTYPE_V2DF_ROUND },
29640 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_rintpd", IX86_BUILTIN_RINTPD, (enum rtx_code) ROUND_MXCSR, (int) V2DF_FTYPE_V2DF_ROUND },
29641
29642 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_floorpd_vec_pack_sfix", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
29643 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_ceilpd_vec_pack_sfix", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
29644
29645 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2, "__builtin_ia32_roundpd_az", IX86_BUILTIN_ROUNDPD_AZ, UNKNOWN, (int) V2DF_FTYPE_V2DF },
29646 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
29647
29648 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_floorps", IX86_BUILTIN_FLOORPS, (enum rtx_code) ROUND_FLOOR, (int) V4SF_FTYPE_V4SF_ROUND },
29649 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_ceilps", IX86_BUILTIN_CEILPS, (enum rtx_code) ROUND_CEIL, (int) V4SF_FTYPE_V4SF_ROUND },
29650 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_truncps", IX86_BUILTIN_TRUNCPS, (enum rtx_code) ROUND_TRUNC, (int) V4SF_FTYPE_V4SF_ROUND },
29651 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_rintps", IX86_BUILTIN_RINTPS, (enum rtx_code) ROUND_MXCSR, (int) V4SF_FTYPE_V4SF_ROUND },
29652
29653 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_floorps_sfix", IX86_BUILTIN_FLOORPS_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V4SF_ROUND },
29654 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_ceilps_sfix", IX86_BUILTIN_CEILPS_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V4SF_ROUND },
29655
29656 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2, "__builtin_ia32_roundps_az", IX86_BUILTIN_ROUNDPS_AZ, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29657 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2_sfix, "__builtin_ia32_roundps_az_sfix", IX86_BUILTIN_ROUNDPS_AZ_SFIX, UNKNOWN, (int) V4SI_FTYPE_V4SF },
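 /* The floor/ceil/trunc/rint entries above all reuse the single
    sse4_1_roundpd/roundps patterns; the ROUND_FLOOR, ROUND_CEIL, ... values
    ride in the comparison slot and become the rounding-mode immediate when
    the builtin is expanded, while the *_az entries go through the generic
    roundv*2 patterns (used when vectorizing round ()).  Illustrative sketch
    only; the v2df typedef is an assumption:

      typedef double v2df __attribute__ ((vector_size (16)));

      v2df floor_pd (v2df x) { return __builtin_ia32_floorpd (x); }
 */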
29658
29659 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
29660 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
29661 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
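 /* The three ptest entries map one sse4_1_ptest pattern to three results by
    testing different flags: EQ for ZF (ptestz), LTU for CF (ptestc) and GTU
    for neither (ptestnzc).  Hedged sketch, assuming the generic v2di
    typedef:

      typedef long long v2di __attribute__ ((vector_size (16)));

      int
      all_masked_zero (v2di mask, v2di val)
      {
        return __builtin_ia32_ptestz128 (mask, val);
      }

    (Returns 1 iff (mask & val) is all zero, i.e. ZF after ptest.)  */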
29662
29663 /* SSE4.2 */
29664 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29665 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
29666 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
29667 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
29668 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
29669
29670 /* SSE4A */
29671 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
29672 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
29673 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
29674 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29675
29676 /* AES */
29677 { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
29678 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
29679
29680 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29681 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29682 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29683 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29684
29685 /* PCLMUL */
29686 { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
29687
29688 /* AVX */
29689 { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29690 { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29691 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29692 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29693 { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29694 { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29695 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29696 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29697 { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29698 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29699 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29700 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29701 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29702 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29703 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29704 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29705 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29706 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29707 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29708 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29709 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29710 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29711 { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29712 { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29713 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29714 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29715
29716 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
29717 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
29718 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
29719 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
29720
29721 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
29722 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
29723 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
29724 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
29725 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
29726 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
29727 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
29728 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29729 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29730 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29731 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29732 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
29733 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
29734 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
29735 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
29736 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
29737 { OPTION_MASK_ISA_AVX, CODE_FOR_floatv4siv4df2, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
29738 { OPTION_MASK_ISA_AVX, CODE_FOR_floatv8siv8sf2, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
29739 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
29740 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_fix_notruncv8sfv8si, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
29741 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
29742 { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv4dfv4si2, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
29743 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
29744 { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv8sfv8si2, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
29745 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
29746 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
29747 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
29748 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
29749 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
29750 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
29751 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
29752 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
29753 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
29754 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },
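 /* The vextractf128/vinsertf128 entries above move one 128-bit lane out of
    or into a 256-bit vector, selected by the immediate operand.  Hedged
    sketch, assuming generic vector typedefs matching avxintrin.h:

      typedef double v2df __attribute__ ((vector_size (16)));
      typedef double v4df __attribute__ ((vector_size (32)));

      v2df high_pd (v4df x) { return __builtin_ia32_vextractf128_pd256 (x, 1); }
 */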
29755
29756 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29757 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29758 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
29759
29760 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
29761 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29762 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29763 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29764 { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29765
29766 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29767
29768 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
29769 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
29770
29771 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_floorpd256", IX86_BUILTIN_FLOORPD256, (enum rtx_code) ROUND_FLOOR, (int) V4DF_FTYPE_V4DF_ROUND },
29772 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_ceilpd256", IX86_BUILTIN_CEILPD256, (enum rtx_code) ROUND_CEIL, (int) V4DF_FTYPE_V4DF_ROUND },
29773 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_truncpd256", IX86_BUILTIN_TRUNCPD256, (enum rtx_code) ROUND_TRUNC, (int) V4DF_FTYPE_V4DF_ROUND },
29774 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_rintpd256", IX86_BUILTIN_RINTPD256, (enum rtx_code) ROUND_MXCSR, (int) V4DF_FTYPE_V4DF_ROUND },
29775
29776 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2, "__builtin_ia32_roundpd_az256", IX86_BUILTIN_ROUNDPD_AZ256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
29777 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix256", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
29778
29779 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_floorpd_vec_pack_sfix256", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
29780 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_ceilpd_vec_pack_sfix256", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
29781
29782 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_floorps256", IX86_BUILTIN_FLOORPS256, (enum rtx_code) ROUND_FLOOR, (int) V8SF_FTYPE_V8SF_ROUND },
29783 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_ceilps256", IX86_BUILTIN_CEILPS256, (enum rtx_code) ROUND_CEIL, (int) V8SF_FTYPE_V8SF_ROUND },
29784 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_truncps256", IX86_BUILTIN_TRUNCPS256, (enum rtx_code) ROUND_TRUNC, (int) V8SF_FTYPE_V8SF_ROUND },
29785 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_rintps256", IX86_BUILTIN_RINTPS256, (enum rtx_code) ROUND_MXCSR, (int) V8SF_FTYPE_V8SF_ROUND },
29786
29787 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_floorps_sfix256", IX86_BUILTIN_FLOORPS_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V8SF_ROUND },
29788 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_ceilps_sfix256", IX86_BUILTIN_CEILPS_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V8SF_ROUND },
29789
29790 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2, "__builtin_ia32_roundps_az256", IX86_BUILTIN_ROUNDPS_AZ256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29791 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2_sfix, "__builtin_ia32_roundps_az_sfix256", IX86_BUILTIN_ROUNDPS_AZ_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
29792
29793 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29794 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29795 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29796 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29797
29798 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
29799 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
29800 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
29801 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8si, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
29802 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8sf, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
29803 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v4df, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },
29804
29805 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
29806 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
29807 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
29808 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
29809 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
29810 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
29811 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
29812 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
29813 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
29814 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
29815 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
29816 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
29817 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
29818 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
29819 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
29820
29821 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
29822 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },
29823
29824 { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv8sf3, "__builtin_ia32_copysignps256", IX86_BUILTIN_CPYSGNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29825 { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv4df3, "__builtin_ia32_copysignpd256", IX86_BUILTIN_CPYSGNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29826
29827 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_pack_sfix_v4df, "__builtin_ia32_vec_pack_sfix256", IX86_BUILTIN_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
29828
29829 /* AVX2 */
29830 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_mpsadbw, "__builtin_ia32_mpsadbw256", IX86_BUILTIN_MPSADBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_INT },
29831 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv32qi2, "__builtin_ia32_pabsb256", IX86_BUILTIN_PABSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI },
29832 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv16hi2, "__builtin_ia32_pabsw256", IX86_BUILTIN_PABSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI },
29833 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv8si2, "__builtin_ia32_pabsd256", IX86_BUILTIN_PABSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI },
29834 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packssdw, "__builtin_ia32_packssdw256", IX86_BUILTIN_PACKSSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
29835 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packsswb, "__builtin_ia32_packsswb256", IX86_BUILTIN_PACKSSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
29836 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packusdw, "__builtin_ia32_packusdw256", IX86_BUILTIN_PACKUSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
29837 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packuswb, "__builtin_ia32_packuswb256", IX86_BUILTIN_PACKUSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
29838 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv32qi3, "__builtin_ia32_paddb256", IX86_BUILTIN_PADDB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29839 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv16hi3, "__builtin_ia32_paddw256", IX86_BUILTIN_PADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29840 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv8si3, "__builtin_ia32_paddd256", IX86_BUILTIN_PADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29841 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv4di3, "__builtin_ia32_paddq256", IX86_BUILTIN_PADDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29842 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv32qi3, "__builtin_ia32_paddsb256", IX86_BUILTIN_PADDSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29843 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv16hi3, "__builtin_ia32_paddsw256", IX86_BUILTIN_PADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29844 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv32qi3, "__builtin_ia32_paddusb256", IX86_BUILTIN_PADDUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29845 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv16hi3, "__builtin_ia32_paddusw256", IX86_BUILTIN_PADDUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29846 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_palignrv2ti, "__builtin_ia32_palignr256", IX86_BUILTIN_PALIGNR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_CONVERT },
29847 { OPTION_MASK_ISA_AVX2, CODE_FOR_andv4di3, "__builtin_ia32_andsi256", IX86_BUILTIN_AND256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29848 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_andnotv4di3, "__builtin_ia32_andnotsi256", IX86_BUILTIN_ANDNOT256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29849 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv32qi3, "__builtin_ia32_pavgb256", IX86_BUILTIN_PAVGB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29850 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv16hi3, "__builtin_ia32_pavgw256", IX86_BUILTIN_PAVGW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29851 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendvb, "__builtin_ia32_pblendvb256", IX86_BUILTIN_PBLENDVB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI },
29852 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendw, "__builtin_ia32_pblendw256", IX86_BUILTIN_PBLENDVW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_INT },
29853 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv32qi3, "__builtin_ia32_pcmpeqb256", IX86_BUILTIN_PCMPEQB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29854 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv16hi3, "__builtin_ia32_pcmpeqw256", IX86_BUILTIN_PCMPEQW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29855 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv8si3, "__builtin_ia32_pcmpeqd256", IX86_BUILTIN_PCMPEQD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29856 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv4di3, "__builtin_ia32_pcmpeqq256", IX86_BUILTIN_PCMPEQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29857 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv32qi3, "__builtin_ia32_pcmpgtb256", IX86_BUILTIN_PCMPGTB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29858 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv16hi3, "__builtin_ia32_pcmpgtw256", IX86_BUILTIN_PCMPGTW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29859 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv8si3, "__builtin_ia32_pcmpgtd256", IX86_BUILTIN_PCMPGTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29860 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv4di3, "__builtin_ia32_pcmpgtq256", IX86_BUILTIN_PCMPGTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29861 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddwv16hi3, "__builtin_ia32_phaddw256", IX86_BUILTIN_PHADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29862 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phadddv8si3, "__builtin_ia32_phaddd256", IX86_BUILTIN_PHADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29863 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddswv16hi3, "__builtin_ia32_phaddsw256", IX86_BUILTIN_PHADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29864 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubwv16hi3, "__builtin_ia32_phsubw256", IX86_BUILTIN_PHSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29865 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubdv8si3, "__builtin_ia32_phsubd256", IX86_BUILTIN_PHSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29866 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubswv16hi3, "__builtin_ia32_phsubsw256", IX86_BUILTIN_PHSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29867 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddubsw256, "__builtin_ia32_pmaddubsw256", IX86_BUILTIN_PMADDUBSW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
29868 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddwd, "__builtin_ia32_pmaddwd256", IX86_BUILTIN_PMADDWD256, UNKNOWN, (int) V8SI_FTYPE_V16HI_V16HI },
29869 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv32qi3, "__builtin_ia32_pmaxsb256", IX86_BUILTIN_PMAXSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29870 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv16hi3, "__builtin_ia32_pmaxsw256", IX86_BUILTIN_PMAXSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29871 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv8si3 , "__builtin_ia32_pmaxsd256", IX86_BUILTIN_PMAXSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29872 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv32qi3, "__builtin_ia32_pmaxub256", IX86_BUILTIN_PMAXUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29873 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv16hi3, "__builtin_ia32_pmaxuw256", IX86_BUILTIN_PMAXUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29874 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv8si3 , "__builtin_ia32_pmaxud256", IX86_BUILTIN_PMAXUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29875 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv32qi3, "__builtin_ia32_pminsb256", IX86_BUILTIN_PMINSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29876 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv16hi3, "__builtin_ia32_pminsw256", IX86_BUILTIN_PMINSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29877 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv8si3 , "__builtin_ia32_pminsd256", IX86_BUILTIN_PMINSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29878 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv32qi3, "__builtin_ia32_pminub256", IX86_BUILTIN_PMINUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29879 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv16hi3, "__builtin_ia32_pminuw256", IX86_BUILTIN_PMINUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29880 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv8si3 , "__builtin_ia32_pminud256", IX86_BUILTIN_PMINUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29881 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmovmskb, "__builtin_ia32_pmovmskb256", IX86_BUILTIN_PMOVMSKB256, UNKNOWN, (int) INT_FTYPE_V32QI },
29882 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv16qiv16hi2, "__builtin_ia32_pmovsxbw256", IX86_BUILTIN_PMOVSXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
29883 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8qiv8si2 , "__builtin_ia32_pmovsxbd256", IX86_BUILTIN_PMOVSXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
29884 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4qiv4di2 , "__builtin_ia32_pmovsxbq256", IX86_BUILTIN_PMOVSXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
29885 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8hiv8si2 , "__builtin_ia32_pmovsxwd256", IX86_BUILTIN_PMOVSXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
29886 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4hiv4di2 , "__builtin_ia32_pmovsxwq256", IX86_BUILTIN_PMOVSXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
29887 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4siv4di2 , "__builtin_ia32_pmovsxdq256", IX86_BUILTIN_PMOVSXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
29888 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv16qiv16hi2, "__builtin_ia32_pmovzxbw256", IX86_BUILTIN_PMOVZXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
29889 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8qiv8si2 , "__builtin_ia32_pmovzxbd256", IX86_BUILTIN_PMOVZXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
29890 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4qiv4di2 , "__builtin_ia32_pmovzxbq256", IX86_BUILTIN_PMOVZXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
29891 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8hiv8si2 , "__builtin_ia32_pmovzxwd256", IX86_BUILTIN_PMOVZXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
29892 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4hiv4di2 , "__builtin_ia32_pmovzxwq256", IX86_BUILTIN_PMOVZXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
29893 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4siv4di2 , "__builtin_ia32_pmovzxdq256", IX86_BUILTIN_PMOVZXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
29894 { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_smult_even_v8si, "__builtin_ia32_pmuldq256", IX86_BUILTIN_PMULDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
29895 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmulhrswv16hi3 , "__builtin_ia32_pmulhrsw256", IX86_BUILTIN_PMULHRSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29896 { OPTION_MASK_ISA_AVX2, CODE_FOR_umulv16hi3_highpart, "__builtin_ia32_pmulhuw256" , IX86_BUILTIN_PMULHUW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29897 { OPTION_MASK_ISA_AVX2, CODE_FOR_smulv16hi3_highpart, "__builtin_ia32_pmulhw256" , IX86_BUILTIN_PMULHW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29898 { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv16hi3, "__builtin_ia32_pmullw256" , IX86_BUILTIN_PMULLW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29899 { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv8si3, "__builtin_ia32_pmulld256" , IX86_BUILTIN_PMULLD256 , UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29900 { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_umult_even_v8si, "__builtin_ia32_pmuludq256", IX86_BUILTIN_PMULUDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
29901 { OPTION_MASK_ISA_AVX2, CODE_FOR_iorv4di3, "__builtin_ia32_por256", IX86_BUILTIN_POR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29902 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psadbw, "__builtin_ia32_psadbw256", IX86_BUILTIN_PSADBW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
29903 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufbv32qi3, "__builtin_ia32_pshufb256", IX86_BUILTIN_PSHUFB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29904 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufdv3, "__builtin_ia32_pshufd256", IX86_BUILTIN_PSHUFD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT },
29905 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufhwv3, "__builtin_ia32_pshufhw256", IX86_BUILTIN_PSHUFHW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
29906 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshuflwv3, "__builtin_ia32_pshuflw256", IX86_BUILTIN_PSHUFLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
29907 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv32qi3, "__builtin_ia32_psignb256", IX86_BUILTIN_PSIGNB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29908 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv16hi3, "__builtin_ia32_psignw256", IX86_BUILTIN_PSIGNW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29909 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv8si3 , "__builtin_ia32_psignd256", IX86_BUILTIN_PSIGND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29910 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlv2ti3, "__builtin_ia32_pslldqi256", IX86_BUILTIN_PSLLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
29911 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllwi256", IX86_BUILTIN_PSLLWI256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
29912 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllw256", IX86_BUILTIN_PSLLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
29913 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslldi256", IX86_BUILTIN_PSLLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
29914 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslld256", IX86_BUILTIN_PSLLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
29915 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllqi256", IX86_BUILTIN_PSLLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
29916 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllq256", IX86_BUILTIN_PSLLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
29917 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psrawi256", IX86_BUILTIN_PSRAWI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
29918 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psraw256", IX86_BUILTIN_PSRAW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
29919 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psradi256", IX86_BUILTIN_PSRADI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
29920 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psrad256", IX86_BUILTIN_PSRAD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
29921 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrv2ti3, "__builtin_ia32_psrldqi256", IX86_BUILTIN_PSRLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
29922 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlwi256", IX86_BUILTIN_PSRLWI256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
29923 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlw256", IX86_BUILTIN_PSRLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
29924 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrldi256", IX86_BUILTIN_PSRLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
29925 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrld256", IX86_BUILTIN_PSRLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
29926 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlqi256", IX86_BUILTIN_PSRLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
29927 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlq256", IX86_BUILTIN_PSRLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
29928 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv32qi3, "__builtin_ia32_psubb256", IX86_BUILTIN_PSUBB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29929 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv16hi3, "__builtin_ia32_psubw256", IX86_BUILTIN_PSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29930 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv8si3, "__builtin_ia32_psubd256", IX86_BUILTIN_PSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29931 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv4di3, "__builtin_ia32_psubq256", IX86_BUILTIN_PSUBQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29932 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv32qi3, "__builtin_ia32_psubsb256", IX86_BUILTIN_PSUBSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29933 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv16hi3, "__builtin_ia32_psubsw256", IX86_BUILTIN_PSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29934 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv32qi3, "__builtin_ia32_psubusb256", IX86_BUILTIN_PSUBUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29935 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv16hi3, "__builtin_ia32_psubusw256", IX86_BUILTIN_PSUBUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29936 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv32qi, "__builtin_ia32_punpckhbw256", IX86_BUILTIN_PUNPCKHBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29937 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv16hi, "__builtin_ia32_punpckhwd256", IX86_BUILTIN_PUNPCKHWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29938 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv8si, "__builtin_ia32_punpckhdq256", IX86_BUILTIN_PUNPCKHDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29939 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv4di, "__builtin_ia32_punpckhqdq256", IX86_BUILTIN_PUNPCKHQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29940 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv32qi, "__builtin_ia32_punpcklbw256", IX86_BUILTIN_PUNPCKLBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29941 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv16hi, "__builtin_ia32_punpcklwd256", IX86_BUILTIN_PUNPCKLWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29942 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv8si, "__builtin_ia32_punpckldq256", IX86_BUILTIN_PUNPCKLDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29943 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv4di, "__builtin_ia32_punpcklqdq256", IX86_BUILTIN_PUNPCKLQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29944 { OPTION_MASK_ISA_AVX2, CODE_FOR_xorv4di3, "__builtin_ia32_pxor256", IX86_BUILTIN_PXOR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29945 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4sf, "__builtin_ia32_vbroadcastss_ps", IX86_BUILTIN_VBROADCASTSS_PS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29946 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv8sf, "__builtin_ia32_vbroadcastss_ps256", IX86_BUILTIN_VBROADCASTSS_PS256, UNKNOWN, (int) V8SF_FTYPE_V4SF },
29947 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4df, "__builtin_ia32_vbroadcastsd_pd256", IX86_BUILTIN_VBROADCASTSD_PD256, UNKNOWN, (int) V4DF_FTYPE_V2DF },
29948 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vbroadcasti128_v4di, "__builtin_ia32_vbroadcastsi256", IX86_BUILTIN_VBROADCASTSI256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
29949 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv4si, "__builtin_ia32_pblendd128", IX86_BUILTIN_PBLENDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
29950 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv8si, "__builtin_ia32_pblendd256", IX86_BUILTIN_PBLENDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
29951 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv32qi, "__builtin_ia32_pbroadcastb256", IX86_BUILTIN_PBROADCASTB256, UNKNOWN, (int) V32QI_FTYPE_V16QI },
29952 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16hi, "__builtin_ia32_pbroadcastw256", IX86_BUILTIN_PBROADCASTW256, UNKNOWN, (int) V16HI_FTYPE_V8HI },
29953 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8si, "__builtin_ia32_pbroadcastd256", IX86_BUILTIN_PBROADCASTD256, UNKNOWN, (int) V8SI_FTYPE_V4SI },
29954 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4di, "__builtin_ia32_pbroadcastq256", IX86_BUILTIN_PBROADCASTQ256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
29955 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16qi, "__builtin_ia32_pbroadcastb128", IX86_BUILTIN_PBROADCASTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
29956 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8hi, "__builtin_ia32_pbroadcastw128", IX86_BUILTIN_PBROADCASTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
29957 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4si, "__builtin_ia32_pbroadcastd128", IX86_BUILTIN_PBROADCASTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
29958 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv2di, "__builtin_ia32_pbroadcastq128", IX86_BUILTIN_PBROADCASTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
29959 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8si, "__builtin_ia32_permvarsi256", IX86_BUILTIN_VPERMVARSI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29960 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8sf, "__builtin_ia32_permvarsf256", IX86_BUILTIN_VPERMVARSF256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
29961 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4df, "__builtin_ia32_permdf256", IX86_BUILTIN_VPERMDF256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
29962 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4di, "__builtin_ia32_permdi256", IX86_BUILTIN_VPERMDI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT },
29963 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv2ti, "__builtin_ia32_permti256", IX86_BUILTIN_VPERMTI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT },
29964 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_extracti128, "__builtin_ia32_extract128i256", IX86_BUILTIN_VEXTRACT128I256, UNKNOWN, (int) V2DI_FTYPE_V4DI_INT },
29965 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_inserti128, "__builtin_ia32_insert128i256", IX86_BUILTIN_VINSERT128I256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_INT },
29966 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4di, "__builtin_ia32_psllv4di", IX86_BUILTIN_PSLLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29967 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv2di, "__builtin_ia32_psllv2di", IX86_BUILTIN_PSLLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29968 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv8si, "__builtin_ia32_psllv8si", IX86_BUILTIN_PSLLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29969 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4si, "__builtin_ia32_psllv4si", IX86_BUILTIN_PSLLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29970 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv8si, "__builtin_ia32_psrav8si", IX86_BUILTIN_PSRAVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29971 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv4si, "__builtin_ia32_psrav4si", IX86_BUILTIN_PSRAVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29972 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4di, "__builtin_ia32_psrlv4di", IX86_BUILTIN_PSRLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29973 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv2di, "__builtin_ia32_psrlv2di", IX86_BUILTIN_PSRLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29974 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv8si, "__builtin_ia32_psrlv8si", IX86_BUILTIN_PSRLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29975 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4si, "__builtin_ia32_psrlv4si", IX86_BUILTIN_PSRLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29976
29977 { OPTION_MASK_ISA_LZCNT, CODE_FOR_clzhi2_lzcnt, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
29978
29979 /* BMI */
29980 { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_si, "__builtin_ia32_bextr_u32", IX86_BUILTIN_BEXTR32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
29981 { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_di, "__builtin_ia32_bextr_u64", IX86_BUILTIN_BEXTR64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
29982 { OPTION_MASK_ISA_BMI, CODE_FOR_ctzhi2, "__builtin_ctzs", IX86_BUILTIN_CTZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
29983
29984 /* TBM */
29985 { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_si, "__builtin_ia32_bextri_u32", IX86_BUILTIN_BEXTRI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
29986 { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_di, "__builtin_ia32_bextri_u64", IX86_BUILTIN_BEXTRI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
29987
29988 /* F16C */
29989 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int) V4SF_FTYPE_V8HI },
29990 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps256, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256, UNKNOWN, (int) V8SF_FTYPE_V8HI },
29991 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT },
29992 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph256, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT },
29993
29994 /* BMI2 */
29995 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_si3, "__builtin_ia32_bzhi_si", IX86_BUILTIN_BZHI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
29996 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_di3, "__builtin_ia32_bzhi_di", IX86_BUILTIN_BZHI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
29997 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_si3, "__builtin_ia32_pdep_si", IX86_BUILTIN_PDEP32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
29998 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_di3, "__builtin_ia32_pdep_di", IX86_BUILTIN_PDEP64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
29999 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_si3, "__builtin_ia32_pext_si", IX86_BUILTIN_PEXT32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
30000 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_di3, "__builtin_ia32_pext_di", IX86_BUILTIN_PEXT64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
30001
30002 /* AVX512F */
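/* For the "_mask" variants below, the extra trailing vector operand in the
   prototype supplies the values merged into masked-off elements, and the
   final HI/QI operand is the write mask.  */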
30003 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_alignv16si_mask, "__builtin_ia32_alignd512_mask", IX86_BUILTIN_ALIGND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI },
30004 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_alignv8di_mask, "__builtin_ia32_alignq512_mask", IX86_BUILTIN_ALIGNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI },
30005 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv16si, "__builtin_ia32_blendmd_512_mask", IX86_BUILTIN_BLENDMD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30006 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv8df, "__builtin_ia32_blendmpd_512_mask", IX86_BUILTIN_BLENDMPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30007 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv16sf, "__builtin_ia32_blendmps_512_mask", IX86_BUILTIN_BLENDMPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30008 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv8di, "__builtin_ia32_blendmq_512_mask", IX86_BUILTIN_BLENDMQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30009 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv16sf_mask, "__builtin_ia32_broadcastf32x4_512", IX86_BUILTIN_BROADCASTF32X4_512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_HI },
30010 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv8df_mask, "__builtin_ia32_broadcastf64x4_512", IX86_BUILTIN_BROADCASTF64X4_512, UNKNOWN, (int) V8DF_FTYPE_V4DF_V8DF_QI },
30011 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv16si_mask, "__builtin_ia32_broadcasti32x4_512", IX86_BUILTIN_BROADCASTI32X4_512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_HI },
30012 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv8di_mask, "__builtin_ia32_broadcasti64x4_512", IX86_BUILTIN_BROADCASTI64X4_512, UNKNOWN, (int) V8DI_FTYPE_V4DI_V8DI_QI },
30013 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv8df_mask, "__builtin_ia32_broadcastsd512", IX86_BUILTIN_BROADCASTSD512, UNKNOWN, (int) V8DF_FTYPE_V2DF_V8DF_QI },
30014 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv16sf_mask, "__builtin_ia32_broadcastss512", IX86_BUILTIN_BROADCASTSS512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_HI },
30015 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv16si3_mask, "__builtin_ia32_cmpd512_mask", IX86_BUILTIN_CMPD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_INT_HI },
30016 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv8di3_mask, "__builtin_ia32_cmpq512_mask", IX86_BUILTIN_CMPQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_INT_QI },
30017 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv8df_mask, "__builtin_ia32_compressdf512_mask", IX86_BUILTIN_COMPRESSPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30018 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv16sf_mask, "__builtin_ia32_compresssf512_mask", IX86_BUILTIN_COMPRESSPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30019 { OPTION_MASK_ISA_AVX512F, CODE_FOR_floatv8siv8df2_mask, "__builtin_ia32_cvtdq2pd512_mask", IX86_BUILTIN_CVTDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_QI },
30020 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtps2ph512_mask, "__builtin_ia32_vcvtps2ph512_mask", IX86_BUILTIN_CVTPS2PH512, UNKNOWN, (int) V16HI_FTYPE_V16SF_INT_V16HI_HI },
30021 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufloatv8siv8df_mask, "__builtin_ia32_cvtudq2pd512_mask", IX86_BUILTIN_CVTUDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_QI },
30022 { OPTION_MASK_ISA_AVX512F, CODE_FOR_cvtusi2sd32, "__builtin_ia32_cvtusi2sd32", IX86_BUILTIN_CVTUSI2SD32, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT },
30023 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_mask, "__builtin_ia32_expanddf512_mask", IX86_BUILTIN_EXPANDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30024 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expanddf512_maskz", IX86_BUILTIN_EXPANDPD512Z, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30025 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_mask, "__builtin_ia32_expandsf512_mask", IX86_BUILTIN_EXPANDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30026 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandsf512_maskz", IX86_BUILTIN_EXPANDPS512Z, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30027 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextractf32x4_mask, "__builtin_ia32_extractf32x4_mask", IX86_BUILTIN_EXTRACTF32X4, UNKNOWN, (int) V4SF_FTYPE_V16SF_INT_V4SF_QI },
30028 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextractf64x4_mask, "__builtin_ia32_extractf64x4_mask", IX86_BUILTIN_EXTRACTF64X4, UNKNOWN, (int) V4DF_FTYPE_V8DF_INT_V4DF_QI },
30029 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextracti32x4_mask, "__builtin_ia32_extracti32x4_mask", IX86_BUILTIN_EXTRACTI32X4, UNKNOWN, (int) V4SI_FTYPE_V16SI_INT_V4SI_QI },
30030 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextracti64x4_mask, "__builtin_ia32_extracti64x4_mask", IX86_BUILTIN_EXTRACTI64X4, UNKNOWN, (int) V4DI_FTYPE_V8DI_INT_V4DI_QI },
30031 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinsertf32x4_mask, "__builtin_ia32_insertf32x4_mask", IX86_BUILTIN_INSERTF32X4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V4SF_INT_V16SF_HI },
30032 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinsertf64x4_mask, "__builtin_ia32_insertf64x4_mask", IX86_BUILTIN_INSERTF64X4, UNKNOWN, (int) V8DF_FTYPE_V8DF_V4DF_INT_V8DF_QI },
30033 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinserti32x4_mask, "__builtin_ia32_inserti32x4_mask", IX86_BUILTIN_INSERTI32X4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_INT_V16SI_HI },
30034 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinserti64x4_mask, "__builtin_ia32_inserti64x4_mask", IX86_BUILTIN_INSERTI64X4, UNKNOWN, (int) V8DI_FTYPE_V8DI_V4DI_INT_V8DI_QI },
30035 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_movapd512_mask", IX86_BUILTIN_MOVAPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30036 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_movaps512_mask", IX86_BUILTIN_MOVAPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30037 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movddup512_mask, "__builtin_ia32_movddup512_mask", IX86_BUILTIN_MOVDDUP512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30038 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32_512_mask", IX86_BUILTIN_MOVDQA32_512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30039 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64_512_mask", IX86_BUILTIN_MOVDQA64_512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30040 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movshdup512_mask, "__builtin_ia32_movshdup512_mask", IX86_BUILTIN_MOVSHDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30041 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movsldup512_mask, "__builtin_ia32_movsldup512_mask", IX86_BUILTIN_MOVSLDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30042 { OPTION_MASK_ISA_AVX512F, CODE_FOR_absv16si2_mask, "__builtin_ia32_pabsd512_mask", IX86_BUILTIN_PABSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30043 { OPTION_MASK_ISA_AVX512F, CODE_FOR_absv8di2_mask, "__builtin_ia32_pabsq512_mask", IX86_BUILTIN_PABSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30044 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv16si3_mask, "__builtin_ia32_paddd512_mask", IX86_BUILTIN_PADDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30045 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv8di3_mask, "__builtin_ia32_paddq512_mask", IX86_BUILTIN_PADDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30046 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andv16si3_mask, "__builtin_ia32_pandd512_mask", IX86_BUILTIN_PANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30047 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_andnotv16si3_mask, "__builtin_ia32_pandnd512_mask", IX86_BUILTIN_PANDND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30048 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_andnotv8di3_mask, "__builtin_ia32_pandnq512_mask", IX86_BUILTIN_PANDNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30049 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andv8di3_mask, "__builtin_ia32_pandq512_mask", IX86_BUILTIN_PANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30050 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv16si_mask, "__builtin_ia32_pbroadcastd512", IX86_BUILTIN_PBROADCASTD512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_HI },
30051 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dup_gprv16si_mask, "__builtin_ia32_pbroadcastd512_gpr_mask", IX86_BUILTIN_PBROADCASTD512_GPR, UNKNOWN, (int) V16SI_FTYPE_SI_V16SI_HI },
30052 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv8di, "__builtin_ia32_broadcastmb512", IX86_BUILTIN_PBROADCASTMB512, UNKNOWN, (int) V8DI_FTYPE_QI },
30053 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv16si, "__builtin_ia32_broadcastmw512", IX86_BUILTIN_PBROADCASTMW512, UNKNOWN, (int) V16SI_FTYPE_HI },
30054 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv8di_mask, "__builtin_ia32_pbroadcastq512", IX86_BUILTIN_PBROADCASTQ512, UNKNOWN, (int) V8DI_FTYPE_V2DI_V8DI_QI },
30055 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vec_dup_gprv8di_mask, "__builtin_ia32_pbroadcastq512_gpr_mask", IX86_BUILTIN_PBROADCASTQ512_GPR, UNKNOWN, (int) V8DI_FTYPE_DI_V8DI_QI },
30056 { OPTION_MASK_ISA_AVX512F & ~OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vec_dup_memv8di_mask, "__builtin_ia32_pbroadcastq512_mem_mask", IX86_BUILTIN_PBROADCASTQ512_MEM, UNKNOWN, (int) V8DI_FTYPE_DI_V8DI_QI },
30057 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_eqv16si3_mask, "__builtin_ia32_pcmpeqd512_mask", IX86_BUILTIN_PCMPEQD512_MASK, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
30058 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_eqv8di3_mask, "__builtin_ia32_pcmpeqq512_mask", IX86_BUILTIN_PCMPEQQ512_MASK, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
30059 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_gtv16si3_mask, "__builtin_ia32_pcmpgtd512_mask", IX86_BUILTIN_PCMPGTD512_MASK, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
30060 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_gtv8di3_mask, "__builtin_ia32_pcmpgtq512_mask", IX86_BUILTIN_PCMPGTQ512_MASK, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
30061 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv16si_mask, "__builtin_ia32_compresssi512_mask", IX86_BUILTIN_PCOMPRESSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30062 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv8di_mask, "__builtin_ia32_compressdi512_mask", IX86_BUILTIN_PCOMPRESSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30063 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_mask, "__builtin_ia32_expandsi512_mask", IX86_BUILTIN_PEXPANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30064 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandsi512_maskz", IX86_BUILTIN_PEXPANDD512Z, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30065 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_mask, "__builtin_ia32_expanddi512_mask", IX86_BUILTIN_PEXPANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30066 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expanddi512_maskz", IX86_BUILTIN_PEXPANDQ512Z, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30067 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv16si3_mask, "__builtin_ia32_pmaxsd512_mask", IX86_BUILTIN_PMAXSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30068 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv8di3_mask, "__builtin_ia32_pmaxsq512_mask", IX86_BUILTIN_PMAXSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30069 { OPTION_MASK_ISA_AVX512F, CODE_FOR_umaxv16si3_mask, "__builtin_ia32_pmaxud512_mask", IX86_BUILTIN_PMAXUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30070 { OPTION_MASK_ISA_AVX512F, CODE_FOR_umaxv8di3_mask, "__builtin_ia32_pmaxuq512_mask", IX86_BUILTIN_PMAXUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30071 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv16si3_mask, "__builtin_ia32_pminsd512_mask", IX86_BUILTIN_PMINSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30072 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv8di3_mask, "__builtin_ia32_pminsq512_mask", IX86_BUILTIN_PMINSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30073 { OPTION_MASK_ISA_AVX512F, CODE_FOR_uminv16si3_mask, "__builtin_ia32_pminud512_mask", IX86_BUILTIN_PMINUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30074 { OPTION_MASK_ISA_AVX512F, CODE_FOR_uminv8di3_mask, "__builtin_ia32_pminuq512_mask", IX86_BUILTIN_PMINUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30075 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16qi2_mask, "__builtin_ia32_pmovdb512_mask", IX86_BUILTIN_PMOVDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
30076 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16hi2_mask, "__builtin_ia32_pmovdw512_mask", IX86_BUILTIN_PMOVDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
30077 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div16qi2_mask, "__builtin_ia32_pmovqb512_mask", IX86_BUILTIN_PMOVQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
30078 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8si2_mask, "__builtin_ia32_pmovqd512_mask", IX86_BUILTIN_PMOVQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
30079 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8hi2_mask, "__builtin_ia32_pmovqw512_mask", IX86_BUILTIN_PMOVQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
30080 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask, "__builtin_ia32_pmovsdb512_mask", IX86_BUILTIN_PMOVSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
30081 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16hi2_mask, "__builtin_ia32_pmovsdw512_mask", IX86_BUILTIN_PMOVSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
30082 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask, "__builtin_ia32_pmovsqb512_mask", IX86_BUILTIN_PMOVSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
30083 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8si2_mask, "__builtin_ia32_pmovsqd512_mask", IX86_BUILTIN_PMOVSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
30084 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8hi2_mask, "__builtin_ia32_pmovsqw512_mask", IX86_BUILTIN_PMOVSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
30085 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv16qiv16si2_mask, "__builtin_ia32_pmovsxbd512_mask", IX86_BUILTIN_PMOVSXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_HI },
30086 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8qiv8di2_mask, "__builtin_ia32_pmovsxbq512_mask", IX86_BUILTIN_PMOVSXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_QI },
30087 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8siv8di2_mask, "__builtin_ia32_pmovsxdq512_mask", IX86_BUILTIN_PMOVSXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_QI },
30088 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv16hiv16si2_mask, "__builtin_ia32_pmovsxwd512_mask", IX86_BUILTIN_PMOVSXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_HI },
30089 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8hiv8di2_mask, "__builtin_ia32_pmovsxwq512_mask", IX86_BUILTIN_PMOVSXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_QI },
30090 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask, "__builtin_ia32_pmovusdb512_mask", IX86_BUILTIN_PMOVUSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
30091 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16hi2_mask, "__builtin_ia32_pmovusdw512_mask", IX86_BUILTIN_PMOVUSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
30092 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div16qi2_mask, "__builtin_ia32_pmovusqb512_mask", IX86_BUILTIN_PMOVUSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
30093 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8si2_mask, "__builtin_ia32_pmovusqd512_mask", IX86_BUILTIN_PMOVUSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
30094 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8hi2_mask, "__builtin_ia32_pmovusqw512_mask", IX86_BUILTIN_PMOVUSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
30095 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv16qiv16si2_mask, "__builtin_ia32_pmovzxbd512_mask", IX86_BUILTIN_PMOVZXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_HI },
30096 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8qiv8di2_mask, "__builtin_ia32_pmovzxbq512_mask", IX86_BUILTIN_PMOVZXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_QI },
30097 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8siv8di2_mask, "__builtin_ia32_pmovzxdq512_mask", IX86_BUILTIN_PMOVZXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_QI },
30098 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv16hiv16si2_mask, "__builtin_ia32_pmovzxwd512_mask", IX86_BUILTIN_PMOVZXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_HI },
30099 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8hiv8di2_mask, "__builtin_ia32_pmovzxwq512_mask", IX86_BUILTIN_PMOVZXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_QI },
30100 { OPTION_MASK_ISA_AVX512F, CODE_FOR_vec_widen_smult_even_v16si_mask, "__builtin_ia32_pmuldq512_mask", IX86_BUILTIN_PMULDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_QI },
30101 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv16si3_mask, "__builtin_ia32_pmulld512_mask" , IX86_BUILTIN_PMULLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30102 { OPTION_MASK_ISA_AVX512F, CODE_FOR_vec_widen_umult_even_v16si_mask, "__builtin_ia32_pmuludq512_mask", IX86_BUILTIN_PMULUDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_QI },
30103 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorv16si3_mask, "__builtin_ia32_pord512_mask", IX86_BUILTIN_PORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30104 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorv8di3_mask, "__builtin_ia32_porq512_mask", IX86_BUILTIN_PORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30105 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolv16si_mask, "__builtin_ia32_prold512_mask", IX86_BUILTIN_PROLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
30106 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolv8di_mask, "__builtin_ia32_prolq512_mask", IX86_BUILTIN_PROLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
30107 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolvv16si_mask, "__builtin_ia32_prolvd512_mask", IX86_BUILTIN_PROLVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30108 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolvv8di_mask, "__builtin_ia32_prolvq512_mask", IX86_BUILTIN_PROLVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30109 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorv16si_mask, "__builtin_ia32_prord512_mask", IX86_BUILTIN_PRORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
30110 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorv8di_mask, "__builtin_ia32_prorq512_mask", IX86_BUILTIN_PRORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
30111 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorvv16si_mask, "__builtin_ia32_prorvd512_mask", IX86_BUILTIN_PRORVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30112 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorvv8di_mask, "__builtin_ia32_prorvq512_mask", IX86_BUILTIN_PRORVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30113 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pshufdv3_mask, "__builtin_ia32_pshufd512_mask", IX86_BUILTIN_PSHUFD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
30114 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslld512_mask", IX86_BUILTIN_PSLLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
30115 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslldi512_mask", IX86_BUILTIN_PSLLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
30116 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllq512_mask", IX86_BUILTIN_PSLLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
30117 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllqi512_mask", IX86_BUILTIN_PSLLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
30118 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashlvv16si_mask, "__builtin_ia32_psllv16si_mask", IX86_BUILTIN_PSLLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30119 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashlvv8di_mask, "__builtin_ia32_psllv8di_mask", IX86_BUILTIN_PSLLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30120 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psrad512_mask", IX86_BUILTIN_PSRAD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
30121 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psradi512_mask", IX86_BUILTIN_PSRADI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
30122 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraq512_mask", IX86_BUILTIN_PSRAQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
30123 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraqi512_mask", IX86_BUILTIN_PSRAQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
30124 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashrvv16si_mask, "__builtin_ia32_psrav16si_mask", IX86_BUILTIN_PSRAVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30125 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashrvv8di_mask, "__builtin_ia32_psrav8di_mask", IX86_BUILTIN_PSRAVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30126 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrld512_mask", IX86_BUILTIN_PSRLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
30127 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrldi512_mask", IX86_BUILTIN_PSRLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
30128 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlq512_mask", IX86_BUILTIN_PSRLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
30129 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlqi512_mask", IX86_BUILTIN_PSRLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
30130 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_lshrvv16si_mask, "__builtin_ia32_psrlv16si_mask", IX86_BUILTIN_PSRLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30131 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_lshrvv8di_mask, "__builtin_ia32_psrlv8di_mask", IX86_BUILTIN_PSRLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30132 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv16si3_mask, "__builtin_ia32_psubd512_mask", IX86_BUILTIN_PSUBD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30133 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv8di3_mask, "__builtin_ia32_psubq512_mask", IX86_BUILTIN_PSUBQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30134 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testmv16si3_mask, "__builtin_ia32_ptestmd512", IX86_BUILTIN_PTESTMD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
30135 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testmv8di3_mask, "__builtin_ia32_ptestmq512", IX86_BUILTIN_PTESTMQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
30136 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testnmv16si3_mask, "__builtin_ia32_ptestnmd512", IX86_BUILTIN_PTESTNMD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
30137 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testnmv8di3_mask, "__builtin_ia32_ptestnmq512", IX86_BUILTIN_PTESTNMQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
30138 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_highv16si_mask, "__builtin_ia32_punpckhdq512_mask", IX86_BUILTIN_PUNPCKHDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30139 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_highv8di_mask, "__builtin_ia32_punpckhqdq512_mask", IX86_BUILTIN_PUNPCKHQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30140 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_lowv16si_mask, "__builtin_ia32_punpckldq512_mask", IX86_BUILTIN_PUNPCKLDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30141 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_lowv8di_mask, "__builtin_ia32_punpcklqdq512_mask", IX86_BUILTIN_PUNPCKLQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30142 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorv16si3_mask, "__builtin_ia32_pxord512_mask", IX86_BUILTIN_PXORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30143 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorv8di3_mask, "__builtin_ia32_pxorq512_mask", IX86_BUILTIN_PXORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30144 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rcp14v8df_mask, "__builtin_ia32_rcp14pd512_mask", IX86_BUILTIN_RCP14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30145 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rcp14v16sf_mask, "__builtin_ia32_rcp14ps512_mask", IX86_BUILTIN_RCP14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30146 { OPTION_MASK_ISA_AVX512F, CODE_FOR_srcp14v2df, "__builtin_ia32_rcp14sd", IX86_BUILTIN_RCP14SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
30147 { OPTION_MASK_ISA_AVX512F, CODE_FOR_srcp14v4sf, "__builtin_ia32_rcp14ss", IX86_BUILTIN_RCP14SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
30148 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v8df_mask, "__builtin_ia32_rsqrt14pd512_mask", IX86_BUILTIN_RSQRT14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30149 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v16sf_mask, "__builtin_ia32_rsqrt14ps512_mask", IX86_BUILTIN_RSQRT14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30150 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v2df, "__builtin_ia32_rsqrt14sd", IX86_BUILTIN_RSQRT14SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
30151 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v4sf, "__builtin_ia32_rsqrt14ss", IX86_BUILTIN_RSQRT14SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
30152 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shufpd512_mask, "__builtin_ia32_shufpd512_mask", IX86_BUILTIN_SHUFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI },
30153 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shufps512_mask, "__builtin_ia32_shufps512_mask", IX86_BUILTIN_SHUFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI },
30154 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_f32x4_mask, "__builtin_ia32_shuf_f32x4_mask", IX86_BUILTIN_SHUF_F32x4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI },
30155 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_f64x2_mask, "__builtin_ia32_shuf_f64x2_mask", IX86_BUILTIN_SHUF_F64x2, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI },
30156 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_i32x4_mask, "__builtin_ia32_shuf_i32x4_mask", IX86_BUILTIN_SHUF_I32x4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI },
30157 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_i64x2_mask, "__builtin_ia32_shuf_i64x2_mask", IX86_BUILTIN_SHUF_I64x2, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI },
30158 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ucmpv16si3_mask, "__builtin_ia32_ucmpd512_mask", IX86_BUILTIN_UCMPD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_INT_HI },
30159 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ucmpv8di3_mask, "__builtin_ia32_ucmpq512_mask", IX86_BUILTIN_UCMPQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_INT_QI },
30160 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpckhpd512_mask, "__builtin_ia32_unpckhpd512_mask", IX86_BUILTIN_UNPCKHPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
30161 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpckhps512_mask, "__builtin_ia32_unpckhps512_mask", IX86_BUILTIN_UNPCKHPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
30162 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpcklpd512_mask, "__builtin_ia32_unpcklpd512_mask", IX86_BUILTIN_UNPCKLPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
30163 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpcklps512_mask, "__builtin_ia32_unpcklps512_mask", IX86_BUILTIN_UNPCKLPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
30164 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_clzv16si2_mask, "__builtin_ia32_vplzcntd_512_mask", IX86_BUILTIN_VPCLZCNTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30165 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_clzv8di2_mask, "__builtin_ia32_vplzcntq_512_mask", IX86_BUILTIN_VPCLZCNTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30166 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_conflictv16si_mask, "__builtin_ia32_vpconflictsi_512_mask", IX86_BUILTIN_VPCONFLICTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30167 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_conflictv8di_mask, "__builtin_ia32_vpconflictdi_512_mask", IX86_BUILTIN_VPCONFLICTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30168 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permv8df_mask, "__builtin_ia32_permdf512_mask", IX86_BUILTIN_VPERMDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI },
30169 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permv8di_mask, "__builtin_ia32_permdi512_mask", IX86_BUILTIN_VPERMDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
30170 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv16si3_mask, "__builtin_ia32_vpermi2vard512_mask", IX86_BUILTIN_VPERMI2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30171 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv8df3_mask, "__builtin_ia32_vpermi2varpd512_mask", IX86_BUILTIN_VPERMI2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
30172 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv16sf3_mask, "__builtin_ia32_vpermi2varps512_mask", IX86_BUILTIN_VPERMI2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
30173 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv8di3_mask, "__builtin_ia32_vpermi2varq512_mask", IX86_BUILTIN_VPERMI2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30174 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilv8df_mask, "__builtin_ia32_vpermilpd512_mask", IX86_BUILTIN_VPERMILPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI },
30175 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilv16sf_mask, "__builtin_ia32_vpermilps512_mask", IX86_BUILTIN_VPERMILPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI },
30176 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilvarv8df3_mask, "__builtin_ia32_vpermilvarpd512_mask", IX86_BUILTIN_VPERMILVARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
30177 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilvarv16sf3_mask, "__builtin_ia32_vpermilvarps512_mask", IX86_BUILTIN_VPERMILVARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
30178 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16si3_mask, "__builtin_ia32_vpermt2vard512_mask", IX86_BUILTIN_VPERMT2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30179 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16si3_maskz, "__builtin_ia32_vpermt2vard512_maskz", IX86_BUILTIN_VPERMT2VARD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30180 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8df3_mask, "__builtin_ia32_vpermt2varpd512_mask", IX86_BUILTIN_VPERMT2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_QI },
30181 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8df3_maskz, "__builtin_ia32_vpermt2varpd512_maskz", IX86_BUILTIN_VPERMT2VARPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_QI },
30182 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16sf3_mask, "__builtin_ia32_vpermt2varps512_mask", IX86_BUILTIN_VPERMT2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_HI },
30183 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16sf3_maskz, "__builtin_ia32_vpermt2varps512_maskz", IX86_BUILTIN_VPERMT2VARPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_HI },
30184 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8di3_mask, "__builtin_ia32_vpermt2varq512_mask", IX86_BUILTIN_VPERMT2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30185 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8di3_maskz, "__builtin_ia32_vpermt2varq512_maskz", IX86_BUILTIN_VPERMT2VARQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30186 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv8df_mask, "__builtin_ia32_permvardf512_mask", IX86_BUILTIN_VPERMVARDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
30187 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv8di_mask, "__builtin_ia32_permvardi512_mask", IX86_BUILTIN_VPERMVARDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30188 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv16sf_mask, "__builtin_ia32_permvarsf512_mask", IX86_BUILTIN_VPERMVARSF512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
30189 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv16si_mask, "__builtin_ia32_permvarsi512_mask", IX86_BUILTIN_VPERMVARSI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30190 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv16si_mask, "__builtin_ia32_pternlogd512_mask", IX86_BUILTIN_VTERNLOGD512_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI },
30191 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv16si_maskz, "__builtin_ia32_pternlogd512_maskz", IX86_BUILTIN_VTERNLOGD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI },
30192 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv8di_mask, "__builtin_ia32_pternlogq512_mask", IX86_BUILTIN_VTERNLOGQ512_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI },
30193 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv8di_maskz, "__builtin_ia32_pternlogq512_maskz", IX86_BUILTIN_VTERNLOGQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI },
30194
30195 { OPTION_MASK_ISA_AVX512F, CODE_FOR_copysignv16sf3, "__builtin_ia32_copysignps512", IX86_BUILTIN_CPYSGNPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF },
30196 { OPTION_MASK_ISA_AVX512F, CODE_FOR_copysignv8df3, "__builtin_ia32_copysignpd512", IX86_BUILTIN_CPYSGNPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF },
30197 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv8df2, "__builtin_ia32_sqrtpd512", IX86_BUILTIN_SQRTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF },
30198 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sqrtv16sf2, "__builtin_ia32_sqrtps512", IX86_BUILTIN_SQRTPS_NR512, UNKNOWN, (int) V16SF_FTYPE_V16SF },
30199 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v16sf, "__builtin_ia32_exp2ps", IX86_BUILTIN_EXP2PS, UNKNOWN, (int) V16SF_FTYPE_V16SF },
30200 { OPTION_MASK_ISA_AVX512F, CODE_FOR_roundv8df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix512", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512, UNKNOWN, (int) V16SI_FTYPE_V8DF_V8DF },
30201 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_floorpd_vec_pack_sfix512", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_FLOOR, (int) V16SI_FTYPE_V8DF_V8DF_ROUND },
30202 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_ceilpd_vec_pack_sfix512", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_CEIL, (int) V16SI_FTYPE_V8DF_V8DF_ROUND },
30203
30204 /* Mask arithmetic operations.  */
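/* These operate on the 16-bit AVX-512 mask registers (__mmask16).  For
   illustration only (assumed mapping, as provided by the avx512fintrin.h
   wrappers): _mm512_kand (a, b) is expected to expand to
   __builtin_ia32_kandhi ((__mmask16) a, (__mmask16) b).  */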
30205 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andhi3, "__builtin_ia32_kandhi", IX86_BUILTIN_KAND16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30206 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kandnhi, "__builtin_ia32_kandnhi", IX86_BUILTIN_KANDN16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30207 { OPTION_MASK_ISA_AVX512F, CODE_FOR_one_cmplhi2, "__builtin_ia32_knothi", IX86_BUILTIN_KNOT16, UNKNOWN, (int) HI_FTYPE_HI },
30208 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorhi3, "__builtin_ia32_korhi", IX86_BUILTIN_KOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30209 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kortestchi, "__builtin_ia32_kortestchi", IX86_BUILTIN_KORTESTC16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30210 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kortestzhi, "__builtin_ia32_kortestzhi", IX86_BUILTIN_KORTESTZ16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30211 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kunpckhi, "__builtin_ia32_kunpckhi", IX86_BUILTIN_KUNPCKBW, UNKNOWN, (int) HI_FTYPE_HI_HI },
30212 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kxnorhi, "__builtin_ia32_kxnorhi", IX86_BUILTIN_KXNOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30213 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorhi3, "__builtin_ia32_kxorhi", IX86_BUILTIN_KXOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30214 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kmovw, "__builtin_ia32_kmov16", IX86_BUILTIN_KMOV16, UNKNOWN, (int) HI_FTYPE_HI },
30215
30216 /* SHA */
30217 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1msg1, 0, IX86_BUILTIN_SHA1MSG1, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30218 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1msg2, 0, IX86_BUILTIN_SHA1MSG2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30219 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1nexte, 0, IX86_BUILTIN_SHA1NEXTE, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30220 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1rnds4, 0, IX86_BUILTIN_SHA1RNDS4, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
30221 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256msg1, 0, IX86_BUILTIN_SHA256MSG1, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30222 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256msg2, 0, IX86_BUILTIN_SHA256MSG2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30223 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256rnds2, 0, IX86_BUILTIN_SHA256RNDS2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI },
30224 };
30225
30226 /* Builtins with rounding support. */
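/* For these builtins the trailing INT operand in the function type is the
   rounding/SAE control immediate.  As an illustrative (assumed) mapping from
   the intrinsic wrappers in avx512fintrin.h:
     _mm512_add_round_pd (a, b, R)
       -> __builtin_ia32_addpd512_mask (a, b, dst, (__mmask8) -1, R)  */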
30227 static const struct builtin_description bdesc_round_args[] =
30228 {
30229 /* AVX512F */
30230 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv8df3_mask_round, "__builtin_ia32_addpd512_mask", IX86_BUILTIN_ADDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30231 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv16sf3_mask_round, "__builtin_ia32_addps512_mask", IX86_BUILTIN_ADDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30232 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmaddv2df3_round, "__builtin_ia32_addsd_round", IX86_BUILTIN_ADDSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30233 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmaddv4sf3_round, "__builtin_ia32_addss_round", IX86_BUILTIN_ADDSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30234 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv8df3_mask_round, "__builtin_ia32_cmppd512_mask", IX86_BUILTIN_CMPPD512, UNKNOWN, (int) QI_FTYPE_V8DF_V8DF_INT_QI_INT },
30235 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv16sf3_mask_round, "__builtin_ia32_cmpps512_mask", IX86_BUILTIN_CMPPS512, UNKNOWN, (int) HI_FTYPE_V16SF_V16SF_INT_HI_INT },
30236 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmcmpv2df3_mask_round, "__builtin_ia32_cmpsd_mask", IX86_BUILTIN_CMPSD_MASK, UNKNOWN, (int) QI_FTYPE_V2DF_V2DF_INT_QI_INT },
30237 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmcmpv4sf3_mask_round, "__builtin_ia32_cmpss_mask", IX86_BUILTIN_CMPSS_MASK, UNKNOWN, (int) QI_FTYPE_V4SF_V4SF_INT_QI_INT },
30238 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_comi_round, "__builtin_ia32_vcomisd", IX86_BUILTIN_COMIDF, UNKNOWN, (int) INT_FTYPE_V2DF_V2DF_INT_INT },
30239 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_comi_round, "__builtin_ia32_vcomiss", IX86_BUILTIN_COMISF, UNKNOWN, (int) INT_FTYPE_V4SF_V4SF_INT_INT },
30240 { OPTION_MASK_ISA_AVX512F, CODE_FOR_floatv16siv16sf2_mask_round, "__builtin_ia32_cvtdq2ps512_mask", IX86_BUILTIN_CVTDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT },
30241 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtpd2dq512_mask_round, "__builtin_ia32_cvtpd2dq512_mask", IX86_BUILTIN_CVTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
30242 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtpd2ps512_mask_round, "__builtin_ia32_cvtpd2ps512_mask", IX86_BUILTIN_CVTPD2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DF_V8SF_QI_INT },
30243 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ufix_notruncv8dfv8si_mask_round, "__builtin_ia32_cvtpd2udq512_mask", IX86_BUILTIN_CVTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
30244 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtph2ps512_mask_round, "__builtin_ia32_vcvtph2ps512_mask", IX86_BUILTIN_CVTPH2PS512, UNKNOWN, (int) V16SF_FTYPE_V16HI_V16SF_HI_INT },
30245 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2dq512_mask", IX86_BUILTIN_CVTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
30246 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtps2pd512_mask_round, "__builtin_ia32_cvtps2pd512_mask", IX86_BUILTIN_CVTPS2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SF_V8DF_QI_INT },
30247 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ufix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2udq512_mask", IX86_BUILTIN_CVTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
30248 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtsd2ss_round, "__builtin_ia32_cvtsd2ss_round", IX86_BUILTIN_CVTSD2SS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF_INT },
30249 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq_round, "__builtin_ia32_cvtsi2sd64", IX86_BUILTIN_CVTSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT64_INT },
30250 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvtsi2ss_round, "__builtin_ia32_cvtsi2ss32", IX86_BUILTIN_CVTSI2SS32, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_INT },
30251 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq_round, "__builtin_ia32_cvtsi2ss64", IX86_BUILTIN_CVTSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT64_INT },
30252 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtss2sd_round, "__builtin_ia32_cvtss2sd_round", IX86_BUILTIN_CVTSS2SD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF_INT },
30253 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2dq512_mask", IX86_BUILTIN_CVTTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
30254 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2udq512_mask", IX86_BUILTIN_CVTTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
30255 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2dq512_mask", IX86_BUILTIN_CVTTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
30256 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2udq512_mask", IX86_BUILTIN_CVTTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
30257 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufloatv16siv16sf2_mask_round, "__builtin_ia32_cvtudq2ps512_mask", IX86_BUILTIN_CVTUDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT },
30258 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_cvtusi2sd64_round, "__builtin_ia32_cvtusi2sd64", IX86_BUILTIN_CVTUSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT64_INT },
30259 { OPTION_MASK_ISA_AVX512F, CODE_FOR_cvtusi2ss32_round, "__builtin_ia32_cvtusi2ss32", IX86_BUILTIN_CVTUSI2SS32, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT_INT },
30260 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_cvtusi2ss64_round, "__builtin_ia32_cvtusi2ss64", IX86_BUILTIN_CVTUSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT64_INT },
30261 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_divv8df3_mask_round, "__builtin_ia32_divpd512_mask", IX86_BUILTIN_DIVPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30262 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_divv16sf3_mask_round, "__builtin_ia32_divps512_mask", IX86_BUILTIN_DIVPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30263 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmdivv2df3_round, "__builtin_ia32_divsd_round", IX86_BUILTIN_DIVSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30264 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmdivv4sf3_round, "__builtin_ia32_divss_round", IX86_BUILTIN_DIVSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30265 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv8df_mask_round, "__builtin_ia32_fixupimmpd512_mask", IX86_BUILTIN_FIXUPIMMPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT },
30266 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv8df_maskz_round, "__builtin_ia32_fixupimmpd512_maskz", IX86_BUILTIN_FIXUPIMMPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT },
30267 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv16sf_mask_round, "__builtin_ia32_fixupimmps512_mask", IX86_BUILTIN_FIXUPIMMPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT },
30268 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv16sf_maskz_round, "__builtin_ia32_fixupimmps512_maskz", IX86_BUILTIN_FIXUPIMMPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT },
30269 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv2df_mask_round, "__builtin_ia32_fixupimmsd_mask", IX86_BUILTIN_FIXUPIMMSD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT },
30270 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv2df_maskz_round, "__builtin_ia32_fixupimmsd_maskz", IX86_BUILTIN_FIXUPIMMSD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT },
30271 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv4sf_mask_round, "__builtin_ia32_fixupimmss_mask", IX86_BUILTIN_FIXUPIMMSS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT },
30272 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv4sf_maskz_round, "__builtin_ia32_fixupimmss_maskz", IX86_BUILTIN_FIXUPIMMSS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT },
30273 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getexpv8df_mask_round, "__builtin_ia32_getexppd512_mask", IX86_BUILTIN_GETEXPPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
30274 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getexpv16sf_mask_round, "__builtin_ia32_getexpps512_mask", IX86_BUILTIN_GETEXPPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
30275 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sgetexpv2df_round, "__builtin_ia32_getexpsd128_round", IX86_BUILTIN_GETEXPSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30276 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sgetexpv4sf_round, "__builtin_ia32_getexpss128_round", IX86_BUILTIN_GETEXPSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30277 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv8df_mask_round, "__builtin_ia32_getmantpd512_mask", IX86_BUILTIN_GETMANTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT },
30278 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv16sf_mask_round, "__builtin_ia32_getmantps512_mask", IX86_BUILTIN_GETMANTPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT },
30279 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv2df_round, "__builtin_ia32_getmantsd_round", IX86_BUILTIN_GETMANTSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
30280 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv4sf_round, "__builtin_ia32_getmantss_round", IX86_BUILTIN_GETMANTSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
30281 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv8df3_mask_round, "__builtin_ia32_maxpd512_mask", IX86_BUILTIN_MAXPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30282 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv16sf3_mask_round, "__builtin_ia32_maxps512_mask", IX86_BUILTIN_MAXPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30283 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsmaxv2df3_round, "__builtin_ia32_maxsd_round", IX86_BUILTIN_MAXSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30284 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsmaxv4sf3_round, "__builtin_ia32_maxss_round", IX86_BUILTIN_MAXSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30285 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv8df3_mask_round, "__builtin_ia32_minpd512_mask", IX86_BUILTIN_MINPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30286 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv16sf3_mask_round, "__builtin_ia32_minps512_mask", IX86_BUILTIN_MINPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30287 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsminv2df3_round, "__builtin_ia32_minsd_round", IX86_BUILTIN_MINSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30288 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsminv4sf3_round, "__builtin_ia32_minss_round", IX86_BUILTIN_MINSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30289 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv8df3_mask_round, "__builtin_ia32_mulpd512_mask", IX86_BUILTIN_MULPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30290 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv16sf3_mask_round, "__builtin_ia32_mulps512_mask", IX86_BUILTIN_MULPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30291 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmmulv2df3_round, "__builtin_ia32_mulsd_round", IX86_BUILTIN_MULSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30292 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmmulv4sf3_round, "__builtin_ia32_mulss_round", IX86_BUILTIN_MULSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30293 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev8df_mask_round, "__builtin_ia32_rndscalepd_mask", IX86_BUILTIN_RNDSCALEPD, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT },
30294 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev16sf_mask_round, "__builtin_ia32_rndscaleps_mask", IX86_BUILTIN_RNDSCALEPS, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT },
30295 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev2df_round, "__builtin_ia32_rndscalesd_round", IX86_BUILTIN_RNDSCALESD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
30296 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev4sf_round, "__builtin_ia32_rndscaless_round", IX86_BUILTIN_RNDSCALESS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
30297 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_scalefv8df_mask_round, "__builtin_ia32_scalefpd512_mask", IX86_BUILTIN_SCALEFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30298 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_scalefv16sf_mask_round, "__builtin_ia32_scalefps512_mask", IX86_BUILTIN_SCALEFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30299 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmscalefv2df_round, "__builtin_ia32_scalefsd_round", IX86_BUILTIN_SCALEFSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30300 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmscalefv4sf_round, "__builtin_ia32_scalefss_round", IX86_BUILTIN_SCALEFSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30301 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv8df2_mask_round, "__builtin_ia32_sqrtpd512_mask", IX86_BUILTIN_SQRTPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
30302 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv16sf2_mask_round, "__builtin_ia32_sqrtps512_mask", IX86_BUILTIN_SQRTPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
30303 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsqrtv2df2_round, "__builtin_ia32_sqrtsd_round", IX86_BUILTIN_SQRTSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30304 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsqrtv4sf2_round, "__builtin_ia32_sqrtss_round", IX86_BUILTIN_SQRTSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30305 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv8df3_mask_round, "__builtin_ia32_subpd512_mask", IX86_BUILTIN_SUBPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30306 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv16sf3_mask_round, "__builtin_ia32_subps512_mask", IX86_BUILTIN_SUBPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30307 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsubv2df3_round, "__builtin_ia32_subsd_round", IX86_BUILTIN_SUBSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30308 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsubv4sf3_round, "__builtin_ia32_subss_round", IX86_BUILTIN_SUBSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30309 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtsd2si_round, "__builtin_ia32_vcvtsd2si32", IX86_BUILTIN_VCVTSD2SI32, UNKNOWN, (int) INT_FTYPE_V2DF_INT },
30310 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq_round, "__builtin_ia32_vcvtsd2si64", IX86_BUILTIN_VCVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF_INT },
30311 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtsd2usi_round, "__builtin_ia32_vcvtsd2usi32", IX86_BUILTIN_VCVTSD2USI32, UNKNOWN, (int) UINT_FTYPE_V2DF_INT },
30312 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvtsd2usiq_round, "__builtin_ia32_vcvtsd2usi64", IX86_BUILTIN_VCVTSD2USI64, UNKNOWN, (int) UINT64_FTYPE_V2DF_INT },
30313 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvtss2si_round, "__builtin_ia32_vcvtss2si32", IX86_BUILTIN_VCVTSS2SI32, UNKNOWN, (int) INT_FTYPE_V4SF_INT },
30314 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq_round, "__builtin_ia32_vcvtss2si64", IX86_BUILTIN_VCVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF_INT },
30315 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtss2usi_round, "__builtin_ia32_vcvtss2usi32", IX86_BUILTIN_VCVTSS2USI32, UNKNOWN, (int) UINT_FTYPE_V4SF_INT },
30316 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvtss2usiq_round, "__builtin_ia32_vcvtss2usi64", IX86_BUILTIN_VCVTSS2USI64, UNKNOWN, (int) UINT64_FTYPE_V4SF_INT },
30317 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvttsd2si_round, "__builtin_ia32_vcvttsd2si32", IX86_BUILTIN_VCVTTSD2SI32, UNKNOWN, (int) INT_FTYPE_V2DF_INT },
30318 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq_round, "__builtin_ia32_vcvttsd2si64", IX86_BUILTIN_VCVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF_INT },
30319 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvttsd2usi_round, "__builtin_ia32_vcvttsd2usi32", IX86_BUILTIN_VCVTTSD2USI32, UNKNOWN, (int) UINT_FTYPE_V2DF_INT },
30320 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvttsd2usiq_round, "__builtin_ia32_vcvttsd2usi64", IX86_BUILTIN_VCVTTSD2USI64, UNKNOWN, (int) UINT64_FTYPE_V2DF_INT },
30321 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvttss2si_round, "__builtin_ia32_vcvttss2si32", IX86_BUILTIN_VCVTTSS2SI32, UNKNOWN, (int) INT_FTYPE_V4SF_INT },
30322 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq_round, "__builtin_ia32_vcvttss2si64", IX86_BUILTIN_VCVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF_INT },
30323 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvttss2usi_round, "__builtin_ia32_vcvttss2usi32", IX86_BUILTIN_VCVTTSS2USI32, UNKNOWN, (int) UINT_FTYPE_V4SF_INT },
30324 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvttss2usiq_round, "__builtin_ia32_vcvttss2usi64", IX86_BUILTIN_VCVTTSS2USI64, UNKNOWN, (int) UINT64_FTYPE_V4SF_INT },
30325 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_mask_round, "__builtin_ia32_vfmaddpd512_mask", IX86_BUILTIN_VFMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30326 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_mask3_round, "__builtin_ia32_vfmaddpd512_mask3", IX86_BUILTIN_VFMADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30327 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_maskz_round, "__builtin_ia32_vfmaddpd512_maskz", IX86_BUILTIN_VFMADDPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30328 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_mask_round, "__builtin_ia32_vfmaddps512_mask", IX86_BUILTIN_VFMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30329 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_mask3_round, "__builtin_ia32_vfmaddps512_mask3", IX86_BUILTIN_VFMADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30330 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_maskz_round, "__builtin_ia32_vfmaddps512_maskz", IX86_BUILTIN_VFMADDPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30331 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fmai_vmfmadd_v2df_round, "__builtin_ia32_vfmaddsd3_round", IX86_BUILTIN_VFMADDSD3_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_INT },
30332 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fmai_vmfmadd_v4sf_round, "__builtin_ia32_vfmaddss3_round", IX86_BUILTIN_VFMADDSS3_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_INT },
30333 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_mask_round, "__builtin_ia32_vfmaddsubpd512_mask", IX86_BUILTIN_VFMADDSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30334 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_mask3_round, "__builtin_ia32_vfmaddsubpd512_mask3", IX86_BUILTIN_VFMADDSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30335 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_maskz_round, "__builtin_ia32_vfmaddsubpd512_maskz", IX86_BUILTIN_VFMADDSUBPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30336 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_mask_round, "__builtin_ia32_vfmaddsubps512_mask", IX86_BUILTIN_VFMADDSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30337 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_mask3_round, "__builtin_ia32_vfmaddsubps512_mask3", IX86_BUILTIN_VFMADDSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30338 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_maskz_round, "__builtin_ia32_vfmaddsubps512_maskz", IX86_BUILTIN_VFMADDSUBPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30339 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsubadd_v8df_mask3_round, "__builtin_ia32_vfmsubaddpd512_mask3", IX86_BUILTIN_VFMSUBADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30340 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsubadd_v16sf_mask3_round, "__builtin_ia32_vfmsubaddps512_mask3", IX86_BUILTIN_VFMSUBADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30341 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v8df_mask3_round, "__builtin_ia32_vfmsubpd512_mask3", IX86_BUILTIN_VFMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30342 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v16sf_mask3_round, "__builtin_ia32_vfmsubps512_mask3", IX86_BUILTIN_VFMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30343 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v8df_mask_round, "__builtin_ia32_vfnmaddpd512_mask", IX86_BUILTIN_VFNMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30344 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v16sf_mask_round, "__builtin_ia32_vfnmaddps512_mask", IX86_BUILTIN_VFNMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30345 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v8df_mask_round, "__builtin_ia32_vfnmsubpd512_mask", IX86_BUILTIN_VFNMSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30346 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v8df_mask3_round, "__builtin_ia32_vfnmsubpd512_mask3", IX86_BUILTIN_VFNMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30347 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v16sf_mask_round, "__builtin_ia32_vfnmsubps512_mask", IX86_BUILTIN_VFNMSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30348 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v16sf_mask3_round, "__builtin_ia32_vfnmsubps512_mask3", IX86_BUILTIN_VFNMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30349
30350 /* AVX512ER */
30351 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v8df_mask_round, "__builtin_ia32_exp2pd_mask", IX86_BUILTIN_EXP2PD_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
30352 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v16sf_mask_round, "__builtin_ia32_exp2ps_mask", IX86_BUILTIN_EXP2PS_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
30353 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rcp28v8df_mask_round, "__builtin_ia32_rcp28pd_mask", IX86_BUILTIN_RCP28PD, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
30354 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rcp28v16sf_mask_round, "__builtin_ia32_rcp28ps_mask", IX86_BUILTIN_RCP28PS, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
30355 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrcp28v2df_round, "__builtin_ia32_rcp28sd_round", IX86_BUILTIN_RCP28SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30356 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrcp28v4sf_round, "__builtin_ia32_rcp28ss_round", IX86_BUILTIN_RCP28SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30357 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rsqrt28v8df_mask_round, "__builtin_ia32_rsqrt28pd_mask", IX86_BUILTIN_RSQRT28PD, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
30358 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rsqrt28v16sf_mask_round, "__builtin_ia32_rsqrt28ps_mask", IX86_BUILTIN_RSQRT28PS, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
30359 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrsqrt28v2df_round, "__builtin_ia32_rsqrt28sd_round", IX86_BUILTIN_RSQRT28SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30360 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrsqrt28v4sf_round, "__builtin_ia32_rsqrt28ss_round", IX86_BUILTIN_RSQRT28SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30361 };
30362
30363 /* FMA4 and XOP. */
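/* The MULTI_ARG_* macros below are shorthand aliases for the
   ix86_builtin_func_type values used in the bdesc_multi_arg table that
   follows; each one encodes the return type and operand types of an
   FMA4/XOP builtin.  */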
30364 #define MULTI_ARG_4_DF2_DI_I V2DF_FTYPE_V2DF_V2DF_V2DI_INT
30365 #define MULTI_ARG_4_DF2_DI_I1 V4DF_FTYPE_V4DF_V4DF_V4DI_INT
30366 #define MULTI_ARG_4_SF2_SI_I V4SF_FTYPE_V4SF_V4SF_V4SI_INT
30367 #define MULTI_ARG_4_SF2_SI_I1 V8SF_FTYPE_V8SF_V8SF_V8SI_INT
30368 #define MULTI_ARG_3_SF V4SF_FTYPE_V4SF_V4SF_V4SF
30369 #define MULTI_ARG_3_DF V2DF_FTYPE_V2DF_V2DF_V2DF
30370 #define MULTI_ARG_3_SF2 V8SF_FTYPE_V8SF_V8SF_V8SF
30371 #define MULTI_ARG_3_DF2 V4DF_FTYPE_V4DF_V4DF_V4DF
30372 #define MULTI_ARG_3_DI V2DI_FTYPE_V2DI_V2DI_V2DI
30373 #define MULTI_ARG_3_SI V4SI_FTYPE_V4SI_V4SI_V4SI
30374 #define MULTI_ARG_3_SI_DI V4SI_FTYPE_V4SI_V4SI_V2DI
30375 #define MULTI_ARG_3_HI V8HI_FTYPE_V8HI_V8HI_V8HI
30376 #define MULTI_ARG_3_HI_SI V8HI_FTYPE_V8HI_V8HI_V4SI
30377 #define MULTI_ARG_3_QI V16QI_FTYPE_V16QI_V16QI_V16QI
30378 #define MULTI_ARG_3_DI2 V4DI_FTYPE_V4DI_V4DI_V4DI
30379 #define MULTI_ARG_3_SI2 V8SI_FTYPE_V8SI_V8SI_V8SI
30380 #define MULTI_ARG_3_HI2 V16HI_FTYPE_V16HI_V16HI_V16HI
30381 #define MULTI_ARG_3_QI2 V32QI_FTYPE_V32QI_V32QI_V32QI
30382 #define MULTI_ARG_2_SF V4SF_FTYPE_V4SF_V4SF
30383 #define MULTI_ARG_2_DF V2DF_FTYPE_V2DF_V2DF
30384 #define MULTI_ARG_2_DI V2DI_FTYPE_V2DI_V2DI
30385 #define MULTI_ARG_2_SI V4SI_FTYPE_V4SI_V4SI
30386 #define MULTI_ARG_2_HI V8HI_FTYPE_V8HI_V8HI
30387 #define MULTI_ARG_2_QI V16QI_FTYPE_V16QI_V16QI
30388 #define MULTI_ARG_2_DI_IMM V2DI_FTYPE_V2DI_SI
30389 #define MULTI_ARG_2_SI_IMM V4SI_FTYPE_V4SI_SI
30390 #define MULTI_ARG_2_HI_IMM V8HI_FTYPE_V8HI_SI
30391 #define MULTI_ARG_2_QI_IMM V16QI_FTYPE_V16QI_SI
30392 #define MULTI_ARG_2_DI_CMP V2DI_FTYPE_V2DI_V2DI_CMP
30393 #define MULTI_ARG_2_SI_CMP V4SI_FTYPE_V4SI_V4SI_CMP
30394 #define MULTI_ARG_2_HI_CMP V8HI_FTYPE_V8HI_V8HI_CMP
30395 #define MULTI_ARG_2_QI_CMP V16QI_FTYPE_V16QI_V16QI_CMP
30396 #define MULTI_ARG_2_SF_TF V4SF_FTYPE_V4SF_V4SF_TF
30397 #define MULTI_ARG_2_DF_TF V2DF_FTYPE_V2DF_V2DF_TF
30398 #define MULTI_ARG_2_DI_TF V2DI_FTYPE_V2DI_V2DI_TF
30399 #define MULTI_ARG_2_SI_TF V4SI_FTYPE_V4SI_V4SI_TF
30400 #define MULTI_ARG_2_HI_TF V8HI_FTYPE_V8HI_V8HI_TF
30401 #define MULTI_ARG_2_QI_TF V16QI_FTYPE_V16QI_V16QI_TF
30402 #define MULTI_ARG_1_SF V4SF_FTYPE_V4SF
30403 #define MULTI_ARG_1_DF V2DF_FTYPE_V2DF
30404 #define MULTI_ARG_1_SF2 V8SF_FTYPE_V8SF
30405 #define MULTI_ARG_1_DF2 V4DF_FTYPE_V4DF
30406 #define MULTI_ARG_1_DI V2DI_FTYPE_V2DI
30407 #define MULTI_ARG_1_SI V4SI_FTYPE_V4SI
30408 #define MULTI_ARG_1_HI V8HI_FTYPE_V8HI
30409 #define MULTI_ARG_1_QI V16QI_FTYPE_V16QI
30410 #define MULTI_ARG_1_SI_DI V2DI_FTYPE_V4SI
30411 #define MULTI_ARG_1_HI_DI V2DI_FTYPE_V8HI
30412 #define MULTI_ARG_1_HI_SI V4SI_FTYPE_V8HI
30413 #define MULTI_ARG_1_QI_DI V2DI_FTYPE_V16QI
30414 #define MULTI_ARG_1_QI_SI V4SI_FTYPE_V16QI
30415 #define MULTI_ARG_1_QI_HI V8HI_FTYPE_V16QI
30416
30417 static const struct builtin_description bdesc_multi_arg[] =
30418 {
30419 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v4sf,
30420 "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS,
30421 UNKNOWN, (int)MULTI_ARG_3_SF },
30422 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v2df,
30423 "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD,
30424 UNKNOWN, (int)MULTI_ARG_3_DF },
30425
30426 { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v4sf,
30427 "__builtin_ia32_vfmaddss3", IX86_BUILTIN_VFMADDSS3,
30428 UNKNOWN, (int)MULTI_ARG_3_SF },
30429 { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v2df,
30430 "__builtin_ia32_vfmaddsd3", IX86_BUILTIN_VFMADDSD3,
30431 UNKNOWN, (int)MULTI_ARG_3_DF },
30432
30433 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4sf,
30434 "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS,
30435 UNKNOWN, (int)MULTI_ARG_3_SF },
30436 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v2df,
30437 "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD,
30438 UNKNOWN, (int)MULTI_ARG_3_DF },
30439 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v8sf,
30440 "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256,
30441 UNKNOWN, (int)MULTI_ARG_3_SF2 },
30442 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4df,
30443 "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256,
30444 UNKNOWN, (int)MULTI_ARG_3_DF2 },
30445
30446 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4sf,
30447 "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS,
30448 UNKNOWN, (int)MULTI_ARG_3_SF },
30449 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v2df,
30450 "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD,
30451 UNKNOWN, (int)MULTI_ARG_3_DF },
30452 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v8sf,
30453 "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256,
30454 UNKNOWN, (int)MULTI_ARG_3_SF2 },
30455 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4df,
30456 "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256,
30457 UNKNOWN, (int)MULTI_ARG_3_DF2 },
30458
30459 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV, UNKNOWN, (int)MULTI_ARG_3_DI },
30460 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI, UNKNOWN, (int)MULTI_ARG_3_DI },
30461 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4si, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI, UNKNOWN, (int)MULTI_ARG_3_SI },
30462 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8hi, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI, UNKNOWN, (int)MULTI_ARG_3_HI },
30463 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16qi, "__builtin_ia32_vpcmov_v16qi",IX86_BUILTIN_VPCMOV_V16QI,UNKNOWN, (int)MULTI_ARG_3_QI },
30464 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2df, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF, UNKNOWN, (int)MULTI_ARG_3_DF },
30465 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4sf, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF, UNKNOWN, (int)MULTI_ARG_3_SF },
30466
30467 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
30468 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
30469 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8si256, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256, UNKNOWN, (int)MULTI_ARG_3_SI2 },
30470 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16hi256, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256, UNKNOWN, (int)MULTI_ARG_3_HI2 },
30471 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v32qi256, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256, UNKNOWN, (int)MULTI_ARG_3_QI2 },
30472 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4df256, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
30473 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8sf256, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
30474
30475 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pperm, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM, UNKNOWN, (int)MULTI_ARG_3_QI },
30476
30477 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssww, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
30478 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsww, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
30479 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsswd, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
30480 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacswd, "__builtin_ia32_vpmacswd", IX86_BUILTIN_VPMACSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
30481 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdd, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
30482 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdd, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
30483 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdql, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
30484 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdqh, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
30485 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdql, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
30486 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdqh, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
30487 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcsswd, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
30488 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcswd, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
30489
30490 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv2di3, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ, UNKNOWN, (int)MULTI_ARG_2_DI },
30491 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv4si3, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD, UNKNOWN, (int)MULTI_ARG_2_SI },
30492 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv8hi3, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW, UNKNOWN, (int)MULTI_ARG_2_HI },
30493 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv16qi3, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB, UNKNOWN, (int)MULTI_ARG_2_QI },
30494 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv2di3, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM, UNKNOWN, (int)MULTI_ARG_2_DI_IMM },
30495 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv4si3, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM, UNKNOWN, (int)MULTI_ARG_2_SI_IMM },
30496 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv8hi3, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM, UNKNOWN, (int)MULTI_ARG_2_HI_IMM },
30497 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv16qi3, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM, UNKNOWN, (int)MULTI_ARG_2_QI_IMM },
30498 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav2di3, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ, UNKNOWN, (int)MULTI_ARG_2_DI },
30499 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav4si3, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD, UNKNOWN, (int)MULTI_ARG_2_SI },
30500 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav8hi3, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW, UNKNOWN, (int)MULTI_ARG_2_HI },
30501 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav16qi3, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB, UNKNOWN, (int)MULTI_ARG_2_QI },
30502 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv2di3, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ, UNKNOWN, (int)MULTI_ARG_2_DI },
30503 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv4si3, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD, UNKNOWN, (int)MULTI_ARG_2_SI },
30504 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv8hi3, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW, UNKNOWN, (int)MULTI_ARG_2_HI },
30505 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv16qi3, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB, UNKNOWN, (int)MULTI_ARG_2_QI },
30506
30507 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS, UNKNOWN, (int)MULTI_ARG_1_SF },
30508 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD, UNKNOWN, (int)MULTI_ARG_1_DF },
30509 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4sf2, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF },
30510 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv2df2, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF },
30511 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv8sf2, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256, UNKNOWN, (int)MULTI_ARG_1_SF2 },
30512 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4df2, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256, UNKNOWN, (int)MULTI_ARG_1_DF2 },
30513
30514 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbw, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
30515 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbd, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
30516 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbq, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
30517 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwd, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
30518 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwq, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
30519 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadddq, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
30520 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubw, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
30521 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubd, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
30522 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubq, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
30523 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwd, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
30524 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwq, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
30525 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddudq, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
30526 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubbw, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
30527 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubwd, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
30528 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubdq, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
30529
30530 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
30531 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
30532 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
30533 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
30534 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
30535 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
30536 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },
30537
30538 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
30539 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
30540 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
30541 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
30542 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
30543 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
30544 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },
30545
30546 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
30547 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
30548 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomneqd", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
30549 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
30550 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
30551 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
30552 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },
30553
30554 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
30555 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
30556 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
30557 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
30558 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
30559 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
30560 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
30561
30562 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
30563 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
30564 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
30565 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
30566 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
30567 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
30568 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
30569
30570 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
30571 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
30572 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
30573 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
30574 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
30575 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
30576 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
30577
30578 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
30579 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
30580 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
30581 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
30582 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
30583 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
30584 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
30585
30586 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
30587 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
30588 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
30589 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
30590 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
30591 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
30592 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
30593
30594 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
30595 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
30596 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
30597 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
30598 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseub",IX86_BUILTIN_VPCOMFALSEUB,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
30599 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalseuw",IX86_BUILTIN_VPCOMFALSEUW,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
30600 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalseud",IX86_BUILTIN_VPCOMFALSEUD,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
30601 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseuq",IX86_BUILTIN_VPCOMFALSEUQ,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
30602
30603 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
30604 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
30605 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
30606 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
30607 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
30608 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
30609 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
30610 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
30611
30612 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v2df3, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I },
30613 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I },
30614 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I1 },
30615 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I1 },
30616
30617 };
30618 \f
30619 /* TM vector builtins. */
30620
30621 /* Reuse the existing x86-specific `struct builtin_description' because
30622    we're lazy.  Add casts to make them fit.  */
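/* These entries use CODE_FOR_nothing because they do not expand through an
   insn pattern; their codes are the generic BUILT_IN_TM_* values, and
   ix86_init_tm_builtins below registers them with add_builtin_function and
   set_builtin_decl.  */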
30623 static const struct builtin_description bdesc_tm[] =
30624 {
30625 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WM64", (enum ix86_builtins) BUILT_IN_TM_STORE_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
30626 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaRM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
30627 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaWM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
30628 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
30629 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaRM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
30630 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
30631 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RfWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
30632
30633 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WM128", (enum ix86_builtins) BUILT_IN_TM_STORE_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
30634 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaRM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
30635 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaWM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
30636 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
30637 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaRM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
30638 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
30639 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RfWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
30640
30641 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WM256", (enum ix86_builtins) BUILT_IN_TM_STORE_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
30642 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaRM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
30643 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaWM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
30644 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
30645 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaRM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
30646 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
30647 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RfWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
30648
30649 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_LM64", (enum ix86_builtins) BUILT_IN_TM_LOG_M64, UNKNOWN, VOID_FTYPE_PCVOID },
30650 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_LM128", (enum ix86_builtins) BUILT_IN_TM_LOG_M128, UNKNOWN, VOID_FTYPE_PCVOID },
30651 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_LM256", (enum ix86_builtins) BUILT_IN_TM_LOG_M256, UNKNOWN, VOID_FTYPE_PCVOID },
30652 };
30653
30654 /* TM callbacks. */
30655
30656 /* Return the builtin decl needed to load a vector of TYPE. */
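/* Presumably registered as the TARGET_VECTORIZE_BUILTIN_TM_LOAD target hook
   elsewhere in this file (assumption).  */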
30657
30658 static tree
30659 ix86_builtin_tm_load (tree type)
30660 {
30661 if (TREE_CODE (type) == VECTOR_TYPE)
30662 {
30663 switch (tree_to_uhwi (TYPE_SIZE (type)))
30664 {
30665 case 64:
30666 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M64);
30667 case 128:
30668 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M128);
30669 case 256:
30670 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M256);
30671 }
30672 }
30673 return NULL_TREE;
30674 }
30675
30676 /* Return the builtin decl needed to store a vector of TYPE. */
30677
30678 static tree
30679 ix86_builtin_tm_store (tree type)
30680 {
30681 if (TREE_CODE (type) == VECTOR_TYPE)
30682 {
30683 switch (tree_to_uhwi (TYPE_SIZE (type)))
30684 {
30685 case 64:
30686 return builtin_decl_explicit (BUILT_IN_TM_STORE_M64);
30687 case 128:
30688 return builtin_decl_explicit (BUILT_IN_TM_STORE_M128);
30689 case 256:
30690 return builtin_decl_explicit (BUILT_IN_TM_STORE_M256);
30691 }
30692 }
30693 return NULL_TREE;
30694 }
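
#if 0
/* Editor's sketch (not part of GCC): with -fgnu-tm and -mavx, a
   transactional read of a 32-byte vector type is expected to be routed
   through the M256 load hook chosen by ix86_builtin_tm_load above
   (and the matching store through ix86_builtin_tm_store).  */
typedef float v8sf __attribute__ ((vector_size (32)));

v8sf
tm_read_example (v8sf *p)
{
  v8sf x;
  __transaction_atomic { x = *p; }  /* lowered via BUILT_IN_TM_LOAD_M256.  */
  return x;
}
#endif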
30695 \f
30696 /* Initialize the transactional memory vector load/store builtins. */
30697
30698 static void
30699 ix86_init_tm_builtins (void)
30700 {
30701 enum ix86_builtin_func_type ftype;
30702 const struct builtin_description *d;
30703 size_t i;
30704 tree decl;
30705 tree attrs_load, attrs_type_load, attrs_store, attrs_type_store;
30706 tree attrs_log, attrs_type_log;
30707
30708 if (!flag_tm)
30709 return;
30710
30711 /* If there are no builtins defined, we must be compiling in a
30712 language without trans-mem support. */
30713 if (!builtin_decl_explicit_p (BUILT_IN_TM_LOAD_1))
30714 return;
30715
30716 /* Use whatever attributes a normal TM load has. */
30717 decl = builtin_decl_explicit (BUILT_IN_TM_LOAD_1);
30718 attrs_load = DECL_ATTRIBUTES (decl);
30719 attrs_type_load = TYPE_ATTRIBUTES (TREE_TYPE (decl));
30720 /* Use whatever attributes a normal TM store has. */
30721 decl = builtin_decl_explicit (BUILT_IN_TM_STORE_1);
30722 attrs_store = DECL_ATTRIBUTES (decl);
30723 attrs_type_store = TYPE_ATTRIBUTES (TREE_TYPE (decl));
30724 /* Use whatever attributes a normal TM log has. */
30725 decl = builtin_decl_explicit (BUILT_IN_TM_LOG);
30726 attrs_log = DECL_ATTRIBUTES (decl);
30727 attrs_type_log = TYPE_ATTRIBUTES (TREE_TYPE (decl));
30728
30729 for (i = 0, d = bdesc_tm;
30730 i < ARRAY_SIZE (bdesc_tm);
30731 i++, d++)
30732 {
30733 if ((d->mask & ix86_isa_flags) != 0
30734 || (lang_hooks.builtin_function
30735 == lang_hooks.builtin_function_ext_scope))
30736 {
30737 tree type, attrs, attrs_type;
30738 enum built_in_function code = (enum built_in_function) d->code;
30739
30740 ftype = (enum ix86_builtin_func_type) d->flag;
30741 type = ix86_get_builtin_func_type (ftype);
30742
30743 if (BUILTIN_TM_LOAD_P (code))
30744 {
30745 attrs = attrs_load;
30746 attrs_type = attrs_type_load;
30747 }
30748 else if (BUILTIN_TM_STORE_P (code))
30749 {
30750 attrs = attrs_store;
30751 attrs_type = attrs_type_store;
30752 }
30753 else
30754 {
30755 attrs = attrs_log;
30756 attrs_type = attrs_type_log;
30757 }
30758 decl = add_builtin_function (d->name, type, code, BUILT_IN_NORMAL,
30759 /* The builtin without the prefix for
30760 calling it directly. */
30761 d->name + strlen ("__builtin_"),
30762 attrs);
30763 /* add_builtin_function() will set the DECL_ATTRIBUTES, now
30764 set the TYPE_ATTRIBUTES. */
30765 decl_attributes (&TREE_TYPE (decl), attrs_type, ATTR_FLAG_BUILT_IN);
30766
30767 set_builtin_decl (code, decl, false);
30768 }
30769 }
30770 }
30771
30772 /* Set up all the MMX/SSE builtins, even builtins for instructions that are not
30773 in the current target ISA to allow the user to compile particular modules
30774 with target-specific options that differ from the command-line
30775 options. */
30776 static void
30777 ix86_init_mmx_sse_builtins (void)
30778 {
30779 const struct builtin_description * d;
30780 enum ix86_builtin_func_type ftype;
30781 size_t i;
30782
30783 /* Add all special builtins with variable number of operands. */
30784 for (i = 0, d = bdesc_special_args;
30785 i < ARRAY_SIZE (bdesc_special_args);
30786 i++, d++)
30787 {
30788 if (d->name == 0)
30789 continue;
30790
30791 ftype = (enum ix86_builtin_func_type) d->flag;
30792 def_builtin (d->mask, d->name, ftype, d->code);
30793 }
30794
30795 /* Add all builtins with variable number of operands. */
30796 for (i = 0, d = bdesc_args;
30797 i < ARRAY_SIZE (bdesc_args);
30798 i++, d++)
30799 {
30800 if (d->name == 0)
30801 continue;
30802
30803 ftype = (enum ix86_builtin_func_type) d->flag;
30804 def_builtin_const (d->mask, d->name, ftype, d->code);
30805 }
30806
30807 /* Add all builtins with rounding. */
30808 for (i = 0, d = bdesc_round_args;
30809 i < ARRAY_SIZE (bdesc_round_args);
30810 i++, d++)
30811 {
30812 if (d->name == 0)
30813 continue;
30814
30815 ftype = (enum ix86_builtin_func_type) d->flag;
30816 def_builtin_const (d->mask, d->name, ftype, d->code);
30817 }
30818
30819 /* pcmpestr[im] insns. */
30820 for (i = 0, d = bdesc_pcmpestr;
30821 i < ARRAY_SIZE (bdesc_pcmpestr);
30822 i++, d++)
30823 {
30824 if (d->code == IX86_BUILTIN_PCMPESTRM128)
30825 ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT;
30826 else
30827 ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT;
30828 def_builtin_const (d->mask, d->name, ftype, d->code);
30829 }
30830
30831 /* pcmpistr[im] insns. */
30832 for (i = 0, d = bdesc_pcmpistr;
30833 i < ARRAY_SIZE (bdesc_pcmpistr);
30834 i++, d++)
30835 {
30836 if (d->code == IX86_BUILTIN_PCMPISTRM128)
30837 ftype = V16QI_FTYPE_V16QI_V16QI_INT;
30838 else
30839 ftype = INT_FTYPE_V16QI_V16QI_INT;
30840 def_builtin_const (d->mask, d->name, ftype, d->code);
30841 }
30842
30843 /* comi/ucomi insns. */
30844 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
30845 {
30846 if (d->mask == OPTION_MASK_ISA_SSE2)
30847 ftype = INT_FTYPE_V2DF_V2DF;
30848 else
30849 ftype = INT_FTYPE_V4SF_V4SF;
30850 def_builtin_const (d->mask, d->name, ftype, d->code);
30851 }
30852
30853 /* SSE */
30854 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
30855 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);
30856 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr",
30857 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR);
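
#if 0
/* Editor's sketch (not part of GCC): typical use of the two MXCSR
   builtins defined just above; bit 15 (0x8000) is the flush-to-zero
   control bit.  */
static unsigned int
enable_ftz_example (void)
{
  unsigned int mxcsr = __builtin_ia32_stmxcsr ();
  __builtin_ia32_ldmxcsr (mxcsr | 0x8000);
  return mxcsr;   /* old value, so the caller can restore it.  */
}
#endif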
30858
30859 /* SSE or 3DNow!A */
30860 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
30861 "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR,
30862 IX86_BUILTIN_MASKMOVQ);
30863
30864 /* SSE2 */
30865 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu",
30866 VOID_FTYPE_V16QI_V16QI_PCHAR, IX86_BUILTIN_MASKMOVDQU);
30867
30868 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush",
30869 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH);
30870 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence",
30871 VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE);
30872
30873 /* SSE3. */
30874 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor",
30875 VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITOR);
30876 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait",
30877 VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT);
30878
30879 /* AES */
30880 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128",
30881 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128);
30882 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128",
30883 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128);
30884 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128",
30885 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128);
30886 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128",
30887 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128);
30888 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128",
30889 V2DI_FTYPE_V2DI, IX86_BUILTIN_AESIMC128);
30890 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128",
30891 V2DI_FTYPE_V2DI_INT, IX86_BUILTIN_AESKEYGENASSIST128);
30892
30893 /* PCLMUL */
30894 def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128",
30895 V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128);
30896
30897 /* RDRND */
30898 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand16_step",
30899 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDRAND16_STEP);
30900 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand32_step",
30901 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDRAND32_STEP);
30902 def_builtin (OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT,
30903 "__builtin_ia32_rdrand64_step", INT_FTYPE_PULONGLONG,
30904 IX86_BUILTIN_RDRAND64_STEP);
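
#if 0
/* Editor's sketch (not part of GCC): the RDRAND "step" builtins above
   return 0 when the hardware has no random value ready, so callers
   normally retry.  Needs -mrdrnd.  */
static unsigned int
rdrand32_retry_example (void)
{
  unsigned int val;
  while (!__builtin_ia32_rdrand32_step (&val))
    ;   /* spin until a value is delivered.  */
  return val;
}
#endif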
30905
30906 /* AVX2 */
30907 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2df",
30908 V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_V2DF_INT,
30909 IX86_BUILTIN_GATHERSIV2DF);
30910
30911 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4df",
30912 V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_V4DF_INT,
30913 IX86_BUILTIN_GATHERSIV4DF);
30914
30915 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2df",
30916 V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_V2DF_INT,
30917 IX86_BUILTIN_GATHERDIV2DF);
30918
30919 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4df",
30920 V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_V4DF_INT,
30921 IX86_BUILTIN_GATHERDIV4DF);
30922
30923 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4sf",
30924 V4SF_FTYPE_V4SF_PCFLOAT_V4SI_V4SF_INT,
30925 IX86_BUILTIN_GATHERSIV4SF);
30926
30927 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8sf",
30928 V8SF_FTYPE_V8SF_PCFLOAT_V8SI_V8SF_INT,
30929 IX86_BUILTIN_GATHERSIV8SF);
30930
30931 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf",
30932 V4SF_FTYPE_V4SF_PCFLOAT_V2DI_V4SF_INT,
30933 IX86_BUILTIN_GATHERDIV4SF);
30934
30935 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf256",
30936 V4SF_FTYPE_V4SF_PCFLOAT_V4DI_V4SF_INT,
30937 IX86_BUILTIN_GATHERDIV8SF);
30938
30939 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2di",
30940 V2DI_FTYPE_V2DI_PCINT64_V4SI_V2DI_INT,
30941 IX86_BUILTIN_GATHERSIV2DI);
30942
30943 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4di",
30944 V4DI_FTYPE_V4DI_PCINT64_V4SI_V4DI_INT,
30945 IX86_BUILTIN_GATHERSIV4DI);
30946
30947 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2di",
30948 V2DI_FTYPE_V2DI_PCINT64_V2DI_V2DI_INT,
30949 IX86_BUILTIN_GATHERDIV2DI);
30950
30951 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4di",
30952 V4DI_FTYPE_V4DI_PCINT64_V4DI_V4DI_INT,
30953 IX86_BUILTIN_GATHERDIV4DI);
30954
30955 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4si",
30956 V4SI_FTYPE_V4SI_PCINT_V4SI_V4SI_INT,
30957 IX86_BUILTIN_GATHERSIV4SI);
30958
30959 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8si",
30960 V8SI_FTYPE_V8SI_PCINT_V8SI_V8SI_INT,
30961 IX86_BUILTIN_GATHERSIV8SI);
30962
30963 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si",
30964 V4SI_FTYPE_V4SI_PCINT_V2DI_V4SI_INT,
30965 IX86_BUILTIN_GATHERDIV4SI);
30966
30967 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si256",
30968 V4SI_FTYPE_V4SI_PCINT_V4DI_V4SI_INT,
30969 IX86_BUILTIN_GATHERDIV8SI);
30970
30971 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4df",
30972 V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_V4DF_INT,
30973 IX86_BUILTIN_GATHERALTSIV4DF);
30974
30975 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4sf256",
30976 V8SF_FTYPE_V8SF_PCFLOAT_V4DI_V8SF_INT,
30977 IX86_BUILTIN_GATHERALTDIV8SF);
30978
30979 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4di",
30980 V4DI_FTYPE_V4DI_PCINT64_V8SI_V4DI_INT,
30981 IX86_BUILTIN_GATHERALTSIV4DI);
30982
30983 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4si256",
30984 V8SI_FTYPE_V8SI_PCINT_V4DI_V8SI_INT,
30985 IX86_BUILTIN_GATHERALTDIV8SI);
30986
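#if 0
/* Editor's sketch (not part of GCC): the AVX2 gather builtins above are
   normally reached through the avx2intrin.h intrinsics; for example,
   _mm_i32gather_ps is expected to expand to __builtin_ia32_gathersiv4sf.  */
#include <immintrin.h>

__attribute__ ((target ("avx2")))
__m128
gather4_example (const float *base, __m128i idx)
{
  return _mm_i32gather_ps (base, idx, 4);   /* scale of 4 bytes per index.  */
}
#endif
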
30987 /* AVX512F */
30988 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16sf",
30989 V16SF_FTYPE_V16SF_PCFLOAT_V16SI_HI_INT,
30990 IX86_BUILTIN_GATHER3SIV16SF);
30991
30992 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8df",
30993 V8DF_FTYPE_V8DF_PCDOUBLE_V8SI_QI_INT,
30994 IX86_BUILTIN_GATHER3SIV8DF);
30995
30996 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16sf",
30997 V8SF_FTYPE_V8SF_PCFLOAT_V8DI_QI_INT,
30998 IX86_BUILTIN_GATHER3DIV16SF);
30999
31000 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8df",
31001 V8DF_FTYPE_V8DF_PCDOUBLE_V8DI_QI_INT,
31002 IX86_BUILTIN_GATHER3DIV8DF);
31003
31004 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16si",
31005 V16SI_FTYPE_V16SI_PCINT_V16SI_HI_INT,
31006 IX86_BUILTIN_GATHER3SIV16SI);
31007
31008 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8di",
31009 V8DI_FTYPE_V8DI_PCINT64_V8SI_QI_INT,
31010 IX86_BUILTIN_GATHER3SIV8DI);
31011
31012 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16si",
31013 V8SI_FTYPE_V8SI_PCINT_V8DI_QI_INT,
31014 IX86_BUILTIN_GATHER3DIV16SI);
31015
31016 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8di",
31017 V8DI_FTYPE_V8DI_PCINT64_V8DI_QI_INT,
31018 IX86_BUILTIN_GATHER3DIV8DI);
31019
31020 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8df",
31021 V8DF_FTYPE_V8DF_PCDOUBLE_V16SI_QI_INT,
31022 IX86_BUILTIN_GATHER3ALTSIV8DF);
31023
31024 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8sf",
31025 V16SF_FTYPE_V16SF_PCFLOAT_V8DI_HI_INT,
31026 IX86_BUILTIN_GATHER3ALTDIV16SF);
31027
31028 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8di",
31029 V8DI_FTYPE_V8DI_PCINT64_V16SI_QI_INT,
31030 IX86_BUILTIN_GATHER3ALTSIV8DI);
31031
31032 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8si",
31033 V16SI_FTYPE_V16SI_PCINT_V8DI_HI_INT,
31034 IX86_BUILTIN_GATHER3ALTDIV16SI);
31035
31036 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16sf",
31037 VOID_FTYPE_PFLOAT_HI_V16SI_V16SF_INT,
31038 IX86_BUILTIN_SCATTERSIV16SF);
31039
31040 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8df",
31041 VOID_FTYPE_PDOUBLE_QI_V8SI_V8DF_INT,
31042 IX86_BUILTIN_SCATTERSIV8DF);
31043
31044 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16sf",
31045 VOID_FTYPE_PFLOAT_QI_V8DI_V8SF_INT,
31046 IX86_BUILTIN_SCATTERDIV16SF);
31047
31048 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv8df",
31049 VOID_FTYPE_PDOUBLE_QI_V8DI_V8DF_INT,
31050 IX86_BUILTIN_SCATTERDIV8DF);
31051
31052 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16si",
31053 VOID_FTYPE_PINT_HI_V16SI_V16SI_INT,
31054 IX86_BUILTIN_SCATTERSIV16SI);
31055
31056 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8di",
31057 VOID_FTYPE_PLONGLONG_QI_V8SI_V8DI_INT,
31058 IX86_BUILTIN_SCATTERSIV8DI);
31059
31060 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16si",
31061 VOID_FTYPE_PINT_QI_V8DI_V8SI_INT,
31062 IX86_BUILTIN_SCATTERDIV16SI);
31063
31064 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv8di",
31065 VOID_FTYPE_PLONGLONG_QI_V8DI_V8DI_INT,
31066 IX86_BUILTIN_SCATTERDIV8DI);
31067
31068 /* AVX512PF */
31069 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdpd",
31070 VOID_FTYPE_QI_V8SI_PCINT64_INT_INT,
31071 IX86_BUILTIN_GATHERPFDPD);
31072 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdps",
31073 VOID_FTYPE_HI_V16SI_PCINT_INT_INT,
31074 IX86_BUILTIN_GATHERPFDPS);
31075 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqpd",
31076 VOID_FTYPE_QI_V8DI_PCINT64_INT_INT,
31077 IX86_BUILTIN_GATHERPFQPD);
31078 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqps",
31079 VOID_FTYPE_QI_V8DI_PCINT_INT_INT,
31080 IX86_BUILTIN_GATHERPFQPS);
31081 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdpd",
31082 VOID_FTYPE_QI_V8SI_PCINT64_INT_INT,
31083 IX86_BUILTIN_SCATTERPFDPD);
31084 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdps",
31085 VOID_FTYPE_HI_V16SI_PCINT_INT_INT,
31086 IX86_BUILTIN_SCATTERPFDPS);
31087 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqpd",
31088 VOID_FTYPE_QI_V8DI_PCINT64_INT_INT,
31089 IX86_BUILTIN_SCATTERPFQPD);
31090 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqps",
31091 VOID_FTYPE_QI_V8DI_PCINT_INT_INT,
31092 IX86_BUILTIN_SCATTERPFQPS);
31093
31094 /* SHA */
31095 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg1",
31096 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG1);
31097 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg2",
31098 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG2);
31099 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1nexte",
31100 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1NEXTE);
31101 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1rnds4",
31102 V4SI_FTYPE_V4SI_V4SI_INT, IX86_BUILTIN_SHA1RNDS4);
31103 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256msg1",
31104 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG1);
31105 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256msg2",
31106 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG2);
31107 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256rnds2",
31108 V4SI_FTYPE_V4SI_V4SI_V4SI, IX86_BUILTIN_SHA256RNDS2);
31109
31110 /* RTM. */
31111 def_builtin (OPTION_MASK_ISA_RTM, "__builtin_ia32_xabort",
31112 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_XABORT);
31113
31114 /* MMX access to the vec_init patterns. */
31115 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si",
31116 V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);
31117
31118 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi",
31119 V4HI_FTYPE_HI_HI_HI_HI,
31120 IX86_BUILTIN_VEC_INIT_V4HI);
31121
31122 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi",
31123 V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI,
31124 IX86_BUILTIN_VEC_INIT_V8QI);
31125
31126 /* Access to the vec_extract patterns. */
31127 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df",
31128 DOUBLE_FTYPE_V2DF_INT, IX86_BUILTIN_VEC_EXT_V2DF);
31129 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di",
31130 DI_FTYPE_V2DI_INT, IX86_BUILTIN_VEC_EXT_V2DI);
31131 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf",
31132 FLOAT_FTYPE_V4SF_INT, IX86_BUILTIN_VEC_EXT_V4SF);
31133 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si",
31134 SI_FTYPE_V4SI_INT, IX86_BUILTIN_VEC_EXT_V4SI);
31135 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi",
31136 HI_FTYPE_V8HI_INT, IX86_BUILTIN_VEC_EXT_V8HI);
31137
31138 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
31139 "__builtin_ia32_vec_ext_v4hi",
31140 HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI);
31141
31142 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si",
31143 SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI);
31144
31145 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi",
31146 QI_FTYPE_V16QI_INT, IX86_BUILTIN_VEC_EXT_V16QI);
31147
31148 /* Access to the vec_set patterns. */
31149 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT,
31150 "__builtin_ia32_vec_set_v2di",
31151 V2DI_FTYPE_V2DI_DI_INT, IX86_BUILTIN_VEC_SET_V2DI);
31152
31153 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf",
31154 V4SF_FTYPE_V4SF_FLOAT_INT, IX86_BUILTIN_VEC_SET_V4SF);
31155
31156 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si",
31157 V4SI_FTYPE_V4SI_SI_INT, IX86_BUILTIN_VEC_SET_V4SI);
31158
31159 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi",
31160 V8HI_FTYPE_V8HI_HI_INT, IX86_BUILTIN_VEC_SET_V8HI);
31161
31162 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
31163 "__builtin_ia32_vec_set_v4hi",
31164 V4HI_FTYPE_V4HI_HI_INT, IX86_BUILTIN_VEC_SET_V4HI);
31165
31166 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi",
31167 V16QI_FTYPE_V16QI_QI_INT, IX86_BUILTIN_VEC_SET_V16QI);
31168
31169 /* RDSEED */
31170 def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_hi_step",
31171 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDSEED16_STEP);
31172 def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_si_step",
31173 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDSEED32_STEP);
31174 def_builtin (OPTION_MASK_ISA_RDSEED | OPTION_MASK_ISA_64BIT,
31175 "__builtin_ia32_rdseed_di_step",
31176 INT_FTYPE_PULONGLONG, IX86_BUILTIN_RDSEED64_STEP);
31177
31178 /* ADCX */
31179 def_builtin (0, "__builtin_ia32_addcarryx_u32",
31180 UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED, IX86_BUILTIN_ADDCARRYX32);
31181 def_builtin (OPTION_MASK_ISA_64BIT,
31182 "__builtin_ia32_addcarryx_u64",
31183 UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG,
31184 IX86_BUILTIN_ADDCARRYX64);
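
#if 0
/* Editor's sketch (not part of GCC): chaining two 32-bit additions with
   the carry builtin defined above; the carry-out of the low half feeds
   the carry-in of the high half.  */
static unsigned char
add64_via_u32_example (unsigned int a_lo, unsigned int a_hi,
                       unsigned int b_lo, unsigned int b_hi,
                       unsigned int res[2])
{
  unsigned char c = __builtin_ia32_addcarryx_u32 (0, a_lo, b_lo, &res[0]);
  return __builtin_ia32_addcarryx_u32 (c, a_hi, b_hi, &res[1]);
}
#endif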
31185
31186 /* Read/write FLAGS. */
31187 def_builtin (~OPTION_MASK_ISA_64BIT, "__builtin_ia32_readeflags_u32",
31188 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS);
31189 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_ia32_readeflags_u64",
31190 UINT64_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS);
31191 def_builtin (~OPTION_MASK_ISA_64BIT, "__builtin_ia32_writeeflags_u32",
31192 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_WRITE_FLAGS);
31193 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_ia32_writeeflags_u64",
31194 VOID_FTYPE_UINT64, IX86_BUILTIN_WRITE_FLAGS);
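
#if 0
/* Editor's sketch (not part of GCC): the EFLAGS builtins above, 64-bit
   variants, as they would be used on an x86-64 target.  */
static unsigned long long
copy_flags_example (void)
{
  unsigned long long flags = __builtin_ia32_readeflags_u64 ();
  __builtin_ia32_writeeflags_u64 (flags);   /* write the same value back.  */
  return flags;
}
#endif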
31195
31196 /* CLFLUSHOPT. */
31197 def_builtin (OPTION_MASK_ISA_CLFLUSHOPT, "__builtin_ia32_clflushopt",
31198 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSHOPT);
31199
31200 /* Add FMA4 multi-arg instructions.  */
31201 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
31202 {
31203 if (d->name == 0)
31204 continue;
31205
31206 ftype = (enum ix86_builtin_func_type) d->flag;
31207 def_builtin_const (d->mask, d->name, ftype, d->code);
31208 }
31209 }
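
#if 0
/* Editor's sketch (not part of GCC): why the builtins above are created
   even when the command-line ISA does not include them -- a single
   translation unit may enable an ISA per function via the "target"
   attribute and still use the corresponding intrinsics there.  */
#include <immintrin.h>

__attribute__ ((target ("avx2")))
__m256i
avx2_only_here_example (__m256i a, __m256i b)
{
  return _mm256_add_epi32 (a, b);   /* expands to an AVX2 builtin.  */
}
#endif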
31210
31211 /* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL
31212 to return a pointer to VERSION_DECL if the outcome of the expression
31213 formed by PREDICATE_CHAIN is true. This function will be called during
31214 version dispatch to decide which function version to execute. It returns
31215 the basic block at the end, to which more conditions can be added. */
31216
31217 static basic_block
31218 add_condition_to_bb (tree function_decl, tree version_decl,
31219 tree predicate_chain, basic_block new_bb)
31220 {
31221 gimple return_stmt;
31222 tree convert_expr, result_var;
31223 gimple convert_stmt;
31224 gimple call_cond_stmt;
31225 gimple if_else_stmt;
31226
31227 basic_block bb1, bb2, bb3;
31228 edge e12, e23;
31229
31230 tree cond_var, and_expr_var = NULL_TREE;
31231 gimple_seq gseq;
31232
31233 tree predicate_decl, predicate_arg;
31234
31235 push_cfun (DECL_STRUCT_FUNCTION (function_decl));
31236
31237 gcc_assert (new_bb != NULL);
31238 gseq = bb_seq (new_bb);
31239
31240
31241 convert_expr = build1 (CONVERT_EXPR, ptr_type_node,
31242 build_fold_addr_expr (version_decl));
31243 result_var = create_tmp_var (ptr_type_node, NULL);
31244 convert_stmt = gimple_build_assign (result_var, convert_expr);
31245 return_stmt = gimple_build_return (result_var);
31246
31247 if (predicate_chain == NULL_TREE)
31248 {
31249 gimple_seq_add_stmt (&gseq, convert_stmt);
31250 gimple_seq_add_stmt (&gseq, return_stmt);
31251 set_bb_seq (new_bb, gseq);
31252 gimple_set_bb (convert_stmt, new_bb);
31253 gimple_set_bb (return_stmt, new_bb);
31254 pop_cfun ();
31255 return new_bb;
31256 }
31257
31258 while (predicate_chain != NULL)
31259 {
31260 cond_var = create_tmp_var (integer_type_node, NULL);
31261 predicate_decl = TREE_PURPOSE (predicate_chain);
31262 predicate_arg = TREE_VALUE (predicate_chain);
31263 call_cond_stmt = gimple_build_call (predicate_decl, 1, predicate_arg);
31264 gimple_call_set_lhs (call_cond_stmt, cond_var);
31265
31266 gimple_set_block (call_cond_stmt, DECL_INITIAL (function_decl));
31267 gimple_set_bb (call_cond_stmt, new_bb);
31268 gimple_seq_add_stmt (&gseq, call_cond_stmt);
31269
31270 predicate_chain = TREE_CHAIN (predicate_chain);
31271
31272 if (and_expr_var == NULL)
31273 and_expr_var = cond_var;
31274 else
31275 {
31276 gimple assign_stmt;
31277 /* Use MIN_EXPR to check whether any of the condition values is zero:
31278 and_expr_var = min_expr <cond_var, and_expr_var>.  */
31279 assign_stmt = gimple_build_assign (and_expr_var,
31280 build2 (MIN_EXPR, integer_type_node,
31281 cond_var, and_expr_var));
31282
31283 gimple_set_block (assign_stmt, DECL_INITIAL (function_decl));
31284 gimple_set_bb (assign_stmt, new_bb);
31285 gimple_seq_add_stmt (&gseq, assign_stmt);
31286 }
31287 }
31288
31289 if_else_stmt = gimple_build_cond (GT_EXPR, and_expr_var,
31290 integer_zero_node,
31291 NULL_TREE, NULL_TREE);
31292 gimple_set_block (if_else_stmt, DECL_INITIAL (function_decl));
31293 gimple_set_bb (if_else_stmt, new_bb);
31294 gimple_seq_add_stmt (&gseq, if_else_stmt);
31295
31296 gimple_seq_add_stmt (&gseq, convert_stmt);
31297 gimple_seq_add_stmt (&gseq, return_stmt);
31298 set_bb_seq (new_bb, gseq);
31299
31300 bb1 = new_bb;
31301 e12 = split_block (bb1, if_else_stmt);
31302 bb2 = e12->dest;
31303 e12->flags &= ~EDGE_FALLTHRU;
31304 e12->flags |= EDGE_TRUE_VALUE;
31305
31306 e23 = split_block (bb2, return_stmt);
31307
31308 gimple_set_bb (convert_stmt, bb2);
31309 gimple_set_bb (return_stmt, bb2);
31310
31311 bb3 = e23->dest;
31312 make_edge (bb1, bb3, EDGE_FALSE_VALUE);
31313
31314 remove_edge (e23);
31315 make_edge (bb2, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
31316
31317 pop_cfun ();
31318
31319 return bb3;
31320 }
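
#if 0
/* Editor's sketch (not part of GCC): C-like shape of the code that one
   call to add_condition_to_bb appends to the resolver; foo_avx_version
   is a hypothetical function version.  */
extern int foo_avx_version (void);

static void *
resolver_fragment_example (void)
{
  int c1 = __builtin_cpu_is ("corei7");        /* first predicate       */
  int c2 = __builtin_cpu_supports ("avx");     /* second predicate      */
  int all = c1 < c2 ? c1 : c2;                 /* the MIN_EXPR of both  */
  if (all > 0)
    return (void *) &foo_avx_version;
  return 0;   /* control continues with the next condition / default.  */
}
#endif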
31321
31322 /* This parses the attribute arguments to target in DECL and determines
31323 the right builtin to use to match the platform specification.
31324 It returns the priority value for this version decl. If PREDICATE_LIST
31325 is not NULL, it stores the list of cpu features that need to be checked
31326 before dispatching this function. */
31327
31328 static unsigned int
31329 get_builtin_code_for_version (tree decl, tree *predicate_list)
31330 {
31331 tree attrs;
31332 struct cl_target_option cur_target;
31333 tree target_node;
31334 struct cl_target_option *new_target;
31335 const char *arg_str = NULL;
31336 const char *attrs_str = NULL;
31337 char *tok_str = NULL;
31338 char *token;
31339
31340 /* Priority of i386 features, greater value is higher priority. This is
31341 used to decide the order in which function dispatch must happen. For
31342 instance, a version specialized for SSE4.2 should be checked for dispatch
31343 before a version for SSE3, as SSE4.2 implies SSE3. */
31344 enum feature_priority
31345 {
31346 P_ZERO = 0,
31347 P_MMX,
31348 P_SSE,
31349 P_SSE2,
31350 P_SSE3,
31351 P_SSSE3,
31352 P_PROC_SSSE3,
31353 P_SSE4_A,
31354 P_PROC_SSE4_A,
31355 P_SSE4_1,
31356 P_SSE4_2,
31357 P_PROC_SSE4_2,
31358 P_POPCNT,
31359 P_AVX,
31360 P_PROC_AVX,
31361 P_FMA4,
31362 P_XOP,
31363 P_PROC_XOP,
31364 P_FMA,
31365 P_PROC_FMA,
31366 P_AVX2,
31367 P_PROC_AVX2
31368 };
31369
31370 enum feature_priority priority = P_ZERO;
31371
31372 /* These are the target attribute strings for which a dispatcher is
31373 available, from fold_builtin_cpu. */
31374
31375 static struct _feature_list
31376 {
31377 const char *const name;
31378 const enum feature_priority priority;
31379 }
31380 const feature_list[] =
31381 {
31382 {"mmx", P_MMX},
31383 {"sse", P_SSE},
31384 {"sse2", P_SSE2},
31385 {"sse3", P_SSE3},
31386 {"sse4a", P_SSE4_A},
31387 {"ssse3", P_SSSE3},
31388 {"sse4.1", P_SSE4_1},
31389 {"sse4.2", P_SSE4_2},
31390 {"popcnt", P_POPCNT},
31391 {"avx", P_AVX},
31392 {"fma4", P_FMA4},
31393 {"xop", P_XOP},
31394 {"fma", P_FMA},
31395 {"avx2", P_AVX2}
31396 };
31397
31398
31399 static unsigned int NUM_FEATURES
31400 = sizeof (feature_list) / sizeof (struct _feature_list);
31401
31402 unsigned int i;
31403
31404 tree predicate_chain = NULL_TREE;
31405 tree predicate_decl, predicate_arg;
31406
31407 attrs = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
31408 gcc_assert (attrs != NULL);
31409
31410 attrs = TREE_VALUE (TREE_VALUE (attrs));
31411
31412 gcc_assert (TREE_CODE (attrs) == STRING_CST);
31413 attrs_str = TREE_STRING_POINTER (attrs);
31414
31415 /* Return priority zero for default function. */
31416 if (strcmp (attrs_str, "default") == 0)
31417 return 0;
31418
31419 /* Handle arch= if specified. For priority, set it to be 1 more than
31420 the best instruction set the processor can handle. For instance, if
31421 there is a version for atom and a version for ssse3 (the highest ISA
31422 priority for atom), the atom version must be checked for dispatch
31423 before the ssse3 version. */
31424 if (strstr (attrs_str, "arch=") != NULL)
31425 {
31426 cl_target_option_save (&cur_target, &global_options);
31427 target_node = ix86_valid_target_attribute_tree (attrs, &global_options,
31428 &global_options_set);
31429
31430 gcc_assert (target_node);
31431 new_target = TREE_TARGET_OPTION (target_node);
31432 gcc_assert (new_target);
31433
31434 if (new_target->arch_specified && new_target->arch > 0)
31435 {
31436 switch (new_target->arch)
31437 {
31438 case PROCESSOR_CORE2:
31439 arg_str = "core2";
31440 priority = P_PROC_SSSE3;
31441 break;
31442 case PROCESSOR_NEHALEM:
31443 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_AES)
31444 arg_str = "westmere";
31445 else
31446 /* We translate "arch=corei7" and "arch=nehalem" to
31447 "corei7" so that it will be mapped to M_INTEL_COREI7
31448 as cpu type to cover all M_INTEL_COREI7_XXXs. */
31449 arg_str = "corei7";
31450 priority = P_PROC_SSE4_2;
31451 break;
31452 case PROCESSOR_SANDYBRIDGE:
31453 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_F16C)
31454 arg_str = "ivybridge";
31455 else
31456 arg_str = "sandybridge";
31457 priority = P_PROC_AVX;
31458 break;
31459 case PROCESSOR_HASWELL:
31460 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_ADX)
31461 arg_str = "broadwell";
31462 else
31463 arg_str = "haswell";
31464 priority = P_PROC_AVX2;
31465 break;
31466 case PROCESSOR_BONNELL:
31467 arg_str = "bonnell";
31468 priority = P_PROC_SSSE3;
31469 break;
31470 case PROCESSOR_SILVERMONT:
31471 arg_str = "silvermont";
31472 priority = P_PROC_SSE4_2;
31473 break;
31474 case PROCESSOR_AMDFAM10:
31475 arg_str = "amdfam10h";
31476 priority = P_PROC_SSE4_A;
31477 break;
31478 case PROCESSOR_BTVER1:
31479 arg_str = "btver1";
31480 priority = P_PROC_SSE4_A;
31481 break;
31482 case PROCESSOR_BTVER2:
31483 arg_str = "btver2";
31484 priority = P_PROC_AVX;
31485 break;
31486 case PROCESSOR_BDVER1:
31487 arg_str = "bdver1";
31488 priority = P_PROC_XOP;
31489 break;
31490 case PROCESSOR_BDVER2:
31491 arg_str = "bdver2";
31492 priority = P_PROC_FMA;
31493 break;
31494 case PROCESSOR_BDVER3:
31495 arg_str = "bdver3";
31496 priority = P_PROC_FMA;
31497 break;
31498 case PROCESSOR_BDVER4:
31499 arg_str = "bdver4";
31500 priority = P_PROC_AVX2;
31501 break;
31502 }
31503 }
31504
31505 cl_target_option_restore (&global_options, &cur_target);
31506
31507 if (predicate_list && arg_str == NULL)
31508 {
31509 error_at (DECL_SOURCE_LOCATION (decl),
31510 "No dispatcher found for the versioning attributes");
31511 return 0;
31512 }
31513
31514 if (predicate_list)
31515 {
31516 predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_IS];
31517 /* For a C string literal the length includes the trailing NULL. */
31518 predicate_arg = build_string_literal (strlen (arg_str) + 1, arg_str);
31519 predicate_chain = tree_cons (predicate_decl, predicate_arg,
31520 predicate_chain);
31521 }
31522 }
31523
31524 /* Process feature name. */
31525 tok_str = (char *) xmalloc (strlen (attrs_str) + 1);
31526 strcpy (tok_str, attrs_str);
31527 token = strtok (tok_str, ",");
31528 predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_SUPPORTS];
31529
31530 while (token != NULL)
31531 {
31532 /* Do not process "arch=" */
31533 if (strncmp (token, "arch=", 5) == 0)
31534 {
31535 token = strtok (NULL, ",");
31536 continue;
31537 }
31538 for (i = 0; i < NUM_FEATURES; ++i)
31539 {
31540 if (strcmp (token, feature_list[i].name) == 0)
31541 {
31542 if (predicate_list)
31543 {
31544 predicate_arg = build_string_literal (
31545 strlen (feature_list[i].name) + 1,
31546 feature_list[i].name);
31547 predicate_chain = tree_cons (predicate_decl, predicate_arg,
31548 predicate_chain);
31549 }
31550 /* Find the maximum priority feature. */
31551 if (feature_list[i].priority > priority)
31552 priority = feature_list[i].priority;
31553
31554 break;
31555 }
31556 }
31557 if (predicate_list && i == NUM_FEATURES)
31558 {
31559 error_at (DECL_SOURCE_LOCATION (decl),
31560 "No dispatcher found for %s", token);
31561 return 0;
31562 }
31563 token = strtok (NULL, ",");
31564 }
31565 free (tok_str);
31566
31567 if (predicate_list && predicate_chain == NULL_TREE)
31568 {
31569 error_at (DECL_SOURCE_LOCATION (decl),
31570 "No dispatcher found for the versioning attributes : %s",
31571 attrs_str);
31572 return 0;
31573 }
31574 else if (predicate_list)
31575 {
31576 predicate_chain = nreverse (predicate_chain);
31577 *predicate_list = predicate_chain;
31578 }
31579
31580 return priority;
31581 }
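
#if 0
/* Editor's sketch (not part of GCC): the kind of "target" strings this
   routine parses (GCC implements function multi-versioning in the C++
   front end).  The first declaration yields a __builtin_cpu_is ("corei7")
   predicate for the arch= part plus a __builtin_cpu_supports ("popcnt")
   predicate, with the priority taken from the highest-priority entry
   seen; the second is the default version: priority zero, no predicates.  */
__attribute__ ((target ("arch=corei7,popcnt"))) int foo (void);
__attribute__ ((target ("default"))) int foo (void);
#endif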
31582
31583 /* This compares the priority of target features in function DECL1
31584 and DECL2. It returns positive value if DECL1 is higher priority,
31585 negative value if DECL2 is higher priority and 0 if they are the
31586 same. */
31587
31588 static int
31589 ix86_compare_version_priority (tree decl1, tree decl2)
31590 {
31591 unsigned int priority1 = get_builtin_code_for_version (decl1, NULL);
31592 unsigned int priority2 = get_builtin_code_for_version (decl2, NULL);
31593
31594 return (int)priority1 - (int)priority2;
31595 }
31596
31597 /* V1 and V2 point to function versions with different priorities
31598 based on the target ISA. This function compares their priorities. */
31599
31600 static int
31601 feature_compare (const void *v1, const void *v2)
31602 {
31603 typedef struct _function_version_info
31604 {
31605 tree version_decl;
31606 tree predicate_chain;
31607 unsigned int dispatch_priority;
31608 } function_version_info;
31609
31610 const function_version_info c1 = *(const function_version_info *)v1;
31611 const function_version_info c2 = *(const function_version_info *)v2;
31612 return (c2.dispatch_priority - c1.dispatch_priority);
31613 }
31614
31615 /* This function generates the dispatch function for
31616 multi-versioned functions. DISPATCH_DECL is the function which will
31617 contain the dispatch logic. FNDECLS are the function choices for
31618 dispatch, passed as a pointer to a vector of decls. EMPTY_BB is the basic block pointer
31619 in DISPATCH_DECL in which the dispatch code is generated. */
31620
31621 static int
31622 dispatch_function_versions (tree dispatch_decl,
31623 void *fndecls_p,
31624 basic_block *empty_bb)
31625 {
31626 tree default_decl;
31627 gimple ifunc_cpu_init_stmt;
31628 gimple_seq gseq;
31629 int ix;
31630 tree ele;
31631 vec<tree> *fndecls;
31632 unsigned int num_versions = 0;
31633 unsigned int actual_versions = 0;
31634 unsigned int i;
31635
31636 struct _function_version_info
31637 {
31638 tree version_decl;
31639 tree predicate_chain;
31640 unsigned int dispatch_priority;
31641 }*function_version_info;
31642
31643 gcc_assert (dispatch_decl != NULL
31644 && fndecls_p != NULL
31645 && empty_bb != NULL);
31646
31647 /* fndecls_p is actually a vector.  */
31648 fndecls = static_cast<vec<tree> *> (fndecls_p);
31649
31650 /* At least one more version other than the default. */
31651 num_versions = fndecls->length ();
31652 gcc_assert (num_versions >= 2);
31653
31654 function_version_info = (struct _function_version_info *)
31655 XNEWVEC (struct _function_version_info, (num_versions - 1));
31656
31657 /* The first version in the vector is the default decl. */
31658 default_decl = (*fndecls)[0];
31659
31660 push_cfun (DECL_STRUCT_FUNCTION (dispatch_decl));
31661
31662 gseq = bb_seq (*empty_bb);
31663 /* Function version dispatch is via IFUNC. IFUNC resolvers fire before
31664 constructors, so explicitly call __builtin_cpu_init here. */
31665 ifunc_cpu_init_stmt = gimple_build_call_vec (
31666 ix86_builtins [(int) IX86_BUILTIN_CPU_INIT], vNULL);
31667 gimple_seq_add_stmt (&gseq, ifunc_cpu_init_stmt);
31668 gimple_set_bb (ifunc_cpu_init_stmt, *empty_bb);
31669 set_bb_seq (*empty_bb, gseq);
31670
31671 pop_cfun ();
31672
31673
31674 for (ix = 1; fndecls->iterate (ix, &ele); ++ix)
31675 {
31676 tree version_decl = ele;
31677 tree predicate_chain = NULL_TREE;
31678 unsigned int priority;
31679 /* Get attribute string, parse it and find the right predicate decl.
31680 The predicate function could be a lengthy combination of many
31681 features, like arch-type and various isa-variants. */
31682 priority = get_builtin_code_for_version (version_decl,
31683 &predicate_chain);
31684
31685 if (predicate_chain == NULL_TREE)
31686 continue;
31687
31688 function_version_info [actual_versions].version_decl = version_decl;
31689 function_version_info [actual_versions].predicate_chain
31690 = predicate_chain;
31691 function_version_info [actual_versions].dispatch_priority = priority;
31692 actual_versions++;
31693 }
31694
31695 /* Sort the versions according to descending order of dispatch priority. The
31696 priority is based on the ISA. This is not a perfect solution. There
31697 could still be ambiguity. If more than one function version is suitable
31698 to execute, which one should be dispatched? In future, allow the user
31699 to specify a dispatch priority next to the version. */
31700 qsort (function_version_info, actual_versions,
31701 sizeof (struct _function_version_info), feature_compare);
31702
31703 for (i = 0; i < actual_versions; ++i)
31704 *empty_bb = add_condition_to_bb (dispatch_decl,
31705 function_version_info[i].version_decl,
31706 function_version_info[i].predicate_chain,
31707 *empty_bb);
31708
31709 /* dispatch default version at the end. */
31710 *empty_bb = add_condition_to_bb (dispatch_decl, default_decl,
31711 NULL, *empty_bb);
31712
31713 free (function_version_info);
31714 return 0;
31715 }
31716
31717 /* Comparator function to be used in qsort routine to sort attribute
31718 specification strings to "target". */
31719
31720 static int
31721 attr_strcmp (const void *v1, const void *v2)
31722 {
31723 const char *c1 = *(char *const*)v1;
31724 const char *c2 = *(char *const*)v2;
31725 return strcmp (c1, c2);
31726 }
31727
31728 /* ARGLIST is the argument to target attribute. This function tokenizes
31729 the comma separated arguments, sorts them and returns a string which
31730 is a unique identifier for the comma separated arguments. It also
31731 replaces non-identifier characters "=,-" with "_". */
31732
31733 static char *
31734 sorted_attr_string (tree arglist)
31735 {
31736 tree arg;
31737 size_t str_len_sum = 0;
31738 char **args = NULL;
31739 char *attr_str, *ret_str;
31740 char *attr = NULL;
31741 unsigned int argnum = 1;
31742 unsigned int i;
31743
31744 for (arg = arglist; arg; arg = TREE_CHAIN (arg))
31745 {
31746 const char *str = TREE_STRING_POINTER (TREE_VALUE (arg));
31747 size_t len = strlen (str);
31748 str_len_sum += len + 1;
31749 if (arg != arglist)
31750 argnum++;
31751 for (i = 0; i < strlen (str); i++)
31752 if (str[i] == ',')
31753 argnum++;
31754 }
31755
31756 attr_str = XNEWVEC (char, str_len_sum);
31757 str_len_sum = 0;
31758 for (arg = arglist; arg; arg = TREE_CHAIN (arg))
31759 {
31760 const char *str = TREE_STRING_POINTER (TREE_VALUE (arg));
31761 size_t len = strlen (str);
31762 memcpy (attr_str + str_len_sum, str, len);
31763 attr_str[str_len_sum + len] = TREE_CHAIN (arg) ? ',' : '\0';
31764 str_len_sum += len + 1;
31765 }
31766
31767 /* Replace "=,-" with "_". */
31768 for (i = 0; i < strlen (attr_str); i++)
31769 if (attr_str[i] == '=' || attr_str[i]== '-')
31770 attr_str[i] = '_';
31771
31772 if (argnum == 1)
31773 return attr_str;
31774
31775 args = XNEWVEC (char *, argnum);
31776
31777 i = 0;
31778 attr = strtok (attr_str, ",");
31779 while (attr != NULL)
31780 {
31781 args[i] = attr;
31782 i++;
31783 attr = strtok (NULL, ",");
31784 }
31785
31786 qsort (args, argnum, sizeof (char *), attr_strcmp);
31787
31788 ret_str = XNEWVEC (char, str_len_sum);
31789 str_len_sum = 0;
31790 for (i = 0; i < argnum; i++)
31791 {
31792 size_t len = strlen (args[i]);
31793 memcpy (ret_str + str_len_sum, args[i], len);
31794 ret_str[str_len_sum + len] = i < argnum - 1 ? '_' : '\0';
31795 str_len_sum += len + 1;
31796 }
31797
31798 XDELETEVEC (args);
31799 XDELETEVEC (attr_str);
31800 return ret_str;
31801 }
31802
31803 /* This function changes the assembler name for functions that are
31804 versions. If DECL is a function version and has a "target"
31805 attribute, it appends the attribute string to its assembler name. */
31806
31807 static tree
31808 ix86_mangle_function_version_assembler_name (tree decl, tree id)
31809 {
31810 tree version_attr;
31811 const char *orig_name, *version_string;
31812 char *attr_str, *assembler_name;
31813
31814 if (DECL_DECLARED_INLINE_P (decl)
31815 && lookup_attribute ("gnu_inline",
31816 DECL_ATTRIBUTES (decl)))
31817 error_at (DECL_SOURCE_LOCATION (decl),
31818 "Function versions cannot be marked as gnu_inline,"
31819 " bodies have to be generated");
31820
31821 if (DECL_VIRTUAL_P (decl)
31822 || DECL_VINDEX (decl))
31823 sorry ("Virtual function multiversioning not supported");
31824
31825 version_attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
31826
31827 /* target attribute string cannot be NULL. */
31828 gcc_assert (version_attr != NULL_TREE);
31829
31830 orig_name = IDENTIFIER_POINTER (id);
31831 version_string
31832 = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (version_attr)));
31833
31834 if (strcmp (version_string, "default") == 0)
31835 return id;
31836
31837 attr_str = sorted_attr_string (TREE_VALUE (version_attr));
31838 assembler_name = XNEWVEC (char, strlen (orig_name) + strlen (attr_str) + 2);
31839
31840 sprintf (assembler_name, "%s.%s", orig_name, attr_str);
31841
31842 /* Allow assembler name to be modified if already set. */
31843 if (DECL_ASSEMBLER_NAME_SET_P (decl))
31844 SET_DECL_RTL (decl, NULL);
31845
31846 tree ret = get_identifier (assembler_name);
31847 XDELETEVEC (attr_str);
31848 XDELETEVEC (assembler_name);
31849 return ret;
31850 }
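
#if 0
/* Editor's sketch (not part of GCC): effect of the mangling above on a
   C++ function version.  sorted_attr_string turns the attribute
   arguments into a sorted '_'-joined suffix with '=' and '-' replaced
   by '_', and that suffix is appended after a '.'.  */
__attribute__ ((target ("arch=core2,sse4.2")))
int foo (void) { return 1; }   /* assembler name roughly "_Z3foov.arch_core2_sse4.2" */

__attribute__ ((target ("default")))
int foo (void) { return 0; }   /* assembler name left unchanged */
#endif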
31851
31852 /* This function returns true if FN1 and FN2 are versions of the same function,
31853 that is, the target strings of the function decls are different. This assumes
31854 that FN1 and FN2 have the same signature. */
31855
31856 static bool
31857 ix86_function_versions (tree fn1, tree fn2)
31858 {
31859 tree attr1, attr2;
31860 char *target1, *target2;
31861 bool result;
31862
31863 if (TREE_CODE (fn1) != FUNCTION_DECL
31864 || TREE_CODE (fn2) != FUNCTION_DECL)
31865 return false;
31866
31867 attr1 = lookup_attribute ("target", DECL_ATTRIBUTES (fn1));
31868 attr2 = lookup_attribute ("target", DECL_ATTRIBUTES (fn2));
31869
31870 /* At least one function decl should have the target attribute specified. */
31871 if (attr1 == NULL_TREE && attr2 == NULL_TREE)
31872 return false;
31873
31874 /* Diagnose missing target attribute if one of the decls is already
31875 multi-versioned. */
31876 if (attr1 == NULL_TREE || attr2 == NULL_TREE)
31877 {
31878 if (DECL_FUNCTION_VERSIONED (fn1) || DECL_FUNCTION_VERSIONED (fn2))
31879 {
31880 if (attr2 != NULL_TREE)
31881 {
31882 tree tem = fn1;
31883 fn1 = fn2;
31884 fn2 = tem;
31885 attr1 = attr2;
31886 }
31887 error_at (DECL_SOURCE_LOCATION (fn2),
31888 "missing %<target%> attribute for multi-versioned %D",
31889 fn2);
31890 inform (DECL_SOURCE_LOCATION (fn1),
31891 "previous declaration of %D", fn1);
31892 /* Prevent diagnosing of the same error multiple times. */
31893 DECL_ATTRIBUTES (fn2)
31894 = tree_cons (get_identifier ("target"),
31895 copy_node (TREE_VALUE (attr1)),
31896 DECL_ATTRIBUTES (fn2));
31897 }
31898 return false;
31899 }
31900
31901 target1 = sorted_attr_string (TREE_VALUE (attr1));
31902 target2 = sorted_attr_string (TREE_VALUE (attr2));
31903
31904 /* The sorted target strings must be different for fn1 and fn2
31905 to be versions. */
31906 if (strcmp (target1, target2) == 0)
31907 result = false;
31908 else
31909 result = true;
31910
31911 XDELETEVEC (target1);
31912 XDELETEVEC (target2);
31913
31914 return result;
31915 }
31916
31917 static tree
31918 ix86_mangle_decl_assembler_name (tree decl, tree id)
31919 {
31920 /* For function version, add the target suffix to the assembler name. */
31921 if (TREE_CODE (decl) == FUNCTION_DECL
31922 && DECL_FUNCTION_VERSIONED (decl))
31923 id = ix86_mangle_function_version_assembler_name (decl, id);
31924 #ifdef SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME
31925 id = SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME (decl, id);
31926 #endif
31927
31928 return id;
31929 }
31930
31931 /* Return a new name by appending SUFFIX to the DECL name. If make_unique
31932 is true, append the full path name of the source file. */
31933
31934 static char *
31935 make_name (tree decl, const char *suffix, bool make_unique)
31936 {
31937 char *global_var_name;
31938 int name_len;
31939 const char *name;
31940 const char *unique_name = NULL;
31941
31942 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
31943
31944 /* Get a unique name that can be used globally without any chances
31945 of collision at link time. */
31946 if (make_unique)
31947 unique_name = IDENTIFIER_POINTER (get_file_function_name ("\0"));
31948
31949 name_len = strlen (name) + strlen (suffix) + 2;
31950
31951 if (make_unique)
31952 name_len += strlen (unique_name) + 1;
31953 global_var_name = XNEWVEC (char, name_len);
31954
31955 /* Use '.' to concatenate names as it is demangler friendly. */
31956 if (make_unique)
31957 snprintf (global_var_name, name_len, "%s.%s.%s", name, unique_name,
31958 suffix);
31959 else
31960 snprintf (global_var_name, name_len, "%s.%s", name, suffix);
31961
31962 return global_var_name;
31963 }
31964
31965 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
31966
31967 /* Make a dispatcher declaration for the multi-versioned function DECL.
31968 Calls to DECL function will be replaced with calls to the dispatcher
31969 by the front-end. Return the decl created. */
31970
31971 static tree
31972 make_dispatcher_decl (const tree decl)
31973 {
31974 tree func_decl;
31975 char *func_name;
31976 tree fn_type, func_type;
31977 bool is_uniq = false;
31978
31979 if (TREE_PUBLIC (decl) == 0)
31980 is_uniq = true;
31981
31982 func_name = make_name (decl, "ifunc", is_uniq);
31983
31984 fn_type = TREE_TYPE (decl);
31985 func_type = build_function_type (TREE_TYPE (fn_type),
31986 TYPE_ARG_TYPES (fn_type));
31987
31988 func_decl = build_fn_decl (func_name, func_type);
31989 XDELETEVEC (func_name);
31990 TREE_USED (func_decl) = 1;
31991 DECL_CONTEXT (func_decl) = NULL_TREE;
31992 DECL_INITIAL (func_decl) = error_mark_node;
31993 DECL_ARTIFICIAL (func_decl) = 1;
31994 /* Mark this func as external, the resolver will flip it again if
31995 it gets generated. */
31996 DECL_EXTERNAL (func_decl) = 1;
31997 /* This decl will become the IFUNC; IFUNCs have to be externally visible. */
31998 TREE_PUBLIC (func_decl) = 1;
31999
32000 return func_decl;
32001 }
32002
32003 #endif
32004
32005 /* Returns true if DECL is multi-versioned and is the default function,
32006 that is, it is not tagged with a target-specific optimization. */
32007
32008 static bool
32009 is_function_default_version (const tree decl)
32010 {
32011 if (TREE_CODE (decl) != FUNCTION_DECL
32012 || !DECL_FUNCTION_VERSIONED (decl))
32013 return false;
32014 tree attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
32015 gcc_assert (attr);
32016 attr = TREE_VALUE (TREE_VALUE (attr));
32017 return (TREE_CODE (attr) == STRING_CST
32018 && strcmp (TREE_STRING_POINTER (attr), "default") == 0);
32019 }
32020
32021 /* Make a dispatcher declaration for the multi-versioned function DECL.
32022 Calls to DECL function will be replaced with calls to the dispatcher
32023 by the front-end. Returns the decl of the dispatcher function. */
32024
32025 static tree
32026 ix86_get_function_versions_dispatcher (void *decl)
32027 {
32028 tree fn = (tree) decl;
32029 struct cgraph_node *node = NULL;
32030 struct cgraph_node *default_node = NULL;
32031 struct cgraph_function_version_info *node_v = NULL;
32032 struct cgraph_function_version_info *first_v = NULL;
32033
32034 tree dispatch_decl = NULL;
32035
32036 struct cgraph_function_version_info *default_version_info = NULL;
32037
32038 gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn));
32039
32040 node = cgraph_node::get (fn);
32041 gcc_assert (node != NULL);
32042
32043 node_v = node->function_version ();
32044 gcc_assert (node_v != NULL);
32045
32046 if (node_v->dispatcher_resolver != NULL)
32047 return node_v->dispatcher_resolver;
32048
32049 /* Find the default version and make it the first node. */
32050 first_v = node_v;
32051 /* Go to the beginning of the chain. */
32052 while (first_v->prev != NULL)
32053 first_v = first_v->prev;
32054 default_version_info = first_v;
32055 while (default_version_info != NULL)
32056 {
32057 if (is_function_default_version
32058 (default_version_info->this_node->decl))
32059 break;
32060 default_version_info = default_version_info->next;
32061 }
32062
32063 /* If there is no default node, just return NULL. */
32064 if (default_version_info == NULL)
32065 return NULL;
32066
32067 /* Make default info the first node. */
32068 if (first_v != default_version_info)
32069 {
32070 default_version_info->prev->next = default_version_info->next;
32071 if (default_version_info->next)
32072 default_version_info->next->prev = default_version_info->prev;
32073 first_v->prev = default_version_info;
32074 default_version_info->next = first_v;
32075 default_version_info->prev = NULL;
32076 }
32077
32078 default_node = default_version_info->this_node;
32079
32080 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
32081 if (targetm.has_ifunc_p ())
32082 {
32083 struct cgraph_function_version_info *it_v = NULL;
32084 struct cgraph_node *dispatcher_node = NULL;
32085 struct cgraph_function_version_info *dispatcher_version_info = NULL;
32086
32087 /* Right now, the dispatching is done via ifunc. */
32088 dispatch_decl = make_dispatcher_decl (default_node->decl);
32089
32090 dispatcher_node = cgraph_node::get_create (dispatch_decl);
32091 gcc_assert (dispatcher_node != NULL);
32092 dispatcher_node->dispatcher_function = 1;
32093 dispatcher_version_info
32094 = dispatcher_node->insert_new_function_version ();
32095 dispatcher_version_info->next = default_version_info;
32096 dispatcher_node->definition = 1;
32097
32098 /* Set the dispatcher for all the versions. */
32099 it_v = default_version_info;
32100 while (it_v != NULL)
32101 {
32102 it_v->dispatcher_resolver = dispatch_decl;
32103 it_v = it_v->next;
32104 }
32105 }
32106 else
32107 #endif
32108 {
32109 error_at (DECL_SOURCE_LOCATION (default_node->decl),
32110 "multiversioning needs ifunc which is not supported "
32111 "on this target");
32112 }
32113
32114 return dispatch_decl;
32115 }
32116
32117 /* Makes a function attribute of the form NAME(ARG_NAME) and chains
32118 it to CHAIN. */
32119
32120 static tree
32121 make_attribute (const char *name, const char *arg_name, tree chain)
32122 {
32123 tree attr_name;
32124 tree attr_arg_name;
32125 tree attr_args;
32126 tree attr;
32127
32128 attr_name = get_identifier (name);
32129 attr_arg_name = build_string (strlen (arg_name), arg_name);
32130 attr_args = tree_cons (NULL_TREE, attr_arg_name, NULL_TREE);
32131 attr = tree_cons (attr_name, attr_args, chain);
32132 return attr;
32133 }
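
#if 0
/* Editor's sketch (not part of GCC): the source-level equivalent of the
   attribute node built above when it is attached to the dispatcher;
   the names are hypothetical.  */
extern void *foo_resolver (void);
extern int foo (int) __attribute__ ((ifunc ("foo_resolver")));
#endif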
32134
32135 /* Make the resolver function decl to dispatch the versions of
32136 a multi-versioned function, DEFAULT_DECL. Create an
32137 empty basic block in the resolver and store the pointer in
32138 EMPTY_BB. Return the decl of the resolver function. */
32139
32140 static tree
32141 make_resolver_func (const tree default_decl,
32142 const tree dispatch_decl,
32143 basic_block *empty_bb)
32144 {
32145 char *resolver_name;
32146 tree decl, type, decl_name, t;
32147 bool is_uniq = false;
32148
32149 /* IFUNCs have to be globally visible. So, if the default_decl is
32150 not, then the name of the IFUNC should be made unique. */
32151 if (TREE_PUBLIC (default_decl) == 0)
32152 is_uniq = true;
32153
32154 /* Append the filename to the resolver function if the versions are
32155 not externally visible. This is because the resolver function has
32156 to be externally visible for the loader to find it. So, appending
32157 the filename will prevent conflicts with a resolver function from
32158 another module which is based on the same version name. */
32159 resolver_name = make_name (default_decl, "resolver", is_uniq);
32160
32161 /* The resolver function should return a (void *). */
32162 type = build_function_type_list (ptr_type_node, NULL_TREE);
32163
32164 decl = build_fn_decl (resolver_name, type);
32165 decl_name = get_identifier (resolver_name);
32166 SET_DECL_ASSEMBLER_NAME (decl, decl_name);
32167
32168 DECL_NAME (decl) = decl_name;
32169 TREE_USED (decl) = 1;
32170 DECL_ARTIFICIAL (decl) = 1;
32171 DECL_IGNORED_P (decl) = 0;
32172 /* IFUNC resolvers have to be externally visible. */
32173 TREE_PUBLIC (decl) = 1;
32174 DECL_UNINLINABLE (decl) = 1;
32175
32176 /* Resolver is not external, body is generated. */
32177 DECL_EXTERNAL (decl) = 0;
32178 DECL_EXTERNAL (dispatch_decl) = 0;
32179
32180 DECL_CONTEXT (decl) = NULL_TREE;
32181 DECL_INITIAL (decl) = make_node (BLOCK);
32182 DECL_STATIC_CONSTRUCTOR (decl) = 0;
32183
32184 if (DECL_COMDAT_GROUP (default_decl)
32185 || TREE_PUBLIC (default_decl))
32186 {
32187 /* In this case, each translation unit with a call to this
32188 versioned function will put out a resolver. Ensure it
32189 is comdat to keep just one copy. */
32190 DECL_COMDAT (decl) = 1;
32191 make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
32192 }
32193 /* Build result decl and add to function_decl. */
32194 t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, ptr_type_node);
32195 DECL_ARTIFICIAL (t) = 1;
32196 DECL_IGNORED_P (t) = 1;
32197 DECL_RESULT (decl) = t;
32198
32199 gimplify_function_tree (decl);
32200 push_cfun (DECL_STRUCT_FUNCTION (decl));
32201 *empty_bb = init_lowered_empty_function (decl, false);
32202
32203 cgraph_node::add_new_function (decl, true);
32204 cgraph_node::get_create (decl)->call_function_insertion_hooks ();
32205
32206 pop_cfun ();
32207
32208 gcc_assert (dispatch_decl != NULL);
32209 /* Mark dispatch_decl as "ifunc" with resolver as resolver_name. */
32210 DECL_ATTRIBUTES (dispatch_decl)
32211 = make_attribute ("ifunc", resolver_name, DECL_ATTRIBUTES (dispatch_decl));
32212
32213 /* Create the alias for dispatch to resolver here. */
32214 /*cgraph_create_function_alias (dispatch_decl, decl);*/
32215 cgraph_node::create_same_body_alias (dispatch_decl, decl);
32216 XDELETEVEC (resolver_name);
32217 return decl;
32218 }
32219
32220 /* Generate the dispatching code body to dispatch multi-versioned function
32221 DECL. The target hook is called to process the "target" attributes and
32222 provide the code to dispatch the right function at run-time. NODE points
32223 to the dispatcher decl whose body will be created. */
32224
32225 static tree
32226 ix86_generate_version_dispatcher_body (void *node_p)
32227 {
32228 tree resolver_decl;
32229 basic_block empty_bb;
32230 tree default_ver_decl;
32231 struct cgraph_node *versn;
32232 struct cgraph_node *node;
32233
32234 struct cgraph_function_version_info *node_version_info = NULL;
32235 struct cgraph_function_version_info *versn_info = NULL;
32236
32237 node = (cgraph_node *)node_p;
32238
32239 node_version_info = node->function_version ();
32240 gcc_assert (node->dispatcher_function
32241 && node_version_info != NULL);
32242
32243 if (node_version_info->dispatcher_resolver)
32244 return node_version_info->dispatcher_resolver;
32245
32246 /* The first version in the chain corresponds to the default version. */
32247 default_ver_decl = node_version_info->next->this_node->decl;
32248
32249 /* node is going to be an alias, so remove the finalized bit. */
32250 node->definition = false;
32251
32252 resolver_decl = make_resolver_func (default_ver_decl,
32253 node->decl, &empty_bb);
32254
32255 node_version_info->dispatcher_resolver = resolver_decl;
32256
32257 push_cfun (DECL_STRUCT_FUNCTION (resolver_decl));
32258
32259 auto_vec<tree, 2> fn_ver_vec;
32260
32261 for (versn_info = node_version_info->next; versn_info;
32262 versn_info = versn_info->next)
32263 {
32264 versn = versn_info->this_node;
32265 /* Check for virtual functions here again, as by this time it should
32266 have been determined if this function needs a vtable index or
32267 not. This happens for methods in derived classes that override
32268 virtual methods in base classes but are not explicitly marked as
32269 virtual. */
32270 if (DECL_VINDEX (versn->decl))
32271 sorry ("Virtual function multiversioning not supported");
32272
32273 fn_ver_vec.safe_push (versn->decl);
32274 }
32275
32276 dispatch_function_versions (resolver_decl, &fn_ver_vec, &empty_bb);
32277 rebuild_cgraph_edges ();
32278 pop_cfun ();
32279 return resolver_decl;
32280 }
32281 /* This builds the processor_model struct type defined in
32282 libgcc/config/i386/cpuinfo.c */
32283
32284 static tree
32285 build_processor_model_struct (void)
32286 {
32287 const char *field_name[] = {"__cpu_vendor", "__cpu_type", "__cpu_subtype",
32288 "__cpu_features"};
32289 tree field = NULL_TREE, field_chain = NULL_TREE;
32290 int i;
32291 tree type = make_node (RECORD_TYPE);
32292
32293 /* The first 3 fields are unsigned int. */
32294 for (i = 0; i < 3; ++i)
32295 {
32296 field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
32297 get_identifier (field_name[i]), unsigned_type_node);
32298 if (field_chain != NULL_TREE)
32299 DECL_CHAIN (field) = field_chain;
32300 field_chain = field;
32301 }
32302
32303 /* The last field is an array of unsigned integers of size one. */
32304 field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
32305 get_identifier (field_name[3]),
32306 build_array_type (unsigned_type_node,
32307 build_index_type (size_one_node)));
32308 if (field_chain != NULL_TREE)
32309 DECL_CHAIN (field) = field_chain;
32310 field_chain = field;
32311
32312 finish_builtin_struct (type, "__processor_model", field_chain, NULL_TREE);
32313 return type;
32314 }
32315
32316 /* Returns an extern, comdat VAR_DECL of type TYPE and name NAME. */
32317
32318 static tree
32319 make_var_decl (tree type, const char *name)
32320 {
32321 tree new_decl;
32322
32323 new_decl = build_decl (UNKNOWN_LOCATION,
32324 VAR_DECL,
32325 get_identifier (name),
32326 type);
32327
32328 DECL_EXTERNAL (new_decl) = 1;
32329 TREE_STATIC (new_decl) = 1;
32330 TREE_PUBLIC (new_decl) = 1;
32331 DECL_INITIAL (new_decl) = 0;
32332 DECL_ARTIFICIAL (new_decl) = 0;
32333 DECL_PRESERVE_P (new_decl) = 1;
32334
32335 make_decl_one_only (new_decl, DECL_ASSEMBLER_NAME (new_decl));
32336 assemble_variable (new_decl, 0, 0, 0);
32337
32338 return new_decl;
32339 }
32340
32341 /* FNDECL is a __builtin_cpu_is or a __builtin_cpu_supports call that is folded
32342 into an integer defined in libgcc/config/i386/cpuinfo.c */
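/* Roughly, the folded trees correspond to the following C, where M_AMD
   and F_SSE4_2 are values from the enums defined just below and
   __cpu_model is the libgcc variable referenced via make_var_decl:

       __builtin_cpu_is ("amd")
         =>  (int) (__cpu_model.__cpu_vendor == M_AMD)

       __builtin_cpu_supports ("sse4.2")
         =>  (int) (__cpu_model.__cpu_features[0] & (1 << F_SSE4_2))
   */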
32343
32344 static tree
32345 fold_builtin_cpu (tree fndecl, tree *args)
32346 {
32347 unsigned int i;
32348 enum ix86_builtins fn_code = (enum ix86_builtins)
32349 DECL_FUNCTION_CODE (fndecl);
32350 tree param_string_cst = NULL;
32351
32352 /* This is the order of bit-fields in __processor_features in cpuinfo.c */
32353 enum processor_features
32354 {
32355 F_CMOV = 0,
32356 F_MMX,
32357 F_POPCNT,
32358 F_SSE,
32359 F_SSE2,
32360 F_SSE3,
32361 F_SSSE3,
32362 F_SSE4_1,
32363 F_SSE4_2,
32364 F_AVX,
32365 F_AVX2,
32366 F_SSE4_A,
32367 F_FMA4,
32368 F_XOP,
32369 F_FMA,
32370 F_MAX
32371 };
32372
32373 /* These are the values for vendor types and cpu types and subtypes
32374 in cpuinfo.c. The corresponding start value should be subtracted
32375 from cpu types and subtypes before they are used as field values. */
32376 enum processor_model
32377 {
32378 M_INTEL = 1,
32379 M_AMD,
32380 M_CPU_TYPE_START,
32381 M_INTEL_BONNELL,
32382 M_INTEL_CORE2,
32383 M_INTEL_COREI7,
32384 M_AMDFAM10H,
32385 M_AMDFAM15H,
32386 M_INTEL_SILVERMONT,
32387 M_AMD_BTVER1,
32388 M_AMD_BTVER2,
32389 M_CPU_SUBTYPE_START,
32390 M_INTEL_COREI7_NEHALEM,
32391 M_INTEL_COREI7_WESTMERE,
32392 M_INTEL_COREI7_SANDYBRIDGE,
32393 M_AMDFAM10H_BARCELONA,
32394 M_AMDFAM10H_SHANGHAI,
32395 M_AMDFAM10H_ISTANBUL,
32396 M_AMDFAM15H_BDVER1,
32397 M_AMDFAM15H_BDVER2,
32398 M_AMDFAM15H_BDVER3,
32399 M_AMDFAM15H_BDVER4,
32400 M_INTEL_COREI7_IVYBRIDGE,
32401 M_INTEL_COREI7_HASWELL
32402 };
32403
32404 static struct _arch_names_table
32405 {
32406 const char *const name;
32407 const enum processor_model model;
32408 }
32409 const arch_names_table[] =
32410 {
32411 {"amd", M_AMD},
32412 {"intel", M_INTEL},
32413 {"atom", M_INTEL_BONNELL},
32414 {"slm", M_INTEL_SILVERMONT},
32415 {"core2", M_INTEL_CORE2},
32416 {"corei7", M_INTEL_COREI7},
32417 {"nehalem", M_INTEL_COREI7_NEHALEM},
32418 {"westmere", M_INTEL_COREI7_WESTMERE},
32419 {"sandybridge", M_INTEL_COREI7_SANDYBRIDGE},
32420 {"ivybridge", M_INTEL_COREI7_IVYBRIDGE},
32421 {"haswell", M_INTEL_COREI7_HASWELL},
32422 {"bonnell", M_INTEL_BONNELL},
32423 {"silvermont", M_INTEL_SILVERMONT},
32424 {"amdfam10h", M_AMDFAM10H},
32425 {"barcelona", M_AMDFAM10H_BARCELONA},
32426 {"shanghai", M_AMDFAM10H_SHANGHAI},
32427 {"istanbul", M_AMDFAM10H_ISTANBUL},
32428 {"btver1", M_AMD_BTVER1},
32429 {"amdfam15h", M_AMDFAM15H},
32430 {"bdver1", M_AMDFAM15H_BDVER1},
32431 {"bdver2", M_AMDFAM15H_BDVER2},
32432 {"bdver3", M_AMDFAM15H_BDVER3},
32433 {"bdver4", M_AMDFAM15H_BDVER4},
32434 {"btver2", M_AMD_BTVER2},
32435 };
32436
32437 static struct _isa_names_table
32438 {
32439 const char *const name;
32440 const enum processor_features feature;
32441 }
32442 const isa_names_table[] =
32443 {
32444 {"cmov", F_CMOV},
32445 {"mmx", F_MMX},
32446 {"popcnt", F_POPCNT},
32447 {"sse", F_SSE},
32448 {"sse2", F_SSE2},
32449 {"sse3", F_SSE3},
32450 {"ssse3", F_SSSE3},
32451 {"sse4a", F_SSE4_A},
32452 {"sse4.1", F_SSE4_1},
32453 {"sse4.2", F_SSE4_2},
32454 {"avx", F_AVX},
32455 {"fma4", F_FMA4},
32456 {"xop", F_XOP},
32457 {"fma", F_FMA},
32458 {"avx2", F_AVX2}
32459 };
32460
32461 tree __processor_model_type = build_processor_model_struct ();
32462 tree __cpu_model_var = make_var_decl (__processor_model_type,
32463 "__cpu_model");
32464
32465
32466 varpool_add_new_variable (__cpu_model_var);
32467
32468 gcc_assert ((args != NULL) && (*args != NULL));
32469
32470 param_string_cst = *args;
32471 while (param_string_cst
32472 && TREE_CODE (param_string_cst) != STRING_CST)
32473 {
32474 /* *args must be an expr that can contain other EXPRs leading to a
32475 STRING_CST. */
32476 if (!EXPR_P (param_string_cst))
32477 {
32478 error ("Parameter to builtin must be a string constant or literal");
32479 return integer_zero_node;
32480 }
32481 param_string_cst = TREE_OPERAND (EXPR_CHECK (param_string_cst), 0);
32482 }
32483
32484 gcc_assert (param_string_cst);
32485
32486 if (fn_code == IX86_BUILTIN_CPU_IS)
32487 {
32488 tree ref;
32489 tree field;
32490 tree final;
32491
32492 unsigned int field_val = 0;
32493 unsigned int NUM_ARCH_NAMES
32494 = sizeof (arch_names_table) / sizeof (struct _arch_names_table);
32495
32496 for (i = 0; i < NUM_ARCH_NAMES; i++)
32497 if (strcmp (arch_names_table[i].name,
32498 TREE_STRING_POINTER (param_string_cst)) == 0)
32499 break;
32500
32501 if (i == NUM_ARCH_NAMES)
32502 {
32503 error ("Parameter to builtin not valid: %s",
32504 TREE_STRING_POINTER (param_string_cst));
32505 return integer_zero_node;
32506 }
32507
32508 field = TYPE_FIELDS (__processor_model_type);
32509 field_val = arch_names_table[i].model;
32510
32511 /* CPU types are stored in the next field. */
32512 if (field_val > M_CPU_TYPE_START
32513 && field_val < M_CPU_SUBTYPE_START)
32514 {
32515 field = DECL_CHAIN (field);
32516 field_val -= M_CPU_TYPE_START;
32517 }
32518
32519 /* CPU subtypes are stored in the next field. */
32520 if (field_val > M_CPU_SUBTYPE_START)
32521 {
32522 field = DECL_CHAIN (DECL_CHAIN (field));
32523 field_val -= M_CPU_SUBTYPE_START;
32524 }
32525
32526 /* Get the appropriate field in __cpu_model. */
32527 ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
32528 field, NULL_TREE);
32529
32530 /* Check the value. */
32531 final = build2 (EQ_EXPR, unsigned_type_node, ref,
32532 build_int_cstu (unsigned_type_node, field_val));
32533 return build1 (CONVERT_EXPR, integer_type_node, final);
32534 }
32535 else if (fn_code == IX86_BUILTIN_CPU_SUPPORTS)
32536 {
32537 tree ref;
32538 tree array_elt;
32539 tree field;
32540 tree final;
32541
32542 unsigned int field_val = 0;
32543 unsigned int NUM_ISA_NAMES
32544 = sizeof (isa_names_table) / sizeof (struct _isa_names_table);
32545
32546 for (i = 0; i < NUM_ISA_NAMES; i++)
32547 if (strcmp (isa_names_table[i].name,
32548 TREE_STRING_POINTER (param_string_cst)) == 0)
32549 break;
32550
32551 if (i == NUM_ISA_NAMES)
32552 {
32553 error ("Parameter to builtin not valid: %s",
32554 TREE_STRING_POINTER (param_string_cst));
32555 return integer_zero_node;
32556 }
32557
32558 field = TYPE_FIELDS (__processor_model_type);
32559 /* Get the last field, which is __cpu_features. */
32560 while (DECL_CHAIN (field))
32561 field = DECL_CHAIN (field);
32562
32563 /* Get the appropriate field: __cpu_model.__cpu_features */
32564 ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
32565 field, NULL_TREE);
32566
32567 /* Access the 0th element of __cpu_features array. */
32568 array_elt = build4 (ARRAY_REF, unsigned_type_node, ref,
32569 integer_zero_node, NULL_TREE, NULL_TREE);
32570
32571 field_val = (1 << isa_names_table[i].feature);
32572 /* Return __cpu_model.__cpu_features[0] & field_val */
32573 final = build2 (BIT_AND_EXPR, unsigned_type_node, array_elt,
32574 build_int_cstu (unsigned_type_node, field_val));
32575 return build1 (CONVERT_EXPR, integer_type_node, final);
32576 }
32577 gcc_unreachable ();
32578 }
32579
32580 static tree
32581 ix86_fold_builtin (tree fndecl, int n_args,
32582 tree *args, bool ignore ATTRIBUTE_UNUSED)
32583 {
32584 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
32585 {
32586 enum ix86_builtins fn_code = (enum ix86_builtins)
32587 DECL_FUNCTION_CODE (fndecl);
32588 if (fn_code == IX86_BUILTIN_CPU_IS
32589 || fn_code == IX86_BUILTIN_CPU_SUPPORTS)
32590 {
32591 gcc_assert (n_args == 1);
32592 return fold_builtin_cpu (fndecl, args);
32593 }
32594 }
32595
32596 #ifdef SUBTARGET_FOLD_BUILTIN
32597 return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore);
32598 #endif
32599
32600 return NULL_TREE;
32601 }
32602
32603 /* Make builtins to detect cpu type and features supported. NAME is
32604 the builtin name, CODE is the builtin code, and FTYPE is the function
32605 type of the builtin. */
32606
32607 static void
32608 make_cpu_type_builtin (const char* name, int code,
32609 enum ix86_builtin_func_type ftype, bool is_const)
32610 {
32611 tree decl;
32612 tree type;
32613
32614 type = ix86_get_builtin_func_type (ftype);
32615 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
32616 NULL, NULL_TREE);
32617 gcc_assert (decl != NULL_TREE);
32618 ix86_builtins[(int) code] = decl;
32619 TREE_READONLY (decl) = is_const;
32620 }
32621
32622 /* Make builtins to get CPU type and features supported. The created
32623 builtins are:
32624
32625 __builtin_cpu_init (), to detect cpu type and features,
32626 __builtin_cpu_is ("<CPUNAME>"), to check if cpu is of type <CPUNAME>,
32627 __builtin_cpu_supports ("<FEATURE>"), to check if cpu supports <FEATURE>
32628 */
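/* A minimal usage sketch (user code, illustrative only; the helper
   functions are hypothetical):

       if (__builtin_cpu_supports ("avx2"))
         do_avx2_version ();
       else
         do_generic_version ();

   __builtin_cpu_init () only needs to be called explicitly from code that
   can run before constructors; the detection otherwise runs from a libgcc
   constructor.  */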
32629
32630 static void
32631 ix86_init_platform_type_builtins (void)
32632 {
32633 make_cpu_type_builtin ("__builtin_cpu_init", IX86_BUILTIN_CPU_INIT,
32634 INT_FTYPE_VOID, false);
32635 make_cpu_type_builtin ("__builtin_cpu_is", IX86_BUILTIN_CPU_IS,
32636 INT_FTYPE_PCCHAR, true);
32637 make_cpu_type_builtin ("__builtin_cpu_supports", IX86_BUILTIN_CPU_SUPPORTS,
32638 INT_FTYPE_PCCHAR, true);
32639 }
32640
32641 /* Internal method for ix86_init_builtins. */
32642
32643 static void
32644 ix86_init_builtins_va_builtins_abi (void)
32645 {
32646 tree ms_va_ref, sysv_va_ref;
32647 tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
32648 tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
32649 tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
32650 tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;
32651
32652 if (!TARGET_64BIT)
32653 return;
32654 fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
32655 fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
32656 ms_va_ref = build_reference_type (ms_va_list_type_node);
32657 sysv_va_ref =
32658 build_pointer_type (TREE_TYPE (sysv_va_list_type_node));
32659
32660 fnvoid_va_end_ms =
32661 build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
32662 fnvoid_va_start_ms =
32663 build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
32664 fnvoid_va_end_sysv =
32665 build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
32666 fnvoid_va_start_sysv =
32667 build_varargs_function_type_list (void_type_node, sysv_va_ref,
32668 NULL_TREE);
32669 fnvoid_va_copy_ms =
32670 build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
32671 NULL_TREE);
32672 fnvoid_va_copy_sysv =
32673 build_function_type_list (void_type_node, sysv_va_ref,
32674 sysv_va_ref, NULL_TREE);
32675
32676 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
32677 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
32678 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
32679 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
32680 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
32681 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
32682 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
32683 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
32684 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
32685 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
32686 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
32687 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
32688 }
32689
32690 static void
32691 ix86_init_builtin_types (void)
32692 {
32693 tree float128_type_node, float80_type_node;
32694
32695 /* The __float80 type. */
32696 float80_type_node = long_double_type_node;
32697 if (TYPE_MODE (float80_type_node) != XFmode)
32698 {
32699 /* The __float80 type. */
32700 float80_type_node = make_node (REAL_TYPE);
32701
32702 TYPE_PRECISION (float80_type_node) = 80;
32703 layout_type (float80_type_node);
32704 }
32705 lang_hooks.types.register_builtin_type (float80_type_node, "__float80");
32706
32707 /* The __float128 type. */
32708 float128_type_node = make_node (REAL_TYPE);
32709 TYPE_PRECISION (float128_type_node) = 128;
32710 layout_type (float128_type_node);
32711 lang_hooks.types.register_builtin_type (float128_type_node, "__float128");
32712
32713 /* This macro is built by i386-builtin-types.awk. */
32714 DEFINE_BUILTIN_PRIMITIVE_TYPES;
32715 }
32716
32717 static void
32718 ix86_init_builtins (void)
32719 {
32720 tree t;
32721
32722 ix86_init_builtin_types ();
32723
32724 /* Builtins to get CPU type and features. */
32725 ix86_init_platform_type_builtins ();
32726
32727 /* TFmode support builtins. */
32728 def_builtin_const (0, "__builtin_infq",
32729 FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ);
32730 def_builtin_const (0, "__builtin_huge_valq",
32731 FLOAT128_FTYPE_VOID, IX86_BUILTIN_HUGE_VALQ);
32732
32733 /* We will expand them to a normal call if SSE isn't available since
32734 they are used by libgcc. */
32735 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128);
32736 t = add_builtin_function ("__builtin_fabsq", t, IX86_BUILTIN_FABSQ,
32737 BUILT_IN_MD, "__fabstf2", NULL_TREE);
32738 TREE_READONLY (t) = 1;
32739 ix86_builtins[(int) IX86_BUILTIN_FABSQ] = t;
32740
32741 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128);
32742 t = add_builtin_function ("__builtin_copysignq", t, IX86_BUILTIN_COPYSIGNQ,
32743 BUILT_IN_MD, "__copysigntf3", NULL_TREE);
32744 TREE_READONLY (t) = 1;
32745 ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = t;
32746
32747 ix86_init_tm_builtins ();
32748 ix86_init_mmx_sse_builtins ();
32749
32750 if (TARGET_LP64)
32751 ix86_init_builtins_va_builtins_abi ();
32752
32753 #ifdef SUBTARGET_INIT_BUILTINS
32754 SUBTARGET_INIT_BUILTINS;
32755 #endif
32756 }
32757
32758 /* Return the ix86 builtin for CODE. */
32759
32760 static tree
32761 ix86_builtin_decl (unsigned code, bool)
32762 {
32763 if (code >= IX86_BUILTIN_MAX)
32764 return error_mark_node;
32765
32766 return ix86_builtins[code];
32767 }
32768
32769 /* Errors in the source file can cause expand_expr to return const0_rtx
32770 where we expect a vector. To avoid crashing, use one of the vector
32771 clear instructions. */
32772 static rtx
32773 safe_vector_operand (rtx x, enum machine_mode mode)
32774 {
32775 if (x == const0_rtx)
32776 x = CONST0_RTX (mode);
32777 return x;
32778 }
32779
32780 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
32781
32782 static rtx
32783 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
32784 {
32785 rtx pat;
32786 tree arg0 = CALL_EXPR_ARG (exp, 0);
32787 tree arg1 = CALL_EXPR_ARG (exp, 1);
32788 rtx op0 = expand_normal (arg0);
32789 rtx op1 = expand_normal (arg1);
32790 enum machine_mode tmode = insn_data[icode].operand[0].mode;
32791 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
32792 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
32793
32794 if (VECTOR_MODE_P (mode0))
32795 op0 = safe_vector_operand (op0, mode0);
32796 if (VECTOR_MODE_P (mode1))
32797 op1 = safe_vector_operand (op1, mode1);
32798
32799 if (optimize || !target
32800 || GET_MODE (target) != tmode
32801 || !insn_data[icode].operand[0].predicate (target, tmode))
32802 target = gen_reg_rtx (tmode);
32803
32804 if (GET_MODE (op1) == SImode && mode1 == TImode)
32805 {
32806 rtx x = gen_reg_rtx (V4SImode);
32807 emit_insn (gen_sse2_loadd (x, op1));
32808 op1 = gen_lowpart (TImode, x);
32809 }
32810
32811 if (!insn_data[icode].operand[1].predicate (op0, mode0))
32812 op0 = copy_to_mode_reg (mode0, op0);
32813 if (!insn_data[icode].operand[2].predicate (op1, mode1))
32814 op1 = copy_to_mode_reg (mode1, op1);
32815
32816 pat = GEN_FCN (icode) (target, op0, op1);
32817 if (! pat)
32818 return 0;
32819
32820 emit_insn (pat);
32821
32822 return target;
32823 }
32824
32825 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
32826
32827 static rtx
32828 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
32829 enum ix86_builtin_func_type m_type,
32830 enum rtx_code sub_code)
32831 {
32832 rtx pat;
32833 int i;
32834 int nargs;
32835 bool comparison_p = false;
32836 bool tf_p = false;
32837 bool last_arg_constant = false;
32838 int num_memory = 0;
32839 struct {
32840 rtx op;
32841 enum machine_mode mode;
32842 } args[4];
32843
32844 enum machine_mode tmode = insn_data[icode].operand[0].mode;
32845
32846 switch (m_type)
32847 {
32848 case MULTI_ARG_4_DF2_DI_I:
32849 case MULTI_ARG_4_DF2_DI_I1:
32850 case MULTI_ARG_4_SF2_SI_I:
32851 case MULTI_ARG_4_SF2_SI_I1:
32852 nargs = 4;
32853 last_arg_constant = true;
32854 break;
32855
32856 case MULTI_ARG_3_SF:
32857 case MULTI_ARG_3_DF:
32858 case MULTI_ARG_3_SF2:
32859 case MULTI_ARG_3_DF2:
32860 case MULTI_ARG_3_DI:
32861 case MULTI_ARG_3_SI:
32862 case MULTI_ARG_3_SI_DI:
32863 case MULTI_ARG_3_HI:
32864 case MULTI_ARG_3_HI_SI:
32865 case MULTI_ARG_3_QI:
32866 case MULTI_ARG_3_DI2:
32867 case MULTI_ARG_3_SI2:
32868 case MULTI_ARG_3_HI2:
32869 case MULTI_ARG_3_QI2:
32870 nargs = 3;
32871 break;
32872
32873 case MULTI_ARG_2_SF:
32874 case MULTI_ARG_2_DF:
32875 case MULTI_ARG_2_DI:
32876 case MULTI_ARG_2_SI:
32877 case MULTI_ARG_2_HI:
32878 case MULTI_ARG_2_QI:
32879 nargs = 2;
32880 break;
32881
32882 case MULTI_ARG_2_DI_IMM:
32883 case MULTI_ARG_2_SI_IMM:
32884 case MULTI_ARG_2_HI_IMM:
32885 case MULTI_ARG_2_QI_IMM:
32886 nargs = 2;
32887 last_arg_constant = true;
32888 break;
32889
32890 case MULTI_ARG_1_SF:
32891 case MULTI_ARG_1_DF:
32892 case MULTI_ARG_1_SF2:
32893 case MULTI_ARG_1_DF2:
32894 case MULTI_ARG_1_DI:
32895 case MULTI_ARG_1_SI:
32896 case MULTI_ARG_1_HI:
32897 case MULTI_ARG_1_QI:
32898 case MULTI_ARG_1_SI_DI:
32899 case MULTI_ARG_1_HI_DI:
32900 case MULTI_ARG_1_HI_SI:
32901 case MULTI_ARG_1_QI_DI:
32902 case MULTI_ARG_1_QI_SI:
32903 case MULTI_ARG_1_QI_HI:
32904 nargs = 1;
32905 break;
32906
32907 case MULTI_ARG_2_DI_CMP:
32908 case MULTI_ARG_2_SI_CMP:
32909 case MULTI_ARG_2_HI_CMP:
32910 case MULTI_ARG_2_QI_CMP:
32911 nargs = 2;
32912 comparison_p = true;
32913 break;
32914
32915 case MULTI_ARG_2_SF_TF:
32916 case MULTI_ARG_2_DF_TF:
32917 case MULTI_ARG_2_DI_TF:
32918 case MULTI_ARG_2_SI_TF:
32919 case MULTI_ARG_2_HI_TF:
32920 case MULTI_ARG_2_QI_TF:
32921 nargs = 2;
32922 tf_p = true;
32923 break;
32924
32925 default:
32926 gcc_unreachable ();
32927 }
32928
32929 if (optimize || !target
32930 || GET_MODE (target) != tmode
32931 || !insn_data[icode].operand[0].predicate (target, tmode))
32932 target = gen_reg_rtx (tmode);
32933
32934 gcc_assert (nargs <= 4);
32935
32936 for (i = 0; i < nargs; i++)
32937 {
32938 tree arg = CALL_EXPR_ARG (exp, i);
32939 rtx op = expand_normal (arg);
32940 int adjust = (comparison_p) ? 1 : 0;
32941 enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
32942
32943 if (last_arg_constant && i == nargs - 1)
32944 {
32945 if (!insn_data[icode].operand[i + 1].predicate (op, mode))
32946 {
32947 enum insn_code new_icode = icode;
32948 switch (icode)
32949 {
32950 case CODE_FOR_xop_vpermil2v2df3:
32951 case CODE_FOR_xop_vpermil2v4sf3:
32952 case CODE_FOR_xop_vpermil2v4df3:
32953 case CODE_FOR_xop_vpermil2v8sf3:
32954 error ("the last argument must be a 2-bit immediate");
32955 return gen_reg_rtx (tmode);
32956 case CODE_FOR_xop_rotlv2di3:
32957 new_icode = CODE_FOR_rotlv2di3;
32958 goto xop_rotl;
32959 case CODE_FOR_xop_rotlv4si3:
32960 new_icode = CODE_FOR_rotlv4si3;
32961 goto xop_rotl;
32962 case CODE_FOR_xop_rotlv8hi3:
32963 new_icode = CODE_FOR_rotlv8hi3;
32964 goto xop_rotl;
32965 case CODE_FOR_xop_rotlv16qi3:
32966 new_icode = CODE_FOR_rotlv16qi3;
32967 xop_rotl:
32968 if (CONST_INT_P (op))
32969 {
32970 int mask = GET_MODE_BITSIZE (GET_MODE_INNER (tmode)) - 1;
32971 op = GEN_INT (INTVAL (op) & mask);
32972 gcc_checking_assert
32973 (insn_data[icode].operand[i + 1].predicate (op, mode));
32974 }
32975 else
32976 {
32977 gcc_checking_assert
32978 (nargs == 2
32979 && insn_data[new_icode].operand[0].mode == tmode
32980 && insn_data[new_icode].operand[1].mode == tmode
32981 && insn_data[new_icode].operand[2].mode == mode
32982 && insn_data[new_icode].operand[0].predicate
32983 == insn_data[icode].operand[0].predicate
32984 && insn_data[new_icode].operand[1].predicate
32985 == insn_data[icode].operand[1].predicate);
32986 icode = new_icode;
32987 goto non_constant;
32988 }
32989 break;
32990 default:
32991 gcc_unreachable ();
32992 }
32993 }
32994 }
32995 else
32996 {
32997 non_constant:
32998 if (VECTOR_MODE_P (mode))
32999 op = safe_vector_operand (op, mode);
33000
33001 /* If we aren't optimizing, only allow one memory operand to be
33002 generated. */
33003 if (memory_operand (op, mode))
33004 num_memory++;
33005
33006 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
33007
33008 if (optimize
33009 || !insn_data[icode].operand[i+adjust+1].predicate (op, mode)
33010 || num_memory > 1)
33011 op = force_reg (mode, op);
33012 }
33013
33014 args[i].op = op;
33015 args[i].mode = mode;
33016 }
33017
33018 switch (nargs)
33019 {
33020 case 1:
33021 pat = GEN_FCN (icode) (target, args[0].op);
33022 break;
33023
33024 case 2:
33025 if (tf_p)
33026 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
33027 GEN_INT ((int)sub_code));
33028 else if (! comparison_p)
33029 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
33030 else
33031 {
33032 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
33033 args[0].op,
33034 args[1].op);
33035
33036 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
33037 }
33038 break;
33039
33040 case 3:
33041 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
33042 break;
33043
33044 case 4:
33045 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op, args[3].op);
33046 break;
33047
33048 default:
33049 gcc_unreachable ();
33050 }
33051
33052 if (! pat)
33053 return 0;
33054
33055 emit_insn (pat);
33056 return target;
33057 }
33058
33059 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
33060 insns with vec_merge. */
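/* For example, sqrtss-style builtins: the low element receives the unary
   result while the remaining elements are taken from the source operand,
   which is why op1 is set to op0 below and merged via vec_merge.  */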
33061
33062 static rtx
33063 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
33064 rtx target)
33065 {
33066 rtx pat;
33067 tree arg0 = CALL_EXPR_ARG (exp, 0);
33068 rtx op1, op0 = expand_normal (arg0);
33069 enum machine_mode tmode = insn_data[icode].operand[0].mode;
33070 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
33071
33072 if (optimize || !target
33073 || GET_MODE (target) != tmode
33074 || !insn_data[icode].operand[0].predicate (target, tmode))
33075 target = gen_reg_rtx (tmode);
33076
33077 if (VECTOR_MODE_P (mode0))
33078 op0 = safe_vector_operand (op0, mode0);
33079
33080 if ((optimize && !register_operand (op0, mode0))
33081 || !insn_data[icode].operand[1].predicate (op0, mode0))
33082 op0 = copy_to_mode_reg (mode0, op0);
33083
33084 op1 = op0;
33085 if (!insn_data[icode].operand[2].predicate (op1, mode0))
33086 op1 = copy_to_mode_reg (mode0, op1);
33087
33088 pat = GEN_FCN (icode) (target, op0, op1);
33089 if (! pat)
33090 return 0;
33091 emit_insn (pat);
33092 return target;
33093 }
33094
33095 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
33096
33097 static rtx
33098 ix86_expand_sse_compare (const struct builtin_description *d,
33099 tree exp, rtx target, bool swap)
33100 {
33101 rtx pat;
33102 tree arg0 = CALL_EXPR_ARG (exp, 0);
33103 tree arg1 = CALL_EXPR_ARG (exp, 1);
33104 rtx op0 = expand_normal (arg0);
33105 rtx op1 = expand_normal (arg1);
33106 rtx op2;
33107 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
33108 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
33109 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
33110 enum rtx_code comparison = d->comparison;
33111
33112 if (VECTOR_MODE_P (mode0))
33113 op0 = safe_vector_operand (op0, mode0);
33114 if (VECTOR_MODE_P (mode1))
33115 op1 = safe_vector_operand (op1, mode1);
33116
33117 /* Swap operands if we have a comparison that isn't available in
33118 hardware. */
33119 if (swap)
33120 {
33121 rtx tmp = gen_reg_rtx (mode1);
33122 emit_move_insn (tmp, op1);
33123 op1 = op0;
33124 op0 = tmp;
33125 }
33126
33127 if (optimize || !target
33128 || GET_MODE (target) != tmode
33129 || !insn_data[d->icode].operand[0].predicate (target, tmode))
33130 target = gen_reg_rtx (tmode);
33131
33132 if ((optimize && !register_operand (op0, mode0))
33133 || !insn_data[d->icode].operand[1].predicate (op0, mode0))
33134 op0 = copy_to_mode_reg (mode0, op0);
33135 if ((optimize && !register_operand (op1, mode1))
33136 || !insn_data[d->icode].operand[2].predicate (op1, mode1))
33137 op1 = copy_to_mode_reg (mode1, op1);
33138
33139 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
33140 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
33141 if (! pat)
33142 return 0;
33143 emit_insn (pat);
33144 return target;
33145 }
33146
33147 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
33148
33149 static rtx
33150 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
33151 rtx target)
33152 {
33153 rtx pat;
33154 tree arg0 = CALL_EXPR_ARG (exp, 0);
33155 tree arg1 = CALL_EXPR_ARG (exp, 1);
33156 rtx op0 = expand_normal (arg0);
33157 rtx op1 = expand_normal (arg1);
33158 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
33159 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
33160 enum rtx_code comparison = d->comparison;
33161
33162 if (VECTOR_MODE_P (mode0))
33163 op0 = safe_vector_operand (op0, mode0);
33164 if (VECTOR_MODE_P (mode1))
33165 op1 = safe_vector_operand (op1, mode1);
33166
33167 /* Swap operands if we have a comparison that isn't available in
33168 hardware. */
33169 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
33170 {
33171 rtx tmp = op1;
33172 op1 = op0;
33173 op0 = tmp;
33174 }
33175
33176 target = gen_reg_rtx (SImode);
33177 emit_move_insn (target, const0_rtx);
33178 target = gen_rtx_SUBREG (QImode, target, 0);
33179
33180 if ((optimize && !register_operand (op0, mode0))
33181 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
33182 op0 = copy_to_mode_reg (mode0, op0);
33183 if ((optimize && !register_operand (op1, mode1))
33184 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
33185 op1 = copy_to_mode_reg (mode1, op1);
33186
33187 pat = GEN_FCN (d->icode) (op0, op1);
33188 if (! pat)
33189 return 0;
33190 emit_insn (pat);
33191 emit_insn (gen_rtx_SET (VOIDmode,
33192 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
33193 gen_rtx_fmt_ee (comparison, QImode,
33194 SET_DEST (pat),
33195 const0_rtx)));
33196
33197 return SUBREG_REG (target);
33198 }
33199
33200 /* Subroutines of ix86_expand_args_builtin to take care of round insns. */
33201
33202 static rtx
33203 ix86_expand_sse_round (const struct builtin_description *d, tree exp,
33204 rtx target)
33205 {
33206 rtx pat;
33207 tree arg0 = CALL_EXPR_ARG (exp, 0);
33208 rtx op1, op0 = expand_normal (arg0);
33209 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
33210 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
33211
33212 if (optimize || target == 0
33213 || GET_MODE (target) != tmode
33214 || !insn_data[d->icode].operand[0].predicate (target, tmode))
33215 target = gen_reg_rtx (tmode);
33216
33217 if (VECTOR_MODE_P (mode0))
33218 op0 = safe_vector_operand (op0, mode0);
33219
33220 if ((optimize && !register_operand (op0, mode0))
33221 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
33222 op0 = copy_to_mode_reg (mode0, op0);
33223
33224 op1 = GEN_INT (d->comparison);
33225
33226 pat = GEN_FCN (d->icode) (target, op0, op1);
33227 if (! pat)
33228 return 0;
33229 emit_insn (pat);
33230 return target;
33231 }
33232
33233 static rtx
33234 ix86_expand_sse_round_vec_pack_sfix (const struct builtin_description *d,
33235 tree exp, rtx target)
33236 {
33237 rtx pat;
33238 tree arg0 = CALL_EXPR_ARG (exp, 0);
33239 tree arg1 = CALL_EXPR_ARG (exp, 1);
33240 rtx op0 = expand_normal (arg0);
33241 rtx op1 = expand_normal (arg1);
33242 rtx op2;
33243 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
33244 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
33245 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
33246
33247 if (optimize || target == 0
33248 || GET_MODE (target) != tmode
33249 || !insn_data[d->icode].operand[0].predicate (target, tmode))
33250 target = gen_reg_rtx (tmode);
33251
33252 op0 = safe_vector_operand (op0, mode0);
33253 op1 = safe_vector_operand (op1, mode1);
33254
33255 if ((optimize && !register_operand (op0, mode0))
33256 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
33257 op0 = copy_to_mode_reg (mode0, op0);
33258 if ((optimize && !register_operand (op1, mode1))
33259 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
33260 op1 = copy_to_mode_reg (mode1, op1);
33261
33262 op2 = GEN_INT (d->comparison);
33263
33264 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
33265 if (! pat)
33266 return 0;
33267 emit_insn (pat);
33268 return target;
33269 }
33270
33271 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
33272
33273 static rtx
33274 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
33275 rtx target)
33276 {
33277 rtx pat;
33278 tree arg0 = CALL_EXPR_ARG (exp, 0);
33279 tree arg1 = CALL_EXPR_ARG (exp, 1);
33280 rtx op0 = expand_normal (arg0);
33281 rtx op1 = expand_normal (arg1);
33282 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
33283 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
33284 enum rtx_code comparison = d->comparison;
33285
33286 if (VECTOR_MODE_P (mode0))
33287 op0 = safe_vector_operand (op0, mode0);
33288 if (VECTOR_MODE_P (mode1))
33289 op1 = safe_vector_operand (op1, mode1);
33290
33291 target = gen_reg_rtx (SImode);
33292 emit_move_insn (target, const0_rtx);
33293 target = gen_rtx_SUBREG (QImode, target, 0);
33294
33295 if ((optimize && !register_operand (op0, mode0))
33296 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
33297 op0 = copy_to_mode_reg (mode0, op0);
33298 if ((optimize && !register_operand (op1, mode1))
33299 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
33300 op1 = copy_to_mode_reg (mode1, op1);
33301
33302 pat = GEN_FCN (d->icode) (op0, op1);
33303 if (! pat)
33304 return 0;
33305 emit_insn (pat);
33306 emit_insn (gen_rtx_SET (VOIDmode,
33307 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
33308 gen_rtx_fmt_ee (comparison, QImode,
33309 SET_DEST (pat),
33310 const0_rtx)));
33311
33312 return SUBREG_REG (target);
33313 }
33314
33315 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
33316
33317 static rtx
33318 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
33319 tree exp, rtx target)
33320 {
33321 rtx pat;
33322 tree arg0 = CALL_EXPR_ARG (exp, 0);
33323 tree arg1 = CALL_EXPR_ARG (exp, 1);
33324 tree arg2 = CALL_EXPR_ARG (exp, 2);
33325 tree arg3 = CALL_EXPR_ARG (exp, 3);
33326 tree arg4 = CALL_EXPR_ARG (exp, 4);
33327 rtx scratch0, scratch1;
33328 rtx op0 = expand_normal (arg0);
33329 rtx op1 = expand_normal (arg1);
33330 rtx op2 = expand_normal (arg2);
33331 rtx op3 = expand_normal (arg3);
33332 rtx op4 = expand_normal (arg4);
33333 enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
33334
33335 tmode0 = insn_data[d->icode].operand[0].mode;
33336 tmode1 = insn_data[d->icode].operand[1].mode;
33337 modev2 = insn_data[d->icode].operand[2].mode;
33338 modei3 = insn_data[d->icode].operand[3].mode;
33339 modev4 = insn_data[d->icode].operand[4].mode;
33340 modei5 = insn_data[d->icode].operand[5].mode;
33341 modeimm = insn_data[d->icode].operand[6].mode;
33342
33343 if (VECTOR_MODE_P (modev2))
33344 op0 = safe_vector_operand (op0, modev2);
33345 if (VECTOR_MODE_P (modev4))
33346 op2 = safe_vector_operand (op2, modev4);
33347
33348 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
33349 op0 = copy_to_mode_reg (modev2, op0);
33350 if (!insn_data[d->icode].operand[3].predicate (op1, modei3))
33351 op1 = copy_to_mode_reg (modei3, op1);
33352 if ((optimize && !register_operand (op2, modev4))
33353 || !insn_data[d->icode].operand[4].predicate (op2, modev4))
33354 op2 = copy_to_mode_reg (modev4, op2);
33355 if (!insn_data[d->icode].operand[5].predicate (op3, modei5))
33356 op3 = copy_to_mode_reg (modei5, op3);
33357
33358 if (!insn_data[d->icode].operand[6].predicate (op4, modeimm))
33359 {
33360 error ("the fifth argument must be an 8-bit immediate");
33361 return const0_rtx;
33362 }
33363
33364 if (d->code == IX86_BUILTIN_PCMPESTRI128)
33365 {
33366 if (optimize || !target
33367 || GET_MODE (target) != tmode0
33368 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
33369 target = gen_reg_rtx (tmode0);
33370
33371 scratch1 = gen_reg_rtx (tmode1);
33372
33373 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
33374 }
33375 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
33376 {
33377 if (optimize || !target
33378 || GET_MODE (target) != tmode1
33379 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
33380 target = gen_reg_rtx (tmode1);
33381
33382 scratch0 = gen_reg_rtx (tmode0);
33383
33384 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
33385 }
33386 else
33387 {
33388 gcc_assert (d->flag);
33389
33390 scratch0 = gen_reg_rtx (tmode0);
33391 scratch1 = gen_reg_rtx (tmode1);
33392
33393 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
33394 }
33395
33396 if (! pat)
33397 return 0;
33398
33399 emit_insn (pat);
33400
33401 if (d->flag)
33402 {
33403 target = gen_reg_rtx (SImode);
33404 emit_move_insn (target, const0_rtx);
33405 target = gen_rtx_SUBREG (QImode, target, 0);
33406
33407 emit_insn
33408 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
33409 gen_rtx_fmt_ee (EQ, QImode,
33410 gen_rtx_REG ((enum machine_mode) d->flag,
33411 FLAGS_REG),
33412 const0_rtx)));
33413 return SUBREG_REG (target);
33414 }
33415 else
33416 return target;
33417 }
33418
33419
33420 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
33421
33422 static rtx
33423 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
33424 tree exp, rtx target)
33425 {
33426 rtx pat;
33427 tree arg0 = CALL_EXPR_ARG (exp, 0);
33428 tree arg1 = CALL_EXPR_ARG (exp, 1);
33429 tree arg2 = CALL_EXPR_ARG (exp, 2);
33430 rtx scratch0, scratch1;
33431 rtx op0 = expand_normal (arg0);
33432 rtx op1 = expand_normal (arg1);
33433 rtx op2 = expand_normal (arg2);
33434 enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;
33435
33436 tmode0 = insn_data[d->icode].operand[0].mode;
33437 tmode1 = insn_data[d->icode].operand[1].mode;
33438 modev2 = insn_data[d->icode].operand[2].mode;
33439 modev3 = insn_data[d->icode].operand[3].mode;
33440 modeimm = insn_data[d->icode].operand[4].mode;
33441
33442 if (VECTOR_MODE_P (modev2))
33443 op0 = safe_vector_operand (op0, modev2);
33444 if (VECTOR_MODE_P (modev3))
33445 op1 = safe_vector_operand (op1, modev3);
33446
33447 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
33448 op0 = copy_to_mode_reg (modev2, op0);
33449 if ((optimize && !register_operand (op1, modev3))
33450 || !insn_data[d->icode].operand[3].predicate (op1, modev3))
33451 op1 = copy_to_mode_reg (modev3, op1);
33452
33453 if (!insn_data[d->icode].operand[4].predicate (op2, modeimm))
33454 {
33455 error ("the third argument must be an 8-bit immediate");
33456 return const0_rtx;
33457 }
33458
33459 if (d->code == IX86_BUILTIN_PCMPISTRI128)
33460 {
33461 if (optimize || !target
33462 || GET_MODE (target) != tmode0
33463 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
33464 target = gen_reg_rtx (tmode0);
33465
33466 scratch1 = gen_reg_rtx (tmode1);
33467
33468 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
33469 }
33470 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
33471 {
33472 if (optimize || !target
33473 || GET_MODE (target) != tmode1
33474 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
33475 target = gen_reg_rtx (tmode1);
33476
33477 scratch0 = gen_reg_rtx (tmode0);
33478
33479 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
33480 }
33481 else
33482 {
33483 gcc_assert (d->flag);
33484
33485 scratch0 = gen_reg_rtx (tmode0);
33486 scratch1 = gen_reg_rtx (tmode1);
33487
33488 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
33489 }
33490
33491 if (! pat)
33492 return 0;
33493
33494 emit_insn (pat);
33495
33496 if (d->flag)
33497 {
33498 target = gen_reg_rtx (SImode);
33499 emit_move_insn (target, const0_rtx);
33500 target = gen_rtx_SUBREG (QImode, target, 0);
33501
33502 emit_insn
33503 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
33504 gen_rtx_fmt_ee (EQ, QImode,
33505 gen_rtx_REG ((enum machine_mode) d->flag,
33506 FLAGS_REG),
33507 const0_rtx)));
33508 return SUBREG_REG (target);
33509 }
33510 else
33511 return target;
33512 }
33513
33514 /* Subroutine of ix86_expand_builtin to take care of insns with
33515 variable number of operands. */
33516
33517 static rtx
33518 ix86_expand_args_builtin (const struct builtin_description *d,
33519 tree exp, rtx target)
33520 {
33521 rtx pat, real_target;
33522 unsigned int i, nargs;
33523 unsigned int nargs_constant = 0;
33524 unsigned int mask_pos = 0;
33525 int num_memory = 0;
33526 struct
33527 {
33528 rtx op;
33529 enum machine_mode mode;
33530 } args[6];
33531 bool last_arg_count = false;
33532 enum insn_code icode = d->icode;
33533 const struct insn_data_d *insn_p = &insn_data[icode];
33534 enum machine_mode tmode = insn_p->operand[0].mode;
33535 enum machine_mode rmode = VOIDmode;
33536 bool swap = false;
33537 enum rtx_code comparison = d->comparison;
33538
33539 switch ((enum ix86_builtin_func_type) d->flag)
33540 {
33541 case V2DF_FTYPE_V2DF_ROUND:
33542 case V4DF_FTYPE_V4DF_ROUND:
33543 case V4SF_FTYPE_V4SF_ROUND:
33544 case V8SF_FTYPE_V8SF_ROUND:
33545 case V4SI_FTYPE_V4SF_ROUND:
33546 case V8SI_FTYPE_V8SF_ROUND:
33547 return ix86_expand_sse_round (d, exp, target);
33548 case V4SI_FTYPE_V2DF_V2DF_ROUND:
33549 case V8SI_FTYPE_V4DF_V4DF_ROUND:
33550 case V16SI_FTYPE_V8DF_V8DF_ROUND:
33551 return ix86_expand_sse_round_vec_pack_sfix (d, exp, target);
33552 case INT_FTYPE_V8SF_V8SF_PTEST:
33553 case INT_FTYPE_V4DI_V4DI_PTEST:
33554 case INT_FTYPE_V4DF_V4DF_PTEST:
33555 case INT_FTYPE_V4SF_V4SF_PTEST:
33556 case INT_FTYPE_V2DI_V2DI_PTEST:
33557 case INT_FTYPE_V2DF_V2DF_PTEST:
33558 return ix86_expand_sse_ptest (d, exp, target);
33559 case FLOAT128_FTYPE_FLOAT128:
33560 case FLOAT_FTYPE_FLOAT:
33561 case INT_FTYPE_INT:
33562 case UINT64_FTYPE_INT:
33563 case UINT16_FTYPE_UINT16:
33564 case INT64_FTYPE_INT64:
33565 case INT64_FTYPE_V4SF:
33566 case INT64_FTYPE_V2DF:
33567 case INT_FTYPE_V16QI:
33568 case INT_FTYPE_V8QI:
33569 case INT_FTYPE_V8SF:
33570 case INT_FTYPE_V4DF:
33571 case INT_FTYPE_V4SF:
33572 case INT_FTYPE_V2DF:
33573 case INT_FTYPE_V32QI:
33574 case V16QI_FTYPE_V16QI:
33575 case V8SI_FTYPE_V8SF:
33576 case V8SI_FTYPE_V4SI:
33577 case V8HI_FTYPE_V8HI:
33578 case V8HI_FTYPE_V16QI:
33579 case V8QI_FTYPE_V8QI:
33580 case V8SF_FTYPE_V8SF:
33581 case V8SF_FTYPE_V8SI:
33582 case V8SF_FTYPE_V4SF:
33583 case V8SF_FTYPE_V8HI:
33584 case V4SI_FTYPE_V4SI:
33585 case V4SI_FTYPE_V16QI:
33586 case V4SI_FTYPE_V4SF:
33587 case V4SI_FTYPE_V8SI:
33588 case V4SI_FTYPE_V8HI:
33589 case V4SI_FTYPE_V4DF:
33590 case V4SI_FTYPE_V2DF:
33591 case V4HI_FTYPE_V4HI:
33592 case V4DF_FTYPE_V4DF:
33593 case V4DF_FTYPE_V4SI:
33594 case V4DF_FTYPE_V4SF:
33595 case V4DF_FTYPE_V2DF:
33596 case V4SF_FTYPE_V4SF:
33597 case V4SF_FTYPE_V4SI:
33598 case V4SF_FTYPE_V8SF:
33599 case V4SF_FTYPE_V4DF:
33600 case V4SF_FTYPE_V8HI:
33601 case V4SF_FTYPE_V2DF:
33602 case V2DI_FTYPE_V2DI:
33603 case V2DI_FTYPE_V16QI:
33604 case V2DI_FTYPE_V8HI:
33605 case V2DI_FTYPE_V4SI:
33606 case V2DF_FTYPE_V2DF:
33607 case V2DF_FTYPE_V4SI:
33608 case V2DF_FTYPE_V4DF:
33609 case V2DF_FTYPE_V4SF:
33610 case V2DF_FTYPE_V2SI:
33611 case V2SI_FTYPE_V2SI:
33612 case V2SI_FTYPE_V4SF:
33613 case V2SI_FTYPE_V2SF:
33614 case V2SI_FTYPE_V2DF:
33615 case V2SF_FTYPE_V2SF:
33616 case V2SF_FTYPE_V2SI:
33617 case V32QI_FTYPE_V32QI:
33618 case V32QI_FTYPE_V16QI:
33619 case V16HI_FTYPE_V16HI:
33620 case V16HI_FTYPE_V8HI:
33621 case V8SI_FTYPE_V8SI:
33622 case V16HI_FTYPE_V16QI:
33623 case V8SI_FTYPE_V16QI:
33624 case V4DI_FTYPE_V16QI:
33625 case V8SI_FTYPE_V8HI:
33626 case V4DI_FTYPE_V8HI:
33627 case V4DI_FTYPE_V4SI:
33628 case V4DI_FTYPE_V2DI:
33629 case HI_FTYPE_HI:
33630 case UINT_FTYPE_V2DF:
33631 case UINT_FTYPE_V4SF:
33632 case UINT64_FTYPE_V2DF:
33633 case UINT64_FTYPE_V4SF:
33634 case V16QI_FTYPE_V8DI:
33635 case V16HI_FTYPE_V16SI:
33636 case V16SI_FTYPE_HI:
33637 case V16SI_FTYPE_V16SI:
33638 case V16SI_FTYPE_INT:
33639 case V16SF_FTYPE_FLOAT:
33640 case V16SF_FTYPE_V4SF:
33641 case V16SF_FTYPE_V16SF:
33642 case V8HI_FTYPE_V8DI:
33643 case V8UHI_FTYPE_V8UHI:
33644 case V8SI_FTYPE_V8DI:
33645 case V8USI_FTYPE_V8USI:
33646 case V8SF_FTYPE_V8DF:
33647 case V8DI_FTYPE_QI:
33648 case V8DI_FTYPE_INT64:
33649 case V8DI_FTYPE_V4DI:
33650 case V8DI_FTYPE_V8DI:
33651 case V8DF_FTYPE_DOUBLE:
33652 case V8DF_FTYPE_V4DF:
33653 case V8DF_FTYPE_V8DF:
33654 case V8DF_FTYPE_V8SI:
33655 nargs = 1;
33656 break;
33657 case V4SF_FTYPE_V4SF_VEC_MERGE:
33658 case V2DF_FTYPE_V2DF_VEC_MERGE:
33659 return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
33660 case FLOAT128_FTYPE_FLOAT128_FLOAT128:
33661 case V16QI_FTYPE_V16QI_V16QI:
33662 case V16QI_FTYPE_V8HI_V8HI:
33663 case V16SI_FTYPE_V16SI_V16SI:
33664 case V16SF_FTYPE_V16SF_V16SF:
33665 case V16SF_FTYPE_V16SF_V16SI:
33666 case V8QI_FTYPE_V8QI_V8QI:
33667 case V8QI_FTYPE_V4HI_V4HI:
33668 case V8HI_FTYPE_V8HI_V8HI:
33669 case V8HI_FTYPE_V16QI_V16QI:
33670 case V8HI_FTYPE_V4SI_V4SI:
33671 case V8SF_FTYPE_V8SF_V8SF:
33672 case V8SF_FTYPE_V8SF_V8SI:
33673 case V8DI_FTYPE_V8DI_V8DI:
33674 case V8DF_FTYPE_V8DF_V8DF:
33675 case V8DF_FTYPE_V8DF_V8DI:
33676 case V4SI_FTYPE_V4SI_V4SI:
33677 case V4SI_FTYPE_V8HI_V8HI:
33678 case V4SI_FTYPE_V4SF_V4SF:
33679 case V4SI_FTYPE_V2DF_V2DF:
33680 case V4HI_FTYPE_V4HI_V4HI:
33681 case V4HI_FTYPE_V8QI_V8QI:
33682 case V4HI_FTYPE_V2SI_V2SI:
33683 case V4DF_FTYPE_V4DF_V4DF:
33684 case V4DF_FTYPE_V4DF_V4DI:
33685 case V4SF_FTYPE_V4SF_V4SF:
33686 case V4SF_FTYPE_V4SF_V4SI:
33687 case V4SF_FTYPE_V4SF_V2SI:
33688 case V4SF_FTYPE_V4SF_V2DF:
33689 case V4SF_FTYPE_V4SF_UINT:
33690 case V4SF_FTYPE_V4SF_UINT64:
33691 case V4SF_FTYPE_V4SF_DI:
33692 case V4SF_FTYPE_V4SF_SI:
33693 case V2DI_FTYPE_V2DI_V2DI:
33694 case V2DI_FTYPE_V16QI_V16QI:
33695 case V2DI_FTYPE_V4SI_V4SI:
33696 case V2UDI_FTYPE_V4USI_V4USI:
33697 case V2DI_FTYPE_V2DI_V16QI:
33698 case V2DI_FTYPE_V2DF_V2DF:
33699 case V2SI_FTYPE_V2SI_V2SI:
33700 case V2SI_FTYPE_V4HI_V4HI:
33701 case V2SI_FTYPE_V2SF_V2SF:
33702 case V2DF_FTYPE_V2DF_V2DF:
33703 case V2DF_FTYPE_V2DF_V4SF:
33704 case V2DF_FTYPE_V2DF_V2DI:
33705 case V2DF_FTYPE_V2DF_DI:
33706 case V2DF_FTYPE_V2DF_SI:
33707 case V2DF_FTYPE_V2DF_UINT:
33708 case V2DF_FTYPE_V2DF_UINT64:
33709 case V2SF_FTYPE_V2SF_V2SF:
33710 case V1DI_FTYPE_V1DI_V1DI:
33711 case V1DI_FTYPE_V8QI_V8QI:
33712 case V1DI_FTYPE_V2SI_V2SI:
33713 case V32QI_FTYPE_V16HI_V16HI:
33714 case V16HI_FTYPE_V8SI_V8SI:
33715 case V32QI_FTYPE_V32QI_V32QI:
33716 case V16HI_FTYPE_V32QI_V32QI:
33717 case V16HI_FTYPE_V16HI_V16HI:
33718 case V8SI_FTYPE_V4DF_V4DF:
33719 case V8SI_FTYPE_V8SI_V8SI:
33720 case V8SI_FTYPE_V16HI_V16HI:
33721 case V4DI_FTYPE_V4DI_V4DI:
33722 case V4DI_FTYPE_V8SI_V8SI:
33723 case V4UDI_FTYPE_V8USI_V8USI:
33724 case QI_FTYPE_V8DI_V8DI:
33725 case HI_FTYPE_V16SI_V16SI:
33726 if (comparison == UNKNOWN)
33727 return ix86_expand_binop_builtin (icode, exp, target);
33728 nargs = 2;
33729 break;
33730 case V4SF_FTYPE_V4SF_V4SF_SWAP:
33731 case V2DF_FTYPE_V2DF_V2DF_SWAP:
33732 gcc_assert (comparison != UNKNOWN);
33733 nargs = 2;
33734 swap = true;
33735 break;
33736 case V16HI_FTYPE_V16HI_V8HI_COUNT:
33737 case V16HI_FTYPE_V16HI_SI_COUNT:
33738 case V8SI_FTYPE_V8SI_V4SI_COUNT:
33739 case V8SI_FTYPE_V8SI_SI_COUNT:
33740 case V4DI_FTYPE_V4DI_V2DI_COUNT:
33741 case V4DI_FTYPE_V4DI_INT_COUNT:
33742 case V8HI_FTYPE_V8HI_V8HI_COUNT:
33743 case V8HI_FTYPE_V8HI_SI_COUNT:
33744 case V4SI_FTYPE_V4SI_V4SI_COUNT:
33745 case V4SI_FTYPE_V4SI_SI_COUNT:
33746 case V4HI_FTYPE_V4HI_V4HI_COUNT:
33747 case V4HI_FTYPE_V4HI_SI_COUNT:
33748 case V2DI_FTYPE_V2DI_V2DI_COUNT:
33749 case V2DI_FTYPE_V2DI_SI_COUNT:
33750 case V2SI_FTYPE_V2SI_V2SI_COUNT:
33751 case V2SI_FTYPE_V2SI_SI_COUNT:
33752 case V1DI_FTYPE_V1DI_V1DI_COUNT:
33753 case V1DI_FTYPE_V1DI_SI_COUNT:
33754 nargs = 2;
33755 last_arg_count = true;
33756 break;
33757 case UINT64_FTYPE_UINT64_UINT64:
33758 case UINT_FTYPE_UINT_UINT:
33759 case UINT_FTYPE_UINT_USHORT:
33760 case UINT_FTYPE_UINT_UCHAR:
33761 case UINT16_FTYPE_UINT16_INT:
33762 case UINT8_FTYPE_UINT8_INT:
33763 case HI_FTYPE_HI_HI:
33764 case V16SI_FTYPE_V8DF_V8DF:
33765 nargs = 2;
33766 break;
33767 case V2DI_FTYPE_V2DI_INT_CONVERT:
33768 nargs = 2;
33769 rmode = V1TImode;
33770 nargs_constant = 1;
33771 break;
33772 case V4DI_FTYPE_V4DI_INT_CONVERT:
33773 nargs = 2;
33774 rmode = V2TImode;
33775 nargs_constant = 1;
33776 break;
33777 case V8HI_FTYPE_V8HI_INT:
33778 case V8HI_FTYPE_V8SF_INT:
33779 case V16HI_FTYPE_V16SF_INT:
33780 case V8HI_FTYPE_V4SF_INT:
33781 case V8SF_FTYPE_V8SF_INT:
33782 case V4SF_FTYPE_V16SF_INT:
33783 case V16SF_FTYPE_V16SF_INT:
33784 case V4SI_FTYPE_V4SI_INT:
33785 case V4SI_FTYPE_V8SI_INT:
33786 case V4HI_FTYPE_V4HI_INT:
33787 case V4DF_FTYPE_V4DF_INT:
33788 case V4DF_FTYPE_V8DF_INT:
33789 case V4SF_FTYPE_V4SF_INT:
33790 case V4SF_FTYPE_V8SF_INT:
33791 case V2DI_FTYPE_V2DI_INT:
33792 case V2DF_FTYPE_V2DF_INT:
33793 case V2DF_FTYPE_V4DF_INT:
33794 case V16HI_FTYPE_V16HI_INT:
33795 case V8SI_FTYPE_V8SI_INT:
33796 case V16SI_FTYPE_V16SI_INT:
33797 case V4SI_FTYPE_V16SI_INT:
33798 case V4DI_FTYPE_V4DI_INT:
33799 case V2DI_FTYPE_V4DI_INT:
33800 case V4DI_FTYPE_V8DI_INT:
33801 case HI_FTYPE_HI_INT:
33802 nargs = 2;
33803 nargs_constant = 1;
33804 break;
33805 case V16QI_FTYPE_V16QI_V16QI_V16QI:
33806 case V8SF_FTYPE_V8SF_V8SF_V8SF:
33807 case V4DF_FTYPE_V4DF_V4DF_V4DF:
33808 case V4SF_FTYPE_V4SF_V4SF_V4SF:
33809 case V2DF_FTYPE_V2DF_V2DF_V2DF:
33810 case V32QI_FTYPE_V32QI_V32QI_V32QI:
33811 case HI_FTYPE_V16SI_V16SI_HI:
33812 case QI_FTYPE_V8DI_V8DI_QI:
33813 case V16HI_FTYPE_V16SI_V16HI_HI:
33814 case V16QI_FTYPE_V16SI_V16QI_HI:
33815 case V16QI_FTYPE_V8DI_V16QI_QI:
33816 case V16SF_FTYPE_V16SF_V16SF_HI:
33817 case V16SF_FTYPE_V16SF_V16SF_V16SF:
33818 case V16SF_FTYPE_V16SF_V16SI_V16SF:
33819 case V16SF_FTYPE_V16SI_V16SF_HI:
33820 case V16SF_FTYPE_V16SI_V16SF_V16SF:
33821 case V16SF_FTYPE_V4SF_V16SF_HI:
33822 case V16SI_FTYPE_SI_V16SI_HI:
33823 case V16SI_FTYPE_V16HI_V16SI_HI:
33824 case V16SI_FTYPE_V16QI_V16SI_HI:
33825 case V16SI_FTYPE_V16SF_V16SI_HI:
33826 case V16SI_FTYPE_V16SI_V16SI_HI:
33827 case V16SI_FTYPE_V16SI_V16SI_V16SI:
33828 case V16SI_FTYPE_V4SI_V16SI_HI:
33829 case V2DI_FTYPE_V2DI_V2DI_V2DI:
33830 case V4DI_FTYPE_V4DI_V4DI_V4DI:
33831 case V8DF_FTYPE_V2DF_V8DF_QI:
33832 case V8DF_FTYPE_V4DF_V8DF_QI:
33833 case V8DF_FTYPE_V8DF_V8DF_QI:
33834 case V8DF_FTYPE_V8DF_V8DF_V8DF:
33835 case V8DF_FTYPE_V8DF_V8DI_V8DF:
33836 case V8DF_FTYPE_V8DI_V8DF_V8DF:
33837 case V8DF_FTYPE_V8SF_V8DF_QI:
33838 case V8DF_FTYPE_V8SI_V8DF_QI:
33839 case V8DI_FTYPE_DI_V8DI_QI:
33840 case V8DI_FTYPE_V16QI_V8DI_QI:
33841 case V8DI_FTYPE_V2DI_V8DI_QI:
33842 case V8DI_FTYPE_V4DI_V8DI_QI:
33843 case V8DI_FTYPE_V8DI_V8DI_QI:
33844 case V8DI_FTYPE_V8DI_V8DI_V8DI:
33845 case V8DI_FTYPE_V8HI_V8DI_QI:
33846 case V8DI_FTYPE_V8SI_V8DI_QI:
33847 case V8HI_FTYPE_V8DI_V8HI_QI:
33848 case V8SF_FTYPE_V8DF_V8SF_QI:
33849 case V8SI_FTYPE_V8DF_V8SI_QI:
33850 case V8SI_FTYPE_V8DI_V8SI_QI:
33851 case V4SI_FTYPE_V4SI_V4SI_V4SI:
33852 nargs = 3;
33853 break;
33854 case V32QI_FTYPE_V32QI_V32QI_INT:
33855 case V16HI_FTYPE_V16HI_V16HI_INT:
33856 case V16QI_FTYPE_V16QI_V16QI_INT:
33857 case V4DI_FTYPE_V4DI_V4DI_INT:
33858 case V8HI_FTYPE_V8HI_V8HI_INT:
33859 case V8SI_FTYPE_V8SI_V8SI_INT:
33860 case V8SI_FTYPE_V8SI_V4SI_INT:
33861 case V8SF_FTYPE_V8SF_V8SF_INT:
33862 case V8SF_FTYPE_V8SF_V4SF_INT:
33863 case V4SI_FTYPE_V4SI_V4SI_INT:
33864 case V4DF_FTYPE_V4DF_V4DF_INT:
33865 case V16SF_FTYPE_V16SF_V16SF_INT:
33866 case V16SF_FTYPE_V16SF_V4SF_INT:
33867 case V16SI_FTYPE_V16SI_V4SI_INT:
33868 case V4DF_FTYPE_V4DF_V2DF_INT:
33869 case V4SF_FTYPE_V4SF_V4SF_INT:
33870 case V2DI_FTYPE_V2DI_V2DI_INT:
33871 case V4DI_FTYPE_V4DI_V2DI_INT:
33872 case V2DF_FTYPE_V2DF_V2DF_INT:
33873 case QI_FTYPE_V8DI_V8DI_INT:
33874 case QI_FTYPE_V8DF_V8DF_INT:
33875 case QI_FTYPE_V2DF_V2DF_INT:
33876 case QI_FTYPE_V4SF_V4SF_INT:
33877 case HI_FTYPE_V16SI_V16SI_INT:
33878 case HI_FTYPE_V16SF_V16SF_INT:
33879 nargs = 3;
33880 nargs_constant = 1;
33881 break;
33882 case V4DI_FTYPE_V4DI_V4DI_INT_CONVERT:
33883 nargs = 3;
33884 rmode = V4DImode;
33885 nargs_constant = 1;
33886 break;
33887 case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT:
33888 nargs = 3;
33889 rmode = V2DImode;
33890 nargs_constant = 1;
33891 break;
33892 case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT:
33893 nargs = 3;
33894 rmode = DImode;
33895 nargs_constant = 1;
33896 break;
33897 case V2DI_FTYPE_V2DI_UINT_UINT:
33898 nargs = 3;
33899 nargs_constant = 2;
33900 break;
33901 case V16SF_FTYPE_V16SF_V16SF_V16SF_HI:
33902 case V16SF_FTYPE_V16SF_V16SI_V16SF_HI:
33903 case V16SF_FTYPE_V16SI_V16SF_V16SF_HI:
33904 case V16SI_FTYPE_V16SI_V16SI_V16SI_HI:
33905 case V16SI_FTYPE_V16SI_V4SI_V16SI_HI:
33906 case V2DF_FTYPE_V2DF_V2DF_V2DF_QI:
33907 case V2DF_FTYPE_V2DF_V4SF_V2DF_QI:
33908 case V4SF_FTYPE_V4SF_V2DF_V4SF_QI:
33909 case V4SF_FTYPE_V4SF_V4SF_V4SF_QI:
33910 case V8DF_FTYPE_V8DF_V8DF_V8DF_QI:
33911 case V8DF_FTYPE_V8DF_V8DI_V8DF_QI:
33912 case V8DF_FTYPE_V8DI_V8DF_V8DF_QI:
33913 case V8DI_FTYPE_V16SI_V16SI_V8DI_QI:
33914 case V8DI_FTYPE_V8DI_SI_V8DI_V8DI:
33915 case V8DI_FTYPE_V8DI_V2DI_V8DI_QI:
33916 case V8DI_FTYPE_V8DI_V8DI_V8DI_QI:
33917 nargs = 4;
33918 break;
33919 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
33920 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
33921 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
33922 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
33923 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT:
33924 nargs = 4;
33925 nargs_constant = 1;
33926 break;
33927 case QI_FTYPE_V2DF_V2DF_INT_QI:
33928 case QI_FTYPE_V4SF_V4SF_INT_QI:
33929 nargs = 4;
33930 mask_pos = 1;
33931 nargs_constant = 1;
33932 break;
33933 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
33934 nargs = 4;
33935 nargs_constant = 2;
33936 break;
33937 case UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED:
33938 case UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG:
33939 nargs = 4;
33940 break;
33941 case QI_FTYPE_V8DI_V8DI_INT_QI:
33942 case HI_FTYPE_V16SI_V16SI_INT_HI:
33943 case QI_FTYPE_V8DF_V8DF_INT_QI:
33944 case HI_FTYPE_V16SF_V16SF_INT_HI:
33945 mask_pos = 1;
33946 nargs = 4;
33947 nargs_constant = 1;
33948 break;
33949 case V8DF_FTYPE_V8DF_INT_V8DF_QI:
33950 case V16SF_FTYPE_V16SF_INT_V16SF_HI:
33951 case V16HI_FTYPE_V16SF_INT_V16HI_HI:
33952 case V16SI_FTYPE_V16SI_INT_V16SI_HI:
33953 case V4SI_FTYPE_V16SI_INT_V4SI_QI:
33954 case V4DI_FTYPE_V8DI_INT_V4DI_QI:
33955 case V4DF_FTYPE_V8DF_INT_V4DF_QI:
33956 case V4SF_FTYPE_V16SF_INT_V4SF_QI:
33957 case V8DI_FTYPE_V8DI_INT_V8DI_QI:
33958 nargs = 4;
33959 mask_pos = 2;
33960 nargs_constant = 1;
33961 break;
33962 case V16SF_FTYPE_V16SF_V4SF_INT_V16SF_HI:
33963 case V16SI_FTYPE_V16SI_V4SI_INT_V16SI_HI:
33964 case V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI:
33965 case V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI:
33966 case V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI:
33967 case V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI:
33968 case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI:
33969 case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI:
33970 case V8DF_FTYPE_V8DF_V4DF_INT_V8DF_QI:
33971 case V8DI_FTYPE_V8DI_V4DI_INT_V8DI_QI:
33972 nargs = 5;
33973 mask_pos = 2;
33974 nargs_constant = 1;
33975 break;
33976 case V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI:
33977 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI:
33978 case V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI:
33979 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI:
33980 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI:
33981 nargs = 5;
33982 mask_pos = 1;
33983 nargs_constant = 1;
33984 break;
33985
33986 default:
33987 gcc_unreachable ();
33988 }
33989
33990 gcc_assert (nargs <= ARRAY_SIZE (args));
33991
33992 if (comparison != UNKNOWN)
33993 {
33994 gcc_assert (nargs == 2);
33995 return ix86_expand_sse_compare (d, exp, target, swap);
33996 }
33997
33998 if (rmode == VOIDmode || rmode == tmode)
33999 {
34000 if (optimize
34001 || target == 0
34002 || GET_MODE (target) != tmode
34003 || !insn_p->operand[0].predicate (target, tmode))
34004 target = gen_reg_rtx (tmode);
34005 real_target = target;
34006 }
34007 else
34008 {
34009 real_target = gen_reg_rtx (tmode);
34010 target = simplify_gen_subreg (rmode, real_target, tmode, 0);
34011 }
34012
34013 for (i = 0; i < nargs; i++)
34014 {
34015 tree arg = CALL_EXPR_ARG (exp, i);
34016 rtx op = expand_normal (arg);
34017 enum machine_mode mode = insn_p->operand[i + 1].mode;
34018 bool match = insn_p->operand[i + 1].predicate (op, mode);
34019
34020 if (last_arg_count && (i + 1) == nargs)
34021 {
34022 /* SIMD shift insns take either an 8-bit immediate or a
34023 register as the count. But the builtin functions take int as
34024 the count. If the count doesn't match, we put it in a register. */
34025 if (!match)
34026 {
34027 op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
34028 if (!insn_p->operand[i + 1].predicate (op, mode))
34029 op = copy_to_reg (op);
34030 }
34031 }
34032 else if ((mask_pos && (nargs - i - mask_pos) == nargs_constant) ||
34033 (!mask_pos && (nargs - i) <= nargs_constant))
34034 {
34035 if (!match)
34036 switch (icode)
34037 {
34038 case CODE_FOR_avx2_inserti128:
34039 case CODE_FOR_avx2_extracti128:
34040 error ("the last argument must be a 1-bit immediate");
34041 return const0_rtx;
34042
34043 case CODE_FOR_avx512f_cmpv8di3_mask:
34044 case CODE_FOR_avx512f_cmpv16si3_mask:
34045 case CODE_FOR_avx512f_ucmpv8di3_mask:
34046 case CODE_FOR_avx512f_ucmpv16si3_mask:
34047 error ("the last argument must be a 3-bit immediate");
34048 return const0_rtx;
34049
34050 case CODE_FOR_sse4_1_roundsd:
34051 case CODE_FOR_sse4_1_roundss:
34052
34053 case CODE_FOR_sse4_1_roundpd:
34054 case CODE_FOR_sse4_1_roundps:
34055 case CODE_FOR_avx_roundpd256:
34056 case CODE_FOR_avx_roundps256:
34057
34058 case CODE_FOR_sse4_1_roundpd_vec_pack_sfix:
34059 case CODE_FOR_sse4_1_roundps_sfix:
34060 case CODE_FOR_avx_roundpd_vec_pack_sfix256:
34061 case CODE_FOR_avx_roundps_sfix256:
34062
34063 case CODE_FOR_sse4_1_blendps:
34064 case CODE_FOR_avx_blendpd256:
34065 case CODE_FOR_avx_vpermilv4df:
34066 case CODE_FOR_avx512f_getmantv8df_mask:
34067 case CODE_FOR_avx512f_getmantv16sf_mask:
34068 error ("the last argument must be a 4-bit immediate");
34069 return const0_rtx;
34070
34071 case CODE_FOR_sha1rnds4:
34072 case CODE_FOR_sse4_1_blendpd:
34073 case CODE_FOR_avx_vpermilv2df:
34074 case CODE_FOR_xop_vpermil2v2df3:
34075 case CODE_FOR_xop_vpermil2v4sf3:
34076 case CODE_FOR_xop_vpermil2v4df3:
34077 case CODE_FOR_xop_vpermil2v8sf3:
34078 case CODE_FOR_avx512f_vinsertf32x4_mask:
34079 case CODE_FOR_avx512f_vinserti32x4_mask:
34080 case CODE_FOR_avx512f_vextractf32x4_mask:
34081 case CODE_FOR_avx512f_vextracti32x4_mask:
34082 error ("the last argument must be a 2-bit immediate");
34083 return const0_rtx;
34084
34085 case CODE_FOR_avx_vextractf128v4df:
34086 case CODE_FOR_avx_vextractf128v8sf:
34087 case CODE_FOR_avx_vextractf128v8si:
34088 case CODE_FOR_avx_vinsertf128v4df:
34089 case CODE_FOR_avx_vinsertf128v8sf:
34090 case CODE_FOR_avx_vinsertf128v8si:
34091 case CODE_FOR_avx512f_vinsertf64x4_mask:
34092 case CODE_FOR_avx512f_vinserti64x4_mask:
34093 case CODE_FOR_avx512f_vextractf64x4_mask:
34094 case CODE_FOR_avx512f_vextracti64x4_mask:
34095 error ("the last argument must be a 1-bit immediate");
34096 return const0_rtx;
34097
34098 case CODE_FOR_avx_vmcmpv2df3:
34099 case CODE_FOR_avx_vmcmpv4sf3:
34100 case CODE_FOR_avx_cmpv2df3:
34101 case CODE_FOR_avx_cmpv4sf3:
34102 case CODE_FOR_avx_cmpv4df3:
34103 case CODE_FOR_avx_cmpv8sf3:
34104 case CODE_FOR_avx512f_cmpv8df3_mask:
34105 case CODE_FOR_avx512f_cmpv16sf3_mask:
34106 case CODE_FOR_avx512f_vmcmpv2df3_mask:
34107 case CODE_FOR_avx512f_vmcmpv4sf3_mask:
34108 error ("the last argument must be a 5-bit immediate");
34109 return const0_rtx;
34110
34111 default:
34112 switch (nargs_constant)
34113 {
34114 case 2:
34115 if ((mask_pos && (nargs - i - mask_pos) == nargs_constant)
34116 || (!mask_pos && (nargs - i) == nargs_constant))
34117 {
34118 error ("the next to last argument must be an 8-bit immediate");
34119 break;
34120 }
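/* FALLTHRU */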
34121 case 1:
34122 error ("the last argument must be an 8-bit immediate");
34123 break;
34124 default:
34125 gcc_unreachable ();
34126 }
34127 return const0_rtx;
34128 }
34129 }
34130 else
34131 {
34132 if (VECTOR_MODE_P (mode))
34133 op = safe_vector_operand (op, mode);
34134
34135 /* If we aren't optimizing, only allow one memory operand to
34136 be generated. */
34137 if (memory_operand (op, mode))
34138 num_memory++;
34139
34140 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
34141 {
34142 if (optimize || !match || num_memory > 1)
34143 op = copy_to_mode_reg (mode, op);
34144 }
34145 else
34146 {
34147 op = copy_to_reg (op);
34148 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
34149 }
34150 }
34151
34152 args[i].op = op;
34153 args[i].mode = mode;
34154 }
34155
34156 switch (nargs)
34157 {
34158 case 1:
34159 pat = GEN_FCN (icode) (real_target, args[0].op);
34160 break;
34161 case 2:
34162 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
34163 break;
34164 case 3:
34165 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
34166 args[2].op);
34167 break;
34168 case 4:
34169 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
34170 args[2].op, args[3].op);
34171 break;
34172 case 5:
34173 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
34174 args[2].op, args[3].op, args[4].op);
break;
34175 case 6:
34176 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
34177 args[2].op, args[3].op, args[4].op,
34178 args[5].op);
34179 break;
34180 default:
34181 gcc_unreachable ();
34182 }
34183
34184 if (! pat)
34185 return 0;
34186
34187 emit_insn (pat);
34188 return target;
34189 }
34190
34191 /* Transform a pattern of the following layout:
34192 (parallel [
34193 (set (A B))
34194 (unspec [C] UNSPEC_EMBEDDED_ROUNDING)
34195 ])
34196 into:
34197 (set (A B))
34198
34199 Or:
34200 (parallel [ A B
34201 ...
34202 (unspec [C] UNSPEC_EMBEDDED_ROUNDING)
34203 ...
34204 ])
34205 into:
34206 (parallel [ A B ... ]) */
34207
34208 static rtx
34209 ix86_erase_embedded_rounding (rtx pat)
34210 {
34211 if (GET_CODE (pat) == INSN)
34212 pat = PATTERN (pat);
34213
34214 gcc_assert (GET_CODE (pat) == PARALLEL);
34215
34216 if (XVECLEN (pat, 0) == 2)
34217 {
34218 rtx p0 = XVECEXP (pat, 0, 0);
34219 rtx p1 = XVECEXP (pat, 0, 1);
34220
34221 gcc_assert (GET_CODE (p0) == SET
34222 && GET_CODE (p1) == UNSPEC
34223 && XINT (p1, 1) == UNSPEC_EMBEDDED_ROUNDING);
34224
34225 return p0;
34226 }
34227 else
34228 {
34229 rtx *res = XALLOCAVEC (rtx, XVECLEN (pat, 0));
34230 int i = 0;
34231 int j = 0;
34232
34233 for (; i < XVECLEN (pat, 0); ++i)
34234 {
34235 rtx elem = XVECEXP (pat, 0, i);
34236 if (GET_CODE (elem) != UNSPEC
34237 || XINT (elem, 1) != UNSPEC_EMBEDDED_ROUNDING)
34238 res [j++] = elem;
34239 }
34240
34241 /* No more than one occurrence was removed. */
34242 gcc_assert (j >= XVECLEN (pat, 0) - 1);
34243
34244 return gen_rtx_PARALLEL (GET_MODE (pat), gen_rtvec_v (j, res));
34245 }
34246 }
34247
34248 /* Subroutine of ix86_expand_round_builtin to take care of comi insns
34249 with rounding. */
34250 static rtx
34251 ix86_expand_sse_comi_round (const struct builtin_description *d,
34252 tree exp, rtx target)
34253 {
34254 rtx pat, set_dst;
34255 tree arg0 = CALL_EXPR_ARG (exp, 0);
34256 tree arg1 = CALL_EXPR_ARG (exp, 1);
34257 tree arg2 = CALL_EXPR_ARG (exp, 2);
34258 tree arg3 = CALL_EXPR_ARG (exp, 3);
34259 rtx op0 = expand_normal (arg0);
34260 rtx op1 = expand_normal (arg1);
34261 rtx op2 = expand_normal (arg2);
34262 rtx op3 = expand_normal (arg3);
34263 enum insn_code icode = d->icode;
34264 const struct insn_data_d *insn_p = &insn_data[icode];
34265 enum machine_mode mode0 = insn_p->operand[0].mode;
34266 enum machine_mode mode1 = insn_p->operand[1].mode;
34267 enum rtx_code comparison = UNEQ;
34268 bool need_ucomi = false;
34269
34270 /* See avxintrin.h for values. */
34271 enum rtx_code comi_comparisons[32] =
34272 {
34273 UNEQ, GT, GE, UNORDERED, LTGT, UNLE, UNLT, ORDERED, UNEQ, UNLT,
34274 UNLE, LT, LTGT, GE, GT, LT, UNEQ, GT, GE, UNORDERED, LTGT, UNLE,
34275 UNLT, ORDERED, UNEQ, UNLT, UNLE, LT, LTGT, GE, GT, LT
34276 };
34277 bool need_ucomi_values[32] =
34278 {
34279 true, false, false, true, true, false, false, true,
34280 true, false, false, true, true, false, false, true,
34281 false, true, true, false, false, true, true, false,
34282 false, true, true, false, false, true, true, false
34283 };
34284
34285 if (!CONST_INT_P (op2))
34286 {
34287 error ("the third argument must be comparison constant");
34288 return const0_rtx;
34289 }
34290 if (INTVAL (op2) < 0 || INTVAL (op2) >= 32)
34291 {
34292 error ("incorect comparison mode");
34293 return const0_rtx;
34294 }
34295
34296 if (!insn_p->operand[2].predicate (op3, SImode))
34297 {
34298 error ("incorrect rounding operand");
34299 return const0_rtx;
34300 }
34301
34302 comparison = comi_comparisons[INTVAL (op2)];
34303 need_ucomi = need_ucomi_values[INTVAL (op2)];
34304
34305 if (VECTOR_MODE_P (mode0))
34306 op0 = safe_vector_operand (op0, mode0);
34307 if (VECTOR_MODE_P (mode1))
34308 op1 = safe_vector_operand (op1, mode1);
34309
34310 target = gen_reg_rtx (SImode);
34311 emit_move_insn (target, const0_rtx);
34312 target = gen_rtx_SUBREG (QImode, target, 0);
34313
34314 if ((optimize && !register_operand (op0, mode0))
34315 || !insn_p->operand[0].predicate (op0, mode0))
34316 op0 = copy_to_mode_reg (mode0, op0);
34317 if ((optimize && !register_operand (op1, mode1))
34318 || !insn_p->operand[1].predicate (op1, mode1))
34319 op1 = copy_to_mode_reg (mode1, op1);
34320
34321 if (need_ucomi)
34322 icode = icode == CODE_FOR_sse_comi_round
34323 ? CODE_FOR_sse_ucomi_round
34324 : CODE_FOR_sse2_ucomi_round;
34325
34326 pat = GEN_FCN (icode) (op0, op1, op3);
34327 if (! pat)
34328 return 0;
34329
34330 /* Rounding operand can be either NO_ROUND or ROUND_SAE at this point. */
34331 if (INTVAL (op3) == NO_ROUND)
34332 {
34333 pat = ix86_erase_embedded_rounding (pat);
34334 if (! pat)
34335 return 0;
34336
34337 set_dst = SET_DEST (pat);
34338 }
34339 else
34340 {
34341 gcc_assert (GET_CODE (XVECEXP (pat, 0, 0)) == SET);
34342 set_dst = SET_DEST (XVECEXP (pat, 0, 0));
34343 }
34344
34345 emit_insn (pat);
34346 emit_insn (gen_rtx_SET (VOIDmode,
34347 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
34348 gen_rtx_fmt_ee (comparison, QImode,
34349 set_dst,
34350 const0_rtx)));
34351
34352 return SUBREG_REG (target);
34353 }
34354
34355 static rtx
34356 ix86_expand_round_builtin (const struct builtin_description *d,
34357 tree exp, rtx target)
34358 {
34359 rtx pat;
34360 unsigned int i, nargs;
34361 struct
34362 {
34363 rtx op;
34364 enum machine_mode mode;
34365 } args[6];
34366 enum insn_code icode = d->icode;
34367 const struct insn_data_d *insn_p = &insn_data[icode];
34368 enum machine_mode tmode = insn_p->operand[0].mode;
34369 unsigned int nargs_constant = 0;
34370 unsigned int redundant_embed_rnd = 0;
34371
34372 switch ((enum ix86_builtin_func_type) d->flag)
34373 {
34374 case UINT64_FTYPE_V2DF_INT:
34375 case UINT64_FTYPE_V4SF_INT:
34376 case UINT_FTYPE_V2DF_INT:
34377 case UINT_FTYPE_V4SF_INT:
34378 case INT64_FTYPE_V2DF_INT:
34379 case INT64_FTYPE_V4SF_INT:
34380 case INT_FTYPE_V2DF_INT:
34381 case INT_FTYPE_V4SF_INT:
34382 nargs = 2;
34383 break;
34384 case V4SF_FTYPE_V4SF_UINT_INT:
34385 case V4SF_FTYPE_V4SF_UINT64_INT:
34386 case V2DF_FTYPE_V2DF_UINT64_INT:
34387 case V4SF_FTYPE_V4SF_INT_INT:
34388 case V4SF_FTYPE_V4SF_INT64_INT:
34389 case V2DF_FTYPE_V2DF_INT64_INT:
34390 case V4SF_FTYPE_V4SF_V4SF_INT:
34391 case V2DF_FTYPE_V2DF_V2DF_INT:
34392 case V4SF_FTYPE_V4SF_V2DF_INT:
34393 case V2DF_FTYPE_V2DF_V4SF_INT:
34394 nargs = 3;
34395 break;
34396 case V8SF_FTYPE_V8DF_V8SF_QI_INT:
34397 case V8DF_FTYPE_V8DF_V8DF_QI_INT:
34398 case V8SI_FTYPE_V8DF_V8SI_QI_INT:
34399 case V16SF_FTYPE_V16SF_V16SF_HI_INT:
34400 case V16SF_FTYPE_V16SI_V16SF_HI_INT:
34401 case V16SI_FTYPE_V16SF_V16SI_HI_INT:
34402 case V8DF_FTYPE_V8SF_V8DF_QI_INT:
34403 case V16SF_FTYPE_V16HI_V16SF_HI_INT:
34404 case V2DF_FTYPE_V2DF_V2DF_V2DF_INT:
34405 case V4SF_FTYPE_V4SF_V4SF_V4SF_INT:
34406 nargs = 4;
34407 break;
34408 case V4SF_FTYPE_V4SF_V4SF_INT_INT:
34409 case V2DF_FTYPE_V2DF_V2DF_INT_INT:
34410 nargs_constant = 2;
34411 nargs = 4;
34412 break;
34413 case INT_FTYPE_V4SF_V4SF_INT_INT:
34414 case INT_FTYPE_V2DF_V2DF_INT_INT:
34415 return ix86_expand_sse_comi_round (d, exp, target);
34416 case V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT:
34417 case V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT:
34418 case V2DF_FTYPE_V2DF_V2DF_V2DF_QI_INT:
34419 case V2DF_FTYPE_V2DF_V4SF_V2DF_QI_INT:
34420 case V4SF_FTYPE_V4SF_V4SF_V4SF_QI_INT:
34421 case V4SF_FTYPE_V4SF_V2DF_V4SF_QI_INT:
34422 nargs = 5;
34423 break;
34424 case V16SF_FTYPE_V16SF_INT_V16SF_HI_INT:
34425 case V8DF_FTYPE_V8DF_INT_V8DF_QI_INT:
34426 nargs_constant = 4;
34427 nargs = 5;
34428 break;
34429 case QI_FTYPE_V8DF_V8DF_INT_QI_INT:
34430 case QI_FTYPE_V2DF_V2DF_INT_QI_INT:
34431 case HI_FTYPE_V16SF_V16SF_INT_HI_INT:
34432 case QI_FTYPE_V4SF_V4SF_INT_QI_INT:
34433 nargs_constant = 3;
34434 nargs = 5;
34435 break;
34436 case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI_INT:
34437 case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI_INT:
34438 nargs = 6;
34439 nargs_constant = 4;
34440 break;
34441 case V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT:
34442 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT:
34443 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT:
34444 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT:
34445 nargs = 6;
34446 nargs_constant = 3;
34447 break;
34448 default:
34449 gcc_unreachable ();
34450 }
34451 gcc_assert (nargs <= ARRAY_SIZE (args));
34452
34453 if (optimize
34454 || target == 0
34455 || GET_MODE (target) != tmode
34456 || !insn_p->operand[0].predicate (target, tmode))
34457 target = gen_reg_rtx (tmode);
34458
34459 for (i = 0; i < nargs; i++)
34460 {
34461 tree arg = CALL_EXPR_ARG (exp, i);
34462 rtx op = expand_normal (arg);
34463 enum machine_mode mode = insn_p->operand[i + 1].mode;
34464 bool match = insn_p->operand[i + 1].predicate (op, mode);
34465
34466 if (i == nargs - nargs_constant)
34467 {
34468 if (!match)
34469 {
34470 switch (icode)
34471 {
34472 case CODE_FOR_avx512f_getmantv8df_mask_round:
34473 case CODE_FOR_avx512f_getmantv16sf_mask_round:
34474 case CODE_FOR_avx512f_getmantv2df_round:
34475 case CODE_FOR_avx512f_getmantv4sf_round:
34476 error ("the immediate argument must be a 4-bit immediate");
34477 return const0_rtx;
34478 case CODE_FOR_avx512f_cmpv8df3_mask_round:
34479 case CODE_FOR_avx512f_cmpv16sf3_mask_round:
34480 case CODE_FOR_avx512f_vmcmpv2df3_mask_round:
34481 case CODE_FOR_avx512f_vmcmpv4sf3_mask_round:
34482 error ("the immediate argument must be a 5-bit immediate");
34483 return const0_rtx;
34484 default:
34485 error ("the immediate argument must be an 8-bit immediate");
34486 return const0_rtx;
34487 }
34488 }
34489 }
34490 else if (i == nargs - 1)
34491 {
34492 if (!insn_p->operand[nargs].predicate (op, SImode))
34493 {
34494 error ("incorrect rounding operand");
34495 return const0_rtx;
34496 }
34497
34498 /* If there is no rounding, use the normal version of the pattern. */
34499 if (INTVAL (op) == NO_ROUND)
34500 redundant_embed_rnd = 1;
34501 }
34502 else
34503 {
34504 if (VECTOR_MODE_P (mode))
34505 op = safe_vector_operand (op, mode);
34506
34507 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
34508 {
34509 if (optimize || !match)
34510 op = copy_to_mode_reg (mode, op);
34511 }
34512 else
34513 {
34514 op = copy_to_reg (op);
34515 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
34516 }
34517 }
34518
34519 args[i].op = op;
34520 args[i].mode = mode;
34521 }
34522
34523 switch (nargs)
34524 {
34525 case 1:
34526 pat = GEN_FCN (icode) (target, args[0].op);
34527 break;
34528 case 2:
34529 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
34530 break;
34531 case 3:
34532 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
34533 args[2].op);
34534 break;
34535 case 4:
34536 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
34537 args[2].op, args[3].op);
34538 break;
34539 case 5:
34540 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
34541 args[2].op, args[3].op, args[4].op);
break;
34542 case 6:
34543 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
34544 args[2].op, args[3].op, args[4].op,
34545 args[5].op);
34546 break;
34547 default:
34548 gcc_unreachable ();
34549 }
34550
34551 if (!pat)
34552 return 0;
34553
34554 if (redundant_embed_rnd)
34555 pat = ix86_erase_embedded_rounding (pat);
34556
34557 emit_insn (pat);
34558 return target;
34559 }
34560
34561 /* Subroutine of ix86_expand_builtin to take care of special insns
34562 with a variable number of operands. */
34563
34564 static rtx
34565 ix86_expand_special_args_builtin (const struct builtin_description *d,
34566 tree exp, rtx target)
34567 {
34568 tree arg;
34569 rtx pat, op;
34570 unsigned int i, nargs, arg_adjust, memory;
34571 bool aligned_mem = false;
34572 struct
34573 {
34574 rtx op;
34575 enum machine_mode mode;
34576 } args[3];
34577 enum insn_code icode = d->icode;
34578 bool last_arg_constant = false;
34579 const struct insn_data_d *insn_p = &insn_data[icode];
34580 enum machine_mode tmode = insn_p->operand[0].mode;
34581 enum { load, store } klass;
34582
34583 switch ((enum ix86_builtin_func_type) d->flag)
34584 {
34585 case VOID_FTYPE_VOID:
34586 emit_insn (GEN_FCN (icode) (target));
34587 return 0;
34588 case VOID_FTYPE_UINT64:
34589 case VOID_FTYPE_UNSIGNED:
34590 nargs = 0;
34591 klass = store;
34592 memory = 0;
34593 break;
34594
34595 case INT_FTYPE_VOID:
34596 case USHORT_FTYPE_VOID:
34597 case UINT64_FTYPE_VOID:
34598 case UNSIGNED_FTYPE_VOID:
34599 nargs = 0;
34600 klass = load;
34601 memory = 0;
34602 break;
34603 case UINT64_FTYPE_PUNSIGNED:
34604 case V2DI_FTYPE_PV2DI:
34605 case V4DI_FTYPE_PV4DI:
34606 case V32QI_FTYPE_PCCHAR:
34607 case V16QI_FTYPE_PCCHAR:
34608 case V8SF_FTYPE_PCV4SF:
34609 case V8SF_FTYPE_PCFLOAT:
34610 case V4SF_FTYPE_PCFLOAT:
34611 case V4DF_FTYPE_PCV2DF:
34612 case V4DF_FTYPE_PCDOUBLE:
34613 case V2DF_FTYPE_PCDOUBLE:
34614 case VOID_FTYPE_PVOID:
34615 case V16SI_FTYPE_PV4SI:
34616 case V16SF_FTYPE_PV4SF:
34617 case V8DI_FTYPE_PV4DI:
34618 case V8DI_FTYPE_PV8DI:
34619 case V8DF_FTYPE_PV4DF:
34620 nargs = 1;
34621 klass = load;
34622 memory = 0;
34623 switch (icode)
34624 {
34625 case CODE_FOR_sse4_1_movntdqa:
34626 case CODE_FOR_avx2_movntdqa:
34627 case CODE_FOR_avx512f_movntdqa:
34628 aligned_mem = true;
34629 break;
34630 default:
34631 break;
34632 }
34633 break;
34634 case VOID_FTYPE_PV2SF_V4SF:
34635 case VOID_FTYPE_PV8DI_V8DI:
34636 case VOID_FTYPE_PV4DI_V4DI:
34637 case VOID_FTYPE_PV2DI_V2DI:
34638 case VOID_FTYPE_PCHAR_V32QI:
34639 case VOID_FTYPE_PCHAR_V16QI:
34640 case VOID_FTYPE_PFLOAT_V16SF:
34641 case VOID_FTYPE_PFLOAT_V8SF:
34642 case VOID_FTYPE_PFLOAT_V4SF:
34643 case VOID_FTYPE_PDOUBLE_V8DF:
34644 case VOID_FTYPE_PDOUBLE_V4DF:
34645 case VOID_FTYPE_PDOUBLE_V2DF:
34646 case VOID_FTYPE_PLONGLONG_LONGLONG:
34647 case VOID_FTYPE_PULONGLONG_ULONGLONG:
34648 case VOID_FTYPE_PINT_INT:
34649 nargs = 1;
34650 klass = store;
34651 /* Reserve memory operand for target. */
34652 memory = ARRAY_SIZE (args);
34653 switch (icode)
34654 {
34655 /* These builtins and instructions require the memory
34656 to be properly aligned. */
34657 case CODE_FOR_avx_movntv4di:
34658 case CODE_FOR_sse2_movntv2di:
34659 case CODE_FOR_avx_movntv8sf:
34660 case CODE_FOR_sse_movntv4sf:
34661 case CODE_FOR_sse4a_vmmovntv4sf:
34662 case CODE_FOR_avx_movntv4df:
34663 case CODE_FOR_sse2_movntv2df:
34664 case CODE_FOR_sse4a_vmmovntv2df:
34665 case CODE_FOR_sse2_movntidi:
34666 case CODE_FOR_sse_movntq:
34667 case CODE_FOR_sse2_movntisi:
34668 case CODE_FOR_avx512f_movntv16sf:
34669 case CODE_FOR_avx512f_movntv8df:
34670 case CODE_FOR_avx512f_movntv8di:
34671 aligned_mem = true;
34672 break;
34673 default:
34674 break;
34675 }
34676 break;
34677 case V4SF_FTYPE_V4SF_PCV2SF:
34678 case V2DF_FTYPE_V2DF_PCDOUBLE:
34679 nargs = 2;
34680 klass = load;
34681 memory = 1;
34682 break;
34683 case V8SF_FTYPE_PCV8SF_V8SI:
34684 case V4DF_FTYPE_PCV4DF_V4DI:
34685 case V4SF_FTYPE_PCV4SF_V4SI:
34686 case V2DF_FTYPE_PCV2DF_V2DI:
34687 case V8SI_FTYPE_PCV8SI_V8SI:
34688 case V4DI_FTYPE_PCV4DI_V4DI:
34689 case V4SI_FTYPE_PCV4SI_V4SI:
34690 case V2DI_FTYPE_PCV2DI_V2DI:
34691 nargs = 2;
34692 klass = load;
34693 memory = 0;
34694 break;
34695 case VOID_FTYPE_PV8DF_V8DF_QI:
34696 case VOID_FTYPE_PV16SF_V16SF_HI:
34697 case VOID_FTYPE_PV8DI_V8DI_QI:
34698 case VOID_FTYPE_PV16SI_V16SI_HI:
34699 switch (icode)
34700 {
34701 /* These builtins and instructions require the memory
34702 to be properly aligned. */
34703 case CODE_FOR_avx512f_storev16sf_mask:
34704 case CODE_FOR_avx512f_storev16si_mask:
34705 case CODE_FOR_avx512f_storev8df_mask:
34706 case CODE_FOR_avx512f_storev8di_mask:
34707 aligned_mem = true;
34708 break;
34709 default:
34710 break;
34711 }
34712 /* FALLTHRU */
34713 case VOID_FTYPE_PV8SF_V8SI_V8SF:
34714 case VOID_FTYPE_PV4DF_V4DI_V4DF:
34715 case VOID_FTYPE_PV4SF_V4SI_V4SF:
34716 case VOID_FTYPE_PV2DF_V2DI_V2DF:
34717 case VOID_FTYPE_PV8SI_V8SI_V8SI:
34718 case VOID_FTYPE_PV4DI_V4DI_V4DI:
34719 case VOID_FTYPE_PV4SI_V4SI_V4SI:
34720 case VOID_FTYPE_PV2DI_V2DI_V2DI:
34721 case VOID_FTYPE_PDOUBLE_V2DF_QI:
34722 case VOID_FTYPE_PFLOAT_V4SF_QI:
34723 case VOID_FTYPE_PV8SI_V8DI_QI:
34724 case VOID_FTYPE_PV8HI_V8DI_QI:
34725 case VOID_FTYPE_PV16HI_V16SI_HI:
34726 case VOID_FTYPE_PV16QI_V8DI_QI:
34727 case VOID_FTYPE_PV16QI_V16SI_HI:
34728 nargs = 2;
34729 klass = store;
34730 /* Reserve memory operand for target. */
34731 memory = ARRAY_SIZE (args);
34732 break;
34733 case V16SF_FTYPE_PCV16SF_V16SF_HI:
34734 case V16SI_FTYPE_PCV16SI_V16SI_HI:
34735 case V8DF_FTYPE_PCV8DF_V8DF_QI:
34736 case V8DI_FTYPE_PCV8DI_V8DI_QI:
34737 case V2DF_FTYPE_PCDOUBLE_V2DF_QI:
34738 case V4SF_FTYPE_PCFLOAT_V4SF_QI:
34739 nargs = 3;
34740 klass = load;
34741 memory = 0;
34742 switch (icode)
34743 {
34744 /* These builtins and instructions require the memory
34745 to be properly aligned. */
34746 case CODE_FOR_avx512f_loadv16sf_mask:
34747 case CODE_FOR_avx512f_loadv16si_mask:
34748 case CODE_FOR_avx512f_loadv8df_mask:
34749 case CODE_FOR_avx512f_loadv8di_mask:
34750 aligned_mem = true;
34751 break;
34752 default:
34753 break;
34754 }
34755 break;
34756 case VOID_FTYPE_UINT_UINT_UINT:
34757 case VOID_FTYPE_UINT64_UINT_UINT:
34758 case UCHAR_FTYPE_UINT_UINT_UINT:
34759 case UCHAR_FTYPE_UINT64_UINT_UINT:
34760 nargs = 3;
34761 klass = load;
34762 memory = ARRAY_SIZE (args);
34763 last_arg_constant = true;
34764 break;
34765 default:
34766 gcc_unreachable ();
34767 }
34768
34769 gcc_assert (nargs <= ARRAY_SIZE (args));
34770
34771 if (klass == store)
34772 {
34773 arg = CALL_EXPR_ARG (exp, 0);
34774 op = expand_normal (arg);
34775 gcc_assert (target == 0);
34776 if (memory)
34777 {
34778 op = ix86_zero_extend_to_Pmode (op);
34779 target = gen_rtx_MEM (tmode, op);
34780 /* target at this point has just BITS_PER_UNIT MEM_ALIGN
34781 on it. Try to improve it using get_pointer_alignment,
34782 and if the special builtin is one that requires strict
34783 mode alignment, also from its GET_MODE_ALIGNMENT.
34784 Failure to do so could lead to ix86_legitimate_combined_insn
34785 rejecting all changes to such insns. */
34786 unsigned int align = get_pointer_alignment (arg);
34787 if (aligned_mem && align < GET_MODE_ALIGNMENT (tmode))
34788 align = GET_MODE_ALIGNMENT (tmode);
34789 if (MEM_ALIGN (target) < align)
34790 set_mem_align (target, align);
34791 }
34792 else
34793 target = force_reg (tmode, op);
34794 arg_adjust = 1;
34795 }
34796 else
34797 {
34798 arg_adjust = 0;
34799 if (optimize
34800 || target == 0
34801 || !register_operand (target, tmode)
34802 || GET_MODE (target) != tmode)
34803 target = gen_reg_rtx (tmode);
34804 }
34805
34806 for (i = 0; i < nargs; i++)
34807 {
34808 enum machine_mode mode = insn_p->operand[i + 1].mode;
34809 bool match;
34810
34811 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
34812 op = expand_normal (arg);
34813 match = insn_p->operand[i + 1].predicate (op, mode);
34814
34815 if (last_arg_constant && (i + 1) == nargs)
34816 {
34817 if (!match)
34818 {
34819 if (icode == CODE_FOR_lwp_lwpvalsi3
34820 || icode == CODE_FOR_lwp_lwpinssi3
34821 || icode == CODE_FOR_lwp_lwpvaldi3
34822 || icode == CODE_FOR_lwp_lwpinsdi3)
34823 error ("the last argument must be a 32-bit immediate");
34824 else
34825 error ("the last argument must be an 8-bit immediate");
34826 return const0_rtx;
34827 }
34828 }
34829 else
34830 {
34831 if (i == memory)
34832 {
34833 /* This must be the memory operand. */
34834 op = ix86_zero_extend_to_Pmode (op);
34835 op = gen_rtx_MEM (mode, op);
34836 /* op at this point has just BITS_PER_UNIT MEM_ALIGN
34837 on it. Try to improve it using get_pointer_alignment,
34838 and if the special builtin is one that requires strict
34839 mode alignment, also from its GET_MODE_ALIGNMENT.
34840 Failure to do so could lead to ix86_legitimate_combined_insn
34841 rejecting all changes to such insns. */
34842 unsigned int align = get_pointer_alignment (arg);
34843 if (aligned_mem && align < GET_MODE_ALIGNMENT (mode))
34844 align = GET_MODE_ALIGNMENT (mode);
34845 if (MEM_ALIGN (op) < align)
34846 set_mem_align (op, align);
34847 }
34848 else
34849 {
34850 /* This must be a register. */
34851 if (VECTOR_MODE_P (mode))
34852 op = safe_vector_operand (op, mode);
34853
34854 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
34855 op = copy_to_mode_reg (mode, op);
34856 else
34857 {
34858 op = copy_to_reg (op);
34859 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
34860 }
34861 }
34862 }
34863
34864 args[i].op = op;
34865 args[i].mode = mode;
34866 }
34867
34868 switch (nargs)
34869 {
34870 case 0:
34871 pat = GEN_FCN (icode) (target);
34872 break;
34873 case 1:
34874 pat = GEN_FCN (icode) (target, args[0].op);
34875 break;
34876 case 2:
34877 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
34878 break;
34879 case 3:
34880 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
34881 break;
34882 default:
34883 gcc_unreachable ();
34884 }
34885
34886 if (! pat)
34887 return 0;
34888 emit_insn (pat);
34889 return klass == store ? 0 : target;
34890 }
34891
34892 /* Return the integer constant in ARG. Constrain it to be in the range
34893 of the subparts of VEC_TYPE; issue an error if not. */
34894
34895 static int
34896 get_element_number (tree vec_type, tree arg)
34897 {
34898 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
34899
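/* For example, for a V4SF vector TYPE_VECTOR_SUBPARTS is 4, so the
   only valid selectors are 0, 1, 2 and 3.  */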
34900 if (!tree_fits_uhwi_p (arg)
34901 || (elt = tree_to_uhwi (arg), elt > max))
34902 {
34903 error ("selector must be an integer constant in the range 0..%wi", max);
34904 return 0;
34905 }
34906
34907 return elt;
34908 }
34909
34910 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
34911 ix86_expand_vector_init. We DO have language-level syntax for this, in
34912 the form of (type){ init-list }. Except that since we can't place emms
34913 instructions from inside the compiler, we can't allow the use of MMX
34914 registers unless the user explicitly asks for it. So we do *not* define
34915 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
34916 we have builtins invoked by mmintrin.h that give us license to emit
34917 these sorts of instructions. */
34918
34919 static rtx
34920 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
34921 {
34922 enum machine_mode tmode = TYPE_MODE (type);
34923 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
34924 int i, n_elt = GET_MODE_NUNITS (tmode);
34925 rtvec v = rtvec_alloc (n_elt);
34926
34927 gcc_assert (VECTOR_MODE_P (tmode));
34928 gcc_assert (call_expr_nargs (exp) == n_elt);
34929
34930 for (i = 0; i < n_elt; ++i)
34931 {
34932 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
34933 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
34934 }
34935
34936 if (!target || !register_operand (target, tmode))
34937 target = gen_reg_rtx (tmode);
34938
34939 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
34940 return target;
34941 }
34942
34943 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
34944 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
34945 had a language-level syntax for referencing vector elements. */
34946
34947 static rtx
34948 ix86_expand_vec_ext_builtin (tree exp, rtx target)
34949 {
34950 enum machine_mode tmode, mode0;
34951 tree arg0, arg1;
34952 int elt;
34953 rtx op0;
34954
34955 arg0 = CALL_EXPR_ARG (exp, 0);
34956 arg1 = CALL_EXPR_ARG (exp, 1);
34957
34958 op0 = expand_normal (arg0);
34959 elt = get_element_number (TREE_TYPE (arg0), arg1);
34960
34961 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
34962 mode0 = TYPE_MODE (TREE_TYPE (arg0));
34963 gcc_assert (VECTOR_MODE_P (mode0));
34964
34965 op0 = force_reg (mode0, op0);
34966
34967 if (optimize || !target || !register_operand (target, tmode))
34968 target = gen_reg_rtx (tmode);
34969
34970 ix86_expand_vector_extract (true, target, op0, elt);
34971
34972 return target;
34973 }
34974
34975 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
34976 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
34977 a language-level syntax for referencing vector elements. */
34978
34979 static rtx
34980 ix86_expand_vec_set_builtin (tree exp)
34981 {
34982 enum machine_mode tmode, mode1;
34983 tree arg0, arg1, arg2;
34984 int elt;
34985 rtx op0, op1, target;
34986
34987 arg0 = CALL_EXPR_ARG (exp, 0);
34988 arg1 = CALL_EXPR_ARG (exp, 1);
34989 arg2 = CALL_EXPR_ARG (exp, 2);
34990
34991 tmode = TYPE_MODE (TREE_TYPE (arg0));
34992 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
34993 gcc_assert (VECTOR_MODE_P (tmode));
34994
34995 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
34996 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
34997 elt = get_element_number (TREE_TYPE (arg0), arg2);
34998
34999 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
35000 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
35001
35002 op0 = force_reg (tmode, op0);
35003 op1 = force_reg (mode1, op1);
35004
35005 /* OP0 is the source of these builtin functions and shouldn't be
35006 modified. Create a copy, use it and return it as target. */
35007 target = gen_reg_rtx (tmode);
35008 emit_move_insn (target, op0);
35009 ix86_expand_vector_set (true, target, op1, elt);
35010
35011 return target;
35012 }
35013
35014 /* Expand an expression EXP that calls a built-in function,
35015 with result going to TARGET if that's convenient
35016 (and in mode MODE if that's convenient).
35017 SUBTARGET may be used as the target for computing one of EXP's operands.
35018 IGNORE is nonzero if the value is to be ignored. */
35019
35020 static rtx
35021 ix86_expand_builtin (tree exp, rtx target, rtx subtarget,
35022 enum machine_mode mode, int ignore)
35023 {
35024 const struct builtin_description *d;
35025 size_t i;
35026 enum insn_code icode;
35027 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
35028 tree arg0, arg1, arg2, arg3, arg4;
35029 rtx op0, op1, op2, op3, op4, pat, insn;
35030 enum machine_mode mode0, mode1, mode2, mode3, mode4;
35031 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
35032
35033 /* For CPU builtins that can be folded, fold first and expand the fold. */
35034 switch (fcode)
35035 {
35036 case IX86_BUILTIN_CPU_INIT:
35037 {
35038 /* Make it call __cpu_indicator_init in libgcc. */
35039 tree call_expr, fndecl, type;
35040 type = build_function_type_list (integer_type_node, NULL_TREE);
35041 fndecl = build_fn_decl ("__cpu_indicator_init", type);
35042 call_expr = build_call_expr (fndecl, 0);
35043 return expand_expr (call_expr, target, mode, EXPAND_NORMAL);
35044 }
35045 case IX86_BUILTIN_CPU_IS:
35046 case IX86_BUILTIN_CPU_SUPPORTS:
35047 {
35048 tree arg0 = CALL_EXPR_ARG (exp, 0);
35049 tree fold_expr = fold_builtin_cpu (fndecl, &arg0);
35050 gcc_assert (fold_expr != NULL_TREE);
35051 return expand_expr (fold_expr, target, mode, EXPAND_NORMAL);
35052 }
35053 }
35054
35055 /* Determine whether the builtin function is available under the current ISA.
35056 Originally the builtin was not created if it wasn't applicable to the
35057 current ISA based on the command line switches. With function specific
35058 options, we need to check in the context of the function making the call
35059 whether it is supported. */
35060 if (ix86_builtins_isa[fcode].isa
35061 && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
35062 {
35063 char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
35064 NULL, (enum fpmath_unit) 0, false);
35065
35066 if (!opts)
35067 error ("%qE needs unknown isa option", fndecl);
35068 else
35069 {
35070 gcc_assert (opts != NULL);
35071 error ("%qE needs isa option %s", fndecl, opts);
35072 free (opts);
35073 }
35074 return const0_rtx;
35075 }
35076
35077 switch (fcode)
35078 {
35079 case IX86_BUILTIN_MASKMOVQ:
35080 case IX86_BUILTIN_MASKMOVDQU:
35081 icode = (fcode == IX86_BUILTIN_MASKMOVQ
35082 ? CODE_FOR_mmx_maskmovq
35083 : CODE_FOR_sse2_maskmovdqu);
35084 /* Note the arg order is different from the operand order. */
35085 arg1 = CALL_EXPR_ARG (exp, 0);
35086 arg2 = CALL_EXPR_ARG (exp, 1);
35087 arg0 = CALL_EXPR_ARG (exp, 2);
35088 op0 = expand_normal (arg0);
35089 op1 = expand_normal (arg1);
35090 op2 = expand_normal (arg2);
35091 mode0 = insn_data[icode].operand[0].mode;
35092 mode1 = insn_data[icode].operand[1].mode;
35093 mode2 = insn_data[icode].operand[2].mode;
35094
35095 op0 = ix86_zero_extend_to_Pmode (op0);
35096 op0 = gen_rtx_MEM (mode1, op0);
35097
35098 if (!insn_data[icode].operand[0].predicate (op0, mode0))
35099 op0 = copy_to_mode_reg (mode0, op0);
35100 if (!insn_data[icode].operand[1].predicate (op1, mode1))
35101 op1 = copy_to_mode_reg (mode1, op1);
35102 if (!insn_data[icode].operand[2].predicate (op2, mode2))
35103 op2 = copy_to_mode_reg (mode2, op2);
35104 pat = GEN_FCN (icode) (op0, op1, op2);
35105 if (! pat)
35106 return 0;
35107 emit_insn (pat);
35108 return 0;
35109
35110 case IX86_BUILTIN_LDMXCSR:
35111 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
35112 target = assign_386_stack_local (SImode, SLOT_TEMP);
35113 emit_move_insn (target, op0);
35114 emit_insn (gen_sse_ldmxcsr (target));
35115 return 0;
35116
35117 case IX86_BUILTIN_STMXCSR:
35118 target = assign_386_stack_local (SImode, SLOT_TEMP);
35119 emit_insn (gen_sse_stmxcsr (target));
35120 return copy_to_mode_reg (SImode, target);
35121
35122 case IX86_BUILTIN_CLFLUSH:
35123 arg0 = CALL_EXPR_ARG (exp, 0);
35124 op0 = expand_normal (arg0);
35125 icode = CODE_FOR_sse2_clflush;
35126 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
35127 op0 = ix86_zero_extend_to_Pmode (op0);
35128
35129 emit_insn (gen_sse2_clflush (op0));
35130 return 0;
35131
35132 case IX86_BUILTIN_CLFLUSHOPT:
35133 arg0 = CALL_EXPR_ARG (exp, 0);
35134 op0 = expand_normal (arg0);
35135 icode = CODE_FOR_clflushopt;
35136 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
35137 op0 = ix86_zero_extend_to_Pmode (op0);
35138
35139 emit_insn (gen_clflushopt (op0));
35140 return 0;
35141
35142 case IX86_BUILTIN_MONITOR:
35143 arg0 = CALL_EXPR_ARG (exp, 0);
35144 arg1 = CALL_EXPR_ARG (exp, 1);
35145 arg2 = CALL_EXPR_ARG (exp, 2);
35146 op0 = expand_normal (arg0);
35147 op1 = expand_normal (arg1);
35148 op2 = expand_normal (arg2);
35149 if (!REG_P (op0))
35150 op0 = ix86_zero_extend_to_Pmode (op0);
35151 if (!REG_P (op1))
35152 op1 = copy_to_mode_reg (SImode, op1);
35153 if (!REG_P (op2))
35154 op2 = copy_to_mode_reg (SImode, op2);
35155 emit_insn (ix86_gen_monitor (op0, op1, op2));
35156 return 0;
35157
35158 case IX86_BUILTIN_MWAIT:
35159 arg0 = CALL_EXPR_ARG (exp, 0);
35160 arg1 = CALL_EXPR_ARG (exp, 1);
35161 op0 = expand_normal (arg0);
35162 op1 = expand_normal (arg1);
35163 if (!REG_P (op0))
35164 op0 = copy_to_mode_reg (SImode, op0);
35165 if (!REG_P (op1))
35166 op1 = copy_to_mode_reg (SImode, op1);
35167 emit_insn (gen_sse3_mwait (op0, op1));
35168 return 0;
35169
35170 case IX86_BUILTIN_VEC_INIT_V2SI:
35171 case IX86_BUILTIN_VEC_INIT_V4HI:
35172 case IX86_BUILTIN_VEC_INIT_V8QI:
35173 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
35174
35175 case IX86_BUILTIN_VEC_EXT_V2DF:
35176 case IX86_BUILTIN_VEC_EXT_V2DI:
35177 case IX86_BUILTIN_VEC_EXT_V4SF:
35178 case IX86_BUILTIN_VEC_EXT_V4SI:
35179 case IX86_BUILTIN_VEC_EXT_V8HI:
35180 case IX86_BUILTIN_VEC_EXT_V2SI:
35181 case IX86_BUILTIN_VEC_EXT_V4HI:
35182 case IX86_BUILTIN_VEC_EXT_V16QI:
35183 return ix86_expand_vec_ext_builtin (exp, target);
35184
35185 case IX86_BUILTIN_VEC_SET_V2DI:
35186 case IX86_BUILTIN_VEC_SET_V4SF:
35187 case IX86_BUILTIN_VEC_SET_V4SI:
35188 case IX86_BUILTIN_VEC_SET_V8HI:
35189 case IX86_BUILTIN_VEC_SET_V4HI:
35190 case IX86_BUILTIN_VEC_SET_V16QI:
35191 return ix86_expand_vec_set_builtin (exp);
35192
35193 case IX86_BUILTIN_INFQ:
35194 case IX86_BUILTIN_HUGE_VALQ:
35195 {
35196 REAL_VALUE_TYPE inf;
35197 rtx tmp;
35198
35199 real_inf (&inf);
35200 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
35201
35202 tmp = validize_mem (force_const_mem (mode, tmp));
35203
35204 if (target == 0)
35205 target = gen_reg_rtx (mode);
35206
35207 emit_move_insn (target, tmp);
35208 return target;
35209 }
35210
35211 case IX86_BUILTIN_RDPMC:
35212 case IX86_BUILTIN_RDTSC:
35213 case IX86_BUILTIN_RDTSCP:
35214
35215 op0 = gen_reg_rtx (DImode);
35216 op1 = gen_reg_rtx (DImode);
35217
35218 if (fcode == IX86_BUILTIN_RDPMC)
35219 {
35220 arg0 = CALL_EXPR_ARG (exp, 0);
35221 op2 = expand_normal (arg0);
35222 if (!register_operand (op2, SImode))
35223 op2 = copy_to_mode_reg (SImode, op2);
35224
35225 insn = (TARGET_64BIT
35226 ? gen_rdpmc_rex64 (op0, op1, op2)
35227 : gen_rdpmc (op0, op2));
35228 emit_insn (insn);
35229 }
35230 else if (fcode == IX86_BUILTIN_RDTSC)
35231 {
35232 insn = (TARGET_64BIT
35233 ? gen_rdtsc_rex64 (op0, op1)
35234 : gen_rdtsc (op0));
35235 emit_insn (insn);
35236 }
35237 else
35238 {
35239 op2 = gen_reg_rtx (SImode);
35240
35241 insn = (TARGET_64BIT
35242 ? gen_rdtscp_rex64 (op0, op1, op2)
35243 : gen_rdtscp (op0, op2));
35244 emit_insn (insn);
35245
35246 arg0 = CALL_EXPR_ARG (exp, 0);
35247 op4 = expand_normal (arg0);
35248 if (!address_operand (op4, VOIDmode))
35249 {
35250 op4 = convert_memory_address (Pmode, op4);
35251 op4 = copy_addr_to_reg (op4);
35252 }
35253 emit_move_insn (gen_rtx_MEM (SImode, op4), op2);
35254 }
35255
35256 if (target == 0)
35257 {
35258 /* mode is VOIDmode if __builtin_rd* has been called
35259 without lhs. */
35260 if (mode == VOIDmode)
35261 return target;
35262 target = gen_reg_rtx (mode);
35263 }
35264
35265 if (TARGET_64BIT)
35266 {
35267 op1 = expand_simple_binop (DImode, ASHIFT, op1, GEN_INT (32),
35268 op1, 1, OPTAB_DIRECT);
35269 op0 = expand_simple_binop (DImode, IOR, op0, op1,
35270 op0, 1, OPTAB_DIRECT);
35271 }
35272
35273 emit_move_insn (target, op0);
35274 return target;
35275
35276 case IX86_BUILTIN_FXSAVE:
35277 case IX86_BUILTIN_FXRSTOR:
35278 case IX86_BUILTIN_FXSAVE64:
35279 case IX86_BUILTIN_FXRSTOR64:
35280 case IX86_BUILTIN_FNSTENV:
35281 case IX86_BUILTIN_FLDENV:
35282 mode0 = BLKmode;
35283 switch (fcode)
35284 {
35285 case IX86_BUILTIN_FXSAVE:
35286 icode = CODE_FOR_fxsave;
35287 break;
35288 case IX86_BUILTIN_FXRSTOR:
35289 icode = CODE_FOR_fxrstor;
35290 break;
35291 case IX86_BUILTIN_FXSAVE64:
35292 icode = CODE_FOR_fxsave64;
35293 break;
35294 case IX86_BUILTIN_FXRSTOR64:
35295 icode = CODE_FOR_fxrstor64;
35296 break;
35297 case IX86_BUILTIN_FNSTENV:
35298 icode = CODE_FOR_fnstenv;
35299 break;
35300 case IX86_BUILTIN_FLDENV:
35301 icode = CODE_FOR_fldenv;
35302 break;
35303 default:
35304 gcc_unreachable ();
35305 }
35306
35307 arg0 = CALL_EXPR_ARG (exp, 0);
35308 op0 = expand_normal (arg0);
35309
35310 if (!address_operand (op0, VOIDmode))
35311 {
35312 op0 = convert_memory_address (Pmode, op0);
35313 op0 = copy_addr_to_reg (op0);
35314 }
35315 op0 = gen_rtx_MEM (mode0, op0);
35316
35317 pat = GEN_FCN (icode) (op0);
35318 if (pat)
35319 emit_insn (pat);
35320 return 0;
35321
35322 case IX86_BUILTIN_XSAVE:
35323 case IX86_BUILTIN_XRSTOR:
35324 case IX86_BUILTIN_XSAVE64:
35325 case IX86_BUILTIN_XRSTOR64:
35326 case IX86_BUILTIN_XSAVEOPT:
35327 case IX86_BUILTIN_XSAVEOPT64:
35328 case IX86_BUILTIN_XSAVES:
35329 case IX86_BUILTIN_XRSTORS:
35330 case IX86_BUILTIN_XSAVES64:
35331 case IX86_BUILTIN_XRSTORS64:
35332 case IX86_BUILTIN_XSAVEC:
35333 case IX86_BUILTIN_XSAVEC64:
35334 arg0 = CALL_EXPR_ARG (exp, 0);
35335 arg1 = CALL_EXPR_ARG (exp, 1);
35336 op0 = expand_normal (arg0);
35337 op1 = expand_normal (arg1);
35338
35339 if (!address_operand (op0, VOIDmode))
35340 {
35341 op0 = convert_memory_address (Pmode, op0);
35342 op0 = copy_addr_to_reg (op0);
35343 }
35344 op0 = gen_rtx_MEM (BLKmode, op0);
35345
35346 op1 = force_reg (DImode, op1);
35347
35348 if (TARGET_64BIT)
35349 {
35350 op2 = expand_simple_binop (DImode, LSHIFTRT, op1, GEN_INT (32),
35351 NULL, 1, OPTAB_DIRECT);
35352 switch (fcode)
35353 {
35354 case IX86_BUILTIN_XSAVE:
35355 icode = CODE_FOR_xsave_rex64;
35356 break;
35357 case IX86_BUILTIN_XRSTOR:
35358 icode = CODE_FOR_xrstor_rex64;
35359 break;
35360 case IX86_BUILTIN_XSAVE64:
35361 icode = CODE_FOR_xsave64;
35362 break;
35363 case IX86_BUILTIN_XRSTOR64:
35364 icode = CODE_FOR_xrstor64;
35365 break;
35366 case IX86_BUILTIN_XSAVEOPT:
35367 icode = CODE_FOR_xsaveopt_rex64;
35368 break;
35369 case IX86_BUILTIN_XSAVEOPT64:
35370 icode = CODE_FOR_xsaveopt64;
35371 break;
35372 case IX86_BUILTIN_XSAVES:
35373 icode = CODE_FOR_xsaves_rex64;
35374 break;
35375 case IX86_BUILTIN_XRSTORS:
35376 icode = CODE_FOR_xrstors_rex64;
35377 break;
35378 case IX86_BUILTIN_XSAVES64:
35379 icode = CODE_FOR_xsaves64;
35380 break;
35381 case IX86_BUILTIN_XRSTORS64:
35382 icode = CODE_FOR_xrstors64;
35383 break;
35384 case IX86_BUILTIN_XSAVEC:
35385 icode = CODE_FOR_xsavec_rex64;
35386 break;
35387 case IX86_BUILTIN_XSAVEC64:
35388 icode = CODE_FOR_xsavec64;
35389 break;
35390 default:
35391 gcc_unreachable ();
35392 }
35393
35394 op2 = gen_lowpart (SImode, op2);
35395 op1 = gen_lowpart (SImode, op1);
35396 pat = GEN_FCN (icode) (op0, op1, op2);
35397 }
35398 else
35399 {
35400 switch (fcode)
35401 {
35402 case IX86_BUILTIN_XSAVE:
35403 icode = CODE_FOR_xsave;
35404 break;
35405 case IX86_BUILTIN_XRSTOR:
35406 icode = CODE_FOR_xrstor;
35407 break;
35408 case IX86_BUILTIN_XSAVEOPT:
35409 icode = CODE_FOR_xsaveopt;
35410 break;
35411 case IX86_BUILTIN_XSAVES:
35412 icode = CODE_FOR_xsaves;
35413 break;
35414 case IX86_BUILTIN_XRSTORS:
35415 icode = CODE_FOR_xrstors;
35416 break;
35417 case IX86_BUILTIN_XSAVEC:
35418 icode = CODE_FOR_xsavec;
35419 break;
35420 default:
35421 gcc_unreachable ();
35422 }
35423 pat = GEN_FCN (icode) (op0, op1);
35424 }
35425
35426 if (pat)
35427 emit_insn (pat);
35428 return 0;
35429
35430 case IX86_BUILTIN_LLWPCB:
35431 arg0 = CALL_EXPR_ARG (exp, 0);
35432 op0 = expand_normal (arg0);
35433 icode = CODE_FOR_lwp_llwpcb;
35434 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
35435 op0 = ix86_zero_extend_to_Pmode (op0);
35436 emit_insn (gen_lwp_llwpcb (op0));
35437 return 0;
35438
35439 case IX86_BUILTIN_SLWPCB:
35440 icode = CODE_FOR_lwp_slwpcb;
35441 if (!target
35442 || !insn_data[icode].operand[0].predicate (target, Pmode))
35443 target = gen_reg_rtx (Pmode);
35444 emit_insn (gen_lwp_slwpcb (target));
35445 return target;
35446
35447 case IX86_BUILTIN_BEXTRI32:
35448 case IX86_BUILTIN_BEXTRI64:
35449 arg0 = CALL_EXPR_ARG (exp, 0);
35450 arg1 = CALL_EXPR_ARG (exp, 1);
35451 op0 = expand_normal (arg0);
35452 op1 = expand_normal (arg1);
35453 icode = (fcode == IX86_BUILTIN_BEXTRI32
35454 ? CODE_FOR_tbm_bextri_si
35455 : CODE_FOR_tbm_bextri_di);
35456 if (!CONST_INT_P (op1))
35457 {
35458 error ("last argument must be an immediate");
35459 return const0_rtx;
35460 }
35461 else
35462 {
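/* The BEXTRI control operand packs the field length in bits 15:8 and
   the starting bit index in bits 7:0; e.g. an (illustrative) control
   value of 0x0804 selects an 8-bit field starting at bit 4.  */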
35463 unsigned char length = (INTVAL (op1) >> 8) & 0xFF;
35464 unsigned char lsb_index = INTVAL (op1) & 0xFF;
35465 op1 = GEN_INT (length);
35466 op2 = GEN_INT (lsb_index);
35467 pat = GEN_FCN (icode) (target, op0, op1, op2);
35468 if (pat)
35469 emit_insn (pat);
35470 return target;
35471 }
35472
35473 case IX86_BUILTIN_RDRAND16_STEP:
35474 icode = CODE_FOR_rdrandhi_1;
35475 mode0 = HImode;
35476 goto rdrand_step;
35477
35478 case IX86_BUILTIN_RDRAND32_STEP:
35479 icode = CODE_FOR_rdrandsi_1;
35480 mode0 = SImode;
35481 goto rdrand_step;
35482
35483 case IX86_BUILTIN_RDRAND64_STEP:
35484 icode = CODE_FOR_rdranddi_1;
35485 mode0 = DImode;
35486
35487 rdrand_step:
35488 op0 = gen_reg_rtx (mode0);
35489 emit_insn (GEN_FCN (icode) (op0));
35490
35491 arg0 = CALL_EXPR_ARG (exp, 0);
35492 op1 = expand_normal (arg0);
35493 if (!address_operand (op1, VOIDmode))
35494 {
35495 op1 = convert_memory_address (Pmode, op1);
35496 op1 = copy_addr_to_reg (op1);
35497 }
35498 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
35499
35500 op1 = gen_reg_rtx (SImode);
35501 emit_move_insn (op1, CONST1_RTX (SImode));
35502
35503 /* Emit SImode conditional move. */
35504 if (mode0 == HImode)
35505 {
35506 op2 = gen_reg_rtx (SImode);
35507 emit_insn (gen_zero_extendhisi2 (op2, op0));
35508 }
35509 else if (mode0 == SImode)
35510 op2 = op0;
35511 else
35512 op2 = gen_rtx_SUBREG (SImode, op0, 0);
35513
35514 if (target == 0
35515 || !register_operand (target, SImode))
35516 target = gen_reg_rtx (SImode);
35517
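/* RDRAND sets CF when a random value was delivered and clears the
   destination register otherwise, so selecting op2 (the copied result)
   when CF is clear and 1 when it is set yields the usual
   1-on-success / 0-on-failure step return value.  */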
35518 pat = gen_rtx_GEU (VOIDmode, gen_rtx_REG (CCCmode, FLAGS_REG),
35519 const0_rtx);
35520 emit_insn (gen_rtx_SET (VOIDmode, target,
35521 gen_rtx_IF_THEN_ELSE (SImode, pat, op2, op1)));
35522 return target;
35523
35524 case IX86_BUILTIN_RDSEED16_STEP:
35525 icode = CODE_FOR_rdseedhi_1;
35526 mode0 = HImode;
35527 goto rdseed_step;
35528
35529 case IX86_BUILTIN_RDSEED32_STEP:
35530 icode = CODE_FOR_rdseedsi_1;
35531 mode0 = SImode;
35532 goto rdseed_step;
35533
35534 case IX86_BUILTIN_RDSEED64_STEP:
35535 icode = CODE_FOR_rdseeddi_1;
35536 mode0 = DImode;
35537
35538 rdseed_step:
35539 op0 = gen_reg_rtx (mode0);
35540 emit_insn (GEN_FCN (icode) (op0));
35541
35542 arg0 = CALL_EXPR_ARG (exp, 0);
35543 op1 = expand_normal (arg0);
35544 if (!address_operand (op1, VOIDmode))
35545 {
35546 op1 = convert_memory_address (Pmode, op1);
35547 op1 = copy_addr_to_reg (op1);
35548 }
35549 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
35550
35551 op2 = gen_reg_rtx (QImode);
35552
35553 pat = gen_rtx_LTU (QImode, gen_rtx_REG (CCCmode, FLAGS_REG),
35554 const0_rtx);
35555 emit_insn (gen_rtx_SET (VOIDmode, op2, pat));
35556
35557 if (target == 0
35558 || !register_operand (target, SImode))
35559 target = gen_reg_rtx (SImode);
35560
35561 emit_insn (gen_zero_extendqisi2 (target, op2));
35562 return target;
35563
35564 case IX86_BUILTIN_ADDCARRYX32:
35565 icode = TARGET_ADX ? CODE_FOR_adcxsi3 : CODE_FOR_addsi3_carry;
35566 mode0 = SImode;
35567 goto addcarryx;
35568
35569 case IX86_BUILTIN_ADDCARRYX64:
35570 icode = TARGET_ADX ? CODE_FOR_adcxdi3 : CODE_FOR_adddi3_carry;
35571 mode0 = DImode;
35572
35573 addcarryx:
35574 arg0 = CALL_EXPR_ARG (exp, 0); /* unsigned char c_in. */
35575 arg1 = CALL_EXPR_ARG (exp, 1); /* unsigned int src1. */
35576 arg2 = CALL_EXPR_ARG (exp, 2); /* unsigned int src2. */
35577 arg3 = CALL_EXPR_ARG (exp, 3); /* unsigned int *sum_out. */
35578
35579 op0 = gen_reg_rtx (QImode);
35580
35581 /* Generate CF from input operand. */
35582 op1 = expand_normal (arg0);
35583 op1 = copy_to_mode_reg (QImode, convert_to_mode (QImode, op1, 1));
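/* Adding the all-ones QImode constant (-1) to the zero-extended
   carry-in carries out of QImode exactly when the carry-in is nonzero,
   recreating CF from the incoming flag byte.  */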
35584 emit_insn (gen_addqi3_cc (op0, op1, constm1_rtx));
35585
35586 /* Generate an ADCX (or plain add-with-carry) instruction to compute X+Y+CF. */
35587 op2 = expand_normal (arg1);
35588 op3 = expand_normal (arg2);
35589
35590 if (!REG_P (op2))
35591 op2 = copy_to_mode_reg (mode0, op2);
35592 if (!REG_P (op3))
35593 op3 = copy_to_mode_reg (mode0, op3);
35594
35595 op0 = gen_reg_rtx (mode0);
35596
35597 op4 = gen_rtx_REG (CCCmode, FLAGS_REG);
35598 pat = gen_rtx_LTU (VOIDmode, op4, const0_rtx);
35599 emit_insn (GEN_FCN (icode) (op0, op2, op3, op4, pat));
35600
35601 /* Store the result. */
35602 op4 = expand_normal (arg3);
35603 if (!address_operand (op4, VOIDmode))
35604 {
35605 op4 = convert_memory_address (Pmode, op4);
35606 op4 = copy_addr_to_reg (op4);
35607 }
35608 emit_move_insn (gen_rtx_MEM (mode0, op4), op0);
35609
35610 /* Return current CF value. */
35611 if (target == 0)
35612 target = gen_reg_rtx (QImode);
35613
35614 PUT_MODE (pat, QImode);
35615 emit_insn (gen_rtx_SET (VOIDmode, target, pat));
35616 return target;
35617
35618 case IX86_BUILTIN_READ_FLAGS:
35619 emit_insn (gen_push (gen_rtx_REG (word_mode, FLAGS_REG)));
35620
35621 if (optimize
35622 || target == NULL_RTX
35623 || !nonimmediate_operand (target, word_mode)
35624 || GET_MODE (target) != word_mode)
35625 target = gen_reg_rtx (word_mode);
35626
35627 emit_insn (gen_pop (target));
35628 return target;
35629
35630 case IX86_BUILTIN_WRITE_FLAGS:
35631
35632 arg0 = CALL_EXPR_ARG (exp, 0);
35633 op0 = expand_normal (arg0);
35634 if (!general_no_elim_operand (op0, word_mode))
35635 op0 = copy_to_mode_reg (word_mode, op0);
35636
35637 emit_insn (gen_push (op0));
35638 emit_insn (gen_pop (gen_rtx_REG (word_mode, FLAGS_REG)));
35639 return 0;
35640
35641 case IX86_BUILTIN_KORTESTC16:
35642 icode = CODE_FOR_kortestchi;
35643 mode0 = HImode;
35644 mode1 = CCCmode;
35645 goto kortest;
35646
35647 case IX86_BUILTIN_KORTESTZ16:
35648 icode = CODE_FOR_kortestzhi;
35649 mode0 = HImode;
35650 mode1 = CCZmode;
35651
35652 kortest:
35653 arg0 = CALL_EXPR_ARG (exp, 0); /* Mask reg src1. */
35654 arg1 = CALL_EXPR_ARG (exp, 1); /* Mask reg src2. */
35655 op0 = expand_normal (arg0);
35656 op1 = expand_normal (arg1);
35657
35658 op0 = copy_to_reg (op0);
35659 op0 = simplify_gen_subreg (mode0, op0, GET_MODE (op0), 0);
35660 op1 = copy_to_reg (op1);
35661 op1 = simplify_gen_subreg (mode0, op1, GET_MODE (op1), 0);
35662
35663 target = gen_reg_rtx (QImode);
35664 emit_insn (gen_rtx_SET (mode0, target, const0_rtx));
35665
35666 /* Emit kortest. */
35667 emit_insn (GEN_FCN (icode) (op0, op1));
35668 /* And use setcc to return result from flags. */
35669 ix86_expand_setcc (target, EQ,
35670 gen_rtx_REG (mode1, FLAGS_REG), const0_rtx);
35671 return target;
35672
35673 case IX86_BUILTIN_GATHERSIV2DF:
35674 icode = CODE_FOR_avx2_gathersiv2df;
35675 goto gather_gen;
35676 case IX86_BUILTIN_GATHERSIV4DF:
35677 icode = CODE_FOR_avx2_gathersiv4df;
35678 goto gather_gen;
35679 case IX86_BUILTIN_GATHERDIV2DF:
35680 icode = CODE_FOR_avx2_gatherdiv2df;
35681 goto gather_gen;
35682 case IX86_BUILTIN_GATHERDIV4DF:
35683 icode = CODE_FOR_avx2_gatherdiv4df;
35684 goto gather_gen;
35685 case IX86_BUILTIN_GATHERSIV4SF:
35686 icode = CODE_FOR_avx2_gathersiv4sf;
35687 goto gather_gen;
35688 case IX86_BUILTIN_GATHERSIV8SF:
35689 icode = CODE_FOR_avx2_gathersiv8sf;
35690 goto gather_gen;
35691 case IX86_BUILTIN_GATHERDIV4SF:
35692 icode = CODE_FOR_avx2_gatherdiv4sf;
35693 goto gather_gen;
35694 case IX86_BUILTIN_GATHERDIV8SF:
35695 icode = CODE_FOR_avx2_gatherdiv8sf;
35696 goto gather_gen;
35697 case IX86_BUILTIN_GATHERSIV2DI:
35698 icode = CODE_FOR_avx2_gathersiv2di;
35699 goto gather_gen;
35700 case IX86_BUILTIN_GATHERSIV4DI:
35701 icode = CODE_FOR_avx2_gathersiv4di;
35702 goto gather_gen;
35703 case IX86_BUILTIN_GATHERDIV2DI:
35704 icode = CODE_FOR_avx2_gatherdiv2di;
35705 goto gather_gen;
35706 case IX86_BUILTIN_GATHERDIV4DI:
35707 icode = CODE_FOR_avx2_gatherdiv4di;
35708 goto gather_gen;
35709 case IX86_BUILTIN_GATHERSIV4SI:
35710 icode = CODE_FOR_avx2_gathersiv4si;
35711 goto gather_gen;
35712 case IX86_BUILTIN_GATHERSIV8SI:
35713 icode = CODE_FOR_avx2_gathersiv8si;
35714 goto gather_gen;
35715 case IX86_BUILTIN_GATHERDIV4SI:
35716 icode = CODE_FOR_avx2_gatherdiv4si;
35717 goto gather_gen;
35718 case IX86_BUILTIN_GATHERDIV8SI:
35719 icode = CODE_FOR_avx2_gatherdiv8si;
35720 goto gather_gen;
35721 case IX86_BUILTIN_GATHERALTSIV4DF:
35722 icode = CODE_FOR_avx2_gathersiv4df;
35723 goto gather_gen;
35724 case IX86_BUILTIN_GATHERALTDIV8SF:
35725 icode = CODE_FOR_avx2_gatherdiv8sf;
35726 goto gather_gen;
35727 case IX86_BUILTIN_GATHERALTSIV4DI:
35728 icode = CODE_FOR_avx2_gathersiv4di;
35729 goto gather_gen;
35730 case IX86_BUILTIN_GATHERALTDIV8SI:
35731 icode = CODE_FOR_avx2_gatherdiv8si;
35732 goto gather_gen;
35733 case IX86_BUILTIN_GATHER3SIV16SF:
35734 icode = CODE_FOR_avx512f_gathersiv16sf;
35735 goto gather_gen;
35736 case IX86_BUILTIN_GATHER3SIV8DF:
35737 icode = CODE_FOR_avx512f_gathersiv8df;
35738 goto gather_gen;
35739 case IX86_BUILTIN_GATHER3DIV16SF:
35740 icode = CODE_FOR_avx512f_gatherdiv16sf;
35741 goto gather_gen;
35742 case IX86_BUILTIN_GATHER3DIV8DF:
35743 icode = CODE_FOR_avx512f_gatherdiv8df;
35744 goto gather_gen;
35745 case IX86_BUILTIN_GATHER3SIV16SI:
35746 icode = CODE_FOR_avx512f_gathersiv16si;
35747 goto gather_gen;
35748 case IX86_BUILTIN_GATHER3SIV8DI:
35749 icode = CODE_FOR_avx512f_gathersiv8di;
35750 goto gather_gen;
35751 case IX86_BUILTIN_GATHER3DIV16SI:
35752 icode = CODE_FOR_avx512f_gatherdiv16si;
35753 goto gather_gen;
35754 case IX86_BUILTIN_GATHER3DIV8DI:
35755 icode = CODE_FOR_avx512f_gatherdiv8di;
35756 goto gather_gen;
35757 case IX86_BUILTIN_GATHER3ALTSIV8DF:
35758 icode = CODE_FOR_avx512f_gathersiv8df;
35759 goto gather_gen;
35760 case IX86_BUILTIN_GATHER3ALTDIV16SF:
35761 icode = CODE_FOR_avx512f_gatherdiv16sf;
35762 goto gather_gen;
35763 case IX86_BUILTIN_GATHER3ALTSIV8DI:
35764 icode = CODE_FOR_avx512f_gathersiv8di;
35765 goto gather_gen;
35766 case IX86_BUILTIN_GATHER3ALTDIV16SI:
35767 icode = CODE_FOR_avx512f_gatherdiv16si;
35768 goto gather_gen;
35769 case IX86_BUILTIN_SCATTERSIV16SF:
35770 icode = CODE_FOR_avx512f_scattersiv16sf;
35771 goto scatter_gen;
35772 case IX86_BUILTIN_SCATTERSIV8DF:
35773 icode = CODE_FOR_avx512f_scattersiv8df;
35774 goto scatter_gen;
35775 case IX86_BUILTIN_SCATTERDIV16SF:
35776 icode = CODE_FOR_avx512f_scatterdiv16sf;
35777 goto scatter_gen;
35778 case IX86_BUILTIN_SCATTERDIV8DF:
35779 icode = CODE_FOR_avx512f_scatterdiv8df;
35780 goto scatter_gen;
35781 case IX86_BUILTIN_SCATTERSIV16SI:
35782 icode = CODE_FOR_avx512f_scattersiv16si;
35783 goto scatter_gen;
35784 case IX86_BUILTIN_SCATTERSIV8DI:
35785 icode = CODE_FOR_avx512f_scattersiv8di;
35786 goto scatter_gen;
35787 case IX86_BUILTIN_SCATTERDIV16SI:
35788 icode = CODE_FOR_avx512f_scatterdiv16si;
35789 goto scatter_gen;
35790 case IX86_BUILTIN_SCATTERDIV8DI:
35791 icode = CODE_FOR_avx512f_scatterdiv8di;
35792 goto scatter_gen;
35793
35794 case IX86_BUILTIN_GATHERPFDPD:
35795 icode = CODE_FOR_avx512pf_gatherpfv8sidf;
35796 goto vec_prefetch_gen;
35797 case IX86_BUILTIN_GATHERPFDPS:
35798 icode = CODE_FOR_avx512pf_gatherpfv16sisf;
35799 goto vec_prefetch_gen;
35800 case IX86_BUILTIN_GATHERPFQPD:
35801 icode = CODE_FOR_avx512pf_gatherpfv8didf;
35802 goto vec_prefetch_gen;
35803 case IX86_BUILTIN_GATHERPFQPS:
35804 icode = CODE_FOR_avx512pf_gatherpfv8disf;
35805 goto vec_prefetch_gen;
35806 case IX86_BUILTIN_SCATTERPFDPD:
35807 icode = CODE_FOR_avx512pf_scatterpfv8sidf;
35808 goto vec_prefetch_gen;
35809 case IX86_BUILTIN_SCATTERPFDPS:
35810 icode = CODE_FOR_avx512pf_scatterpfv16sisf;
35811 goto vec_prefetch_gen;
35812 case IX86_BUILTIN_SCATTERPFQPD:
35813 icode = CODE_FOR_avx512pf_scatterpfv8didf;
35814 goto vec_prefetch_gen;
35815 case IX86_BUILTIN_SCATTERPFQPS:
35816 icode = CODE_FOR_avx512pf_scatterpfv8disf;
35817 goto vec_prefetch_gen;
35818
35819 gather_gen:
35820 rtx half;
35821 rtx (*gen) (rtx, rtx);
35822
35823 arg0 = CALL_EXPR_ARG (exp, 0);
35824 arg1 = CALL_EXPR_ARG (exp, 1);
35825 arg2 = CALL_EXPR_ARG (exp, 2);
35826 arg3 = CALL_EXPR_ARG (exp, 3);
35827 arg4 = CALL_EXPR_ARG (exp, 4);
35828 op0 = expand_normal (arg0);
35829 op1 = expand_normal (arg1);
35830 op2 = expand_normal (arg2);
35831 op3 = expand_normal (arg3);
35832 op4 = expand_normal (arg4);
35833 /* Note the arg order is different from the operand order. */
35834 mode0 = insn_data[icode].operand[1].mode;
35835 mode2 = insn_data[icode].operand[3].mode;
35836 mode3 = insn_data[icode].operand[4].mode;
35837 mode4 = insn_data[icode].operand[5].mode;
35838
35839 if (target == NULL_RTX
35840 || GET_MODE (target) != insn_data[icode].operand[0].mode
35841 || !insn_data[icode].operand[0].predicate (target,
35842 GET_MODE (target)))
35843 subtarget = gen_reg_rtx (insn_data[icode].operand[0].mode);
35844 else
35845 subtarget = target;
35846
35847 switch (fcode)
35848 {
35849 case IX86_BUILTIN_GATHER3ALTSIV8DF:
35850 case IX86_BUILTIN_GATHER3ALTSIV8DI:
35851 half = gen_reg_rtx (V8SImode);
35852 if (!nonimmediate_operand (op2, V16SImode))
35853 op2 = copy_to_mode_reg (V16SImode, op2);
35854 emit_insn (gen_vec_extract_lo_v16si (half, op2));
35855 op2 = half;
35856 break;
35857 case IX86_BUILTIN_GATHERALTSIV4DF:
35858 case IX86_BUILTIN_GATHERALTSIV4DI:
35859 half = gen_reg_rtx (V4SImode);
35860 if (!nonimmediate_operand (op2, V8SImode))
35861 op2 = copy_to_mode_reg (V8SImode, op2);
35862 emit_insn (gen_vec_extract_lo_v8si (half, op2));
35863 op2 = half;
35864 break;
35865 case IX86_BUILTIN_GATHER3ALTDIV16SF:
35866 case IX86_BUILTIN_GATHER3ALTDIV16SI:
35867 half = gen_reg_rtx (mode0);
35868 if (mode0 == V8SFmode)
35869 gen = gen_vec_extract_lo_v16sf;
35870 else
35871 gen = gen_vec_extract_lo_v16si;
35872 if (!nonimmediate_operand (op0, GET_MODE (op0)))
35873 op0 = copy_to_mode_reg (GET_MODE (op0), op0);
35874 emit_insn (gen (half, op0));
35875 op0 = half;
35876 if (GET_MODE (op3) != VOIDmode)
35877 {
35878 if (!nonimmediate_operand (op3, GET_MODE (op3)))
35879 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
35880 emit_insn (gen (half, op3));
35881 op3 = half;
35882 }
35883 break;
35884 case IX86_BUILTIN_GATHERALTDIV8SF:
35885 case IX86_BUILTIN_GATHERALTDIV8SI:
35886 half = gen_reg_rtx (mode0);
35887 if (mode0 == V4SFmode)
35888 gen = gen_vec_extract_lo_v8sf;
35889 else
35890 gen = gen_vec_extract_lo_v8si;
35891 if (!nonimmediate_operand (op0, GET_MODE (op0)))
35892 op0 = copy_to_mode_reg (GET_MODE (op0), op0);
35893 emit_insn (gen (half, op0));
35894 op0 = half;
35895 if (GET_MODE (op3) != VOIDmode)
35896 {
35897 if (!nonimmediate_operand (op3, GET_MODE (op3)))
35898 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
35899 emit_insn (gen (half, op3));
35900 op3 = half;
35901 }
35902 break;
35903 default:
35904 break;
35905 }
35906
35907 /* Force the memory operand to use only a base register here. We
35908 don't want to do that for the memory operands of other builtin
35909 functions. */
35910 op1 = ix86_zero_extend_to_Pmode (op1);
35911
35912 if (!insn_data[icode].operand[1].predicate (op0, mode0))
35913 op0 = copy_to_mode_reg (mode0, op0);
35914 if (!insn_data[icode].operand[2].predicate (op1, Pmode))
35915 op1 = copy_to_mode_reg (Pmode, op1);
35916 if (!insn_data[icode].operand[3].predicate (op2, mode2))
35917 op2 = copy_to_mode_reg (mode2, op2);
35918 if (GET_MODE (op3) == mode3 || GET_MODE (op3) == VOIDmode)
35919 {
35920 if (!insn_data[icode].operand[4].predicate (op3, mode3))
35921 op3 = copy_to_mode_reg (mode3, op3);
35922 }
35923 else
35924 {
35925 op3 = copy_to_reg (op3);
35926 op3 = simplify_gen_subreg (mode3, op3, GET_MODE (op3), 0);
35927 }
35928 if (!insn_data[icode].operand[5].predicate (op4, mode4))
35929 {
35930 error ("the last argument must be scale 1, 2, 4, 8");
35931 return const0_rtx;
35932 }
35933
35934 /* Optimize. If the mask is known to have the high bit of every
35935 element set, replace op0 with pc_rtx to signal that the instruction
35936 overwrites the whole destination and doesn't use its
35937 previous contents. */
35938 if (optimize)
35939 {
35940 if (TREE_CODE (arg3) == INTEGER_CST)
35941 {
35942 if (integer_all_onesp (arg3))
35943 op0 = pc_rtx;
35944 }
35945 else if (TREE_CODE (arg3) == VECTOR_CST)
35946 {
35947 unsigned int negative = 0;
35948 for (i = 0; i < VECTOR_CST_NELTS (arg3); ++i)
35949 {
35950 tree cst = VECTOR_CST_ELT (arg3, i);
35951 if (TREE_CODE (cst) == INTEGER_CST
35952 && tree_int_cst_sign_bit (cst))
35953 negative++;
35954 else if (TREE_CODE (cst) == REAL_CST
35955 && REAL_VALUE_NEGATIVE (TREE_REAL_CST (cst)))
35956 negative++;
35957 }
35958 if (negative == TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg3)))
35959 op0 = pc_rtx;
35960 }
35961 else if (TREE_CODE (arg3) == SSA_NAME
35962 && TREE_CODE (TREE_TYPE (arg3)) == VECTOR_TYPE)
35963 {
35964 /* Recognize also when mask is like:
35965 __v2df src = _mm_setzero_pd ();
35966 __v2df mask = _mm_cmpeq_pd (src, src);
35967 or
35968 __v8sf src = _mm256_setzero_ps ();
35969 __v8sf mask = _mm256_cmp_ps (src, src, _CMP_EQ_OQ);
35970 as that is a cheaper way to load all ones into
35971 a register than having to load a constant from
35972 memory. */
35973 gimple def_stmt = SSA_NAME_DEF_STMT (arg3);
35974 if (is_gimple_call (def_stmt))
35975 {
35976 tree fndecl = gimple_call_fndecl (def_stmt);
35977 if (fndecl
35978 && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
35979 switch ((unsigned int) DECL_FUNCTION_CODE (fndecl))
35980 {
35981 case IX86_BUILTIN_CMPPD:
35982 case IX86_BUILTIN_CMPPS:
35983 case IX86_BUILTIN_CMPPD256:
35984 case IX86_BUILTIN_CMPPS256:
35985 if (!integer_zerop (gimple_call_arg (def_stmt, 2)))
35986 break;
35987 /* FALLTHRU */
35988 case IX86_BUILTIN_CMPEQPD:
35989 case IX86_BUILTIN_CMPEQPS:
35990 if (initializer_zerop (gimple_call_arg (def_stmt, 0))
35991 && initializer_zerop (gimple_call_arg (def_stmt,
35992 1)))
35993 op0 = pc_rtx;
35994 break;
35995 default:
35996 break;
35997 }
35998 }
35999 }
36000 }
36001
36002 pat = GEN_FCN (icode) (subtarget, op0, op1, op2, op3, op4);
36003 if (! pat)
36004 return const0_rtx;
36005 emit_insn (pat);
36006
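/* For the DIV variants only half a vector of elements is gathered;
   they end up in the low half of SUBTARGET, so extract that half as
   the result.  */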
36007 switch (fcode)
36008 {
36009 case IX86_BUILTIN_GATHER3DIV16SF:
36010 if (target == NULL_RTX)
36011 target = gen_reg_rtx (V8SFmode);
36012 emit_insn (gen_vec_extract_lo_v16sf (target, subtarget));
36013 break;
36014 case IX86_BUILTIN_GATHER3DIV16SI:
36015 if (target == NULL_RTX)
36016 target = gen_reg_rtx (V8SImode);
36017 emit_insn (gen_vec_extract_lo_v16si (target, subtarget));
36018 break;
36019 case IX86_BUILTIN_GATHERDIV8SF:
36020 if (target == NULL_RTX)
36021 target = gen_reg_rtx (V4SFmode);
36022 emit_insn (gen_vec_extract_lo_v8sf (target, subtarget));
36023 break;
36024 case IX86_BUILTIN_GATHERDIV8SI:
36025 if (target == NULL_RTX)
36026 target = gen_reg_rtx (V4SImode);
36027 emit_insn (gen_vec_extract_lo_v8si (target, subtarget));
36028 break;
36029 default:
36030 target = subtarget;
36031 break;
36032 }
36033 return target;
36034
36035 scatter_gen:
36036 arg0 = CALL_EXPR_ARG (exp, 0);
36037 arg1 = CALL_EXPR_ARG (exp, 1);
36038 arg2 = CALL_EXPR_ARG (exp, 2);
36039 arg3 = CALL_EXPR_ARG (exp, 3);
36040 arg4 = CALL_EXPR_ARG (exp, 4);
36041 op0 = expand_normal (arg0);
36042 op1 = expand_normal (arg1);
36043 op2 = expand_normal (arg2);
36044 op3 = expand_normal (arg3);
36045 op4 = expand_normal (arg4);
36046 mode1 = insn_data[icode].operand[1].mode;
36047 mode2 = insn_data[icode].operand[2].mode;
36048 mode3 = insn_data[icode].operand[3].mode;
36049 mode4 = insn_data[icode].operand[4].mode;
36050
36051 /* Force the memory operand to be addressed through a base register
36052 here. We don't want to do this to the memory operands of other
36053 builtin functions. */
36054 op0 = force_reg (Pmode, convert_to_mode (Pmode, op0, 1));
36055
36056 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
36057 op0 = copy_to_mode_reg (Pmode, op0);
36058
36059 if (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode)
36060 {
36061 if (!insn_data[icode].operand[1].predicate (op1, mode1))
36062 op1 = copy_to_mode_reg (mode1, op1);
36063 }
36064 else
36065 {
36066 op1 = copy_to_reg (op1);
36067 op1 = simplify_gen_subreg (mode1, op1, GET_MODE (op1), 0);
36068 }
36069
36070 if (!insn_data[icode].operand[2].predicate (op2, mode2))
36071 op2 = copy_to_mode_reg (mode2, op2);
36072
36073 if (!insn_data[icode].operand[3].predicate (op3, mode3))
36074 op3 = copy_to_mode_reg (mode3, op3);
36075
36076 if (!insn_data[icode].operand[4].predicate (op4, mode4))
36077 {
36078 error ("the last argument must be scale 1, 2, 4, 8");
36079 return const0_rtx;
36080 }
36081
36082 pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
36083 if (! pat)
36084 return const0_rtx;
36085
36086 emit_insn (pat);
36087 return 0;
36088
36089 vec_prefetch_gen:
36090 arg0 = CALL_EXPR_ARG (exp, 0);
36091 arg1 = CALL_EXPR_ARG (exp, 1);
36092 arg2 = CALL_EXPR_ARG (exp, 2);
36093 arg3 = CALL_EXPR_ARG (exp, 3);
36094 arg4 = CALL_EXPR_ARG (exp, 4);
36095 op0 = expand_normal (arg0);
36096 op1 = expand_normal (arg1);
36097 op2 = expand_normal (arg2);
36098 op3 = expand_normal (arg3);
36099 op4 = expand_normal (arg4);
36100 mode0 = insn_data[icode].operand[0].mode;
36101 mode1 = insn_data[icode].operand[1].mode;
36102 mode3 = insn_data[icode].operand[3].mode;
36103 mode4 = insn_data[icode].operand[4].mode;
36104
36105 if (GET_MODE (op0) == mode0
36106 || (GET_MODE (op0) == VOIDmode && op0 != constm1_rtx))
36107 {
36108 if (!insn_data[icode].operand[0].predicate (op0, mode0))
36109 op0 = copy_to_mode_reg (mode0, op0);
36110 }
36111 else if (op0 != constm1_rtx)
36112 {
36113 op0 = copy_to_reg (op0);
36114 op0 = simplify_gen_subreg (mode0, op0, GET_MODE (op0), 0);
36115 }
36116
36117 if (!insn_data[icode].operand[1].predicate (op1, mode1))
36118 op1 = copy_to_mode_reg (mode1, op1);
36119
36120 /* Force the memory operand to be addressed through a base register
36121 here. We don't want to do this to the memory operands of other
36122 builtin functions. */
36123 op2 = force_reg (Pmode, convert_to_mode (Pmode, op2, 1));
36124
36125 if (!insn_data[icode].operand[2].predicate (op2, Pmode))
36126 op2 = copy_to_mode_reg (Pmode, op2);
36127
36128 if (!insn_data[icode].operand[3].predicate (op3, mode3))
36129 {
36130 error ("the forth argument must be scale 1, 2, 4, 8");
36131 return const0_rtx;
36132 }
36133
36134 if (!insn_data[icode].operand[4].predicate (op4, mode4))
36135 {
36136 error ("incorrect hint operand");
36137 return const0_rtx;
36138 }
36139
36140 pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
36141 if (! pat)
36142 return const0_rtx;
36143
36144 emit_insn (pat);
36145
36146 return 0;
36147
36148 case IX86_BUILTIN_XABORT:
36149 icode = CODE_FOR_xabort;
36150 arg0 = CALL_EXPR_ARG (exp, 0);
36151 op0 = expand_normal (arg0);
36152 mode0 = insn_data[icode].operand[0].mode;
36153 if (!insn_data[icode].operand[0].predicate (op0, mode0))
36154 {
36155 error ("the xabort's argument must be an 8-bit immediate");
36156 return const0_rtx;
36157 }
36158 emit_insn (gen_xabort (op0));
36159 return 0;
36160
36161 default:
36162 break;
36163 }
36164
36165 for (i = 0, d = bdesc_special_args;
36166 i < ARRAY_SIZE (bdesc_special_args);
36167 i++, d++)
36168 if (d->code == fcode)
36169 return ix86_expand_special_args_builtin (d, exp, target);
36170
36171 for (i = 0, d = bdesc_args;
36172 i < ARRAY_SIZE (bdesc_args);
36173 i++, d++)
36174 if (d->code == fcode)
36175 switch (fcode)
36176 {
36177 case IX86_BUILTIN_FABSQ:
36178 case IX86_BUILTIN_COPYSIGNQ:
36179 if (!TARGET_SSE)
36180 /* Emit a normal call if SSE isn't available. */
36181 return expand_call (exp, target, ignore);
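/* FALLTHRU */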
36182 default:
36183 return ix86_expand_args_builtin (d, exp, target);
36184 }
36185
36186 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
36187 if (d->code == fcode)
36188 return ix86_expand_sse_comi (d, exp, target);
36189
36190 for (i = 0, d = bdesc_round_args; i < ARRAY_SIZE (bdesc_round_args); i++, d++)
36191 if (d->code == fcode)
36192 return ix86_expand_round_builtin (d, exp, target);
36193
36194 for (i = 0, d = bdesc_pcmpestr;
36195 i < ARRAY_SIZE (bdesc_pcmpestr);
36196 i++, d++)
36197 if (d->code == fcode)
36198 return ix86_expand_sse_pcmpestr (d, exp, target);
36199
36200 for (i = 0, d = bdesc_pcmpistr;
36201 i < ARRAY_SIZE (bdesc_pcmpistr);
36202 i++, d++)
36203 if (d->code == fcode)
36204 return ix86_expand_sse_pcmpistr (d, exp, target);
36205
36206 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
36207 if (d->code == fcode)
36208 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
36209 (enum ix86_builtin_func_type)
36210 d->flag, d->comparison);
36211
36212 gcc_unreachable ();
36213 }
36214
36215 /* This returns the target-specific builtin with code CODE if
36216 current_function_decl has visibility on this builtin, which is checked
36217 using isa flags. Returns NULL_TREE otherwise. */
36218
36219 static tree ix86_get_builtin (enum ix86_builtins code)
36220 {
36221 struct cl_target_option *opts;
36222 tree target_tree = NULL_TREE;
36223
36224 /* Determine the isa flags of current_function_decl. */
36225
36226 if (current_function_decl)
36227 target_tree = DECL_FUNCTION_SPECIFIC_TARGET (current_function_decl);
36228
36229 if (target_tree == NULL)
36230 target_tree = target_option_default_node;
36231
36232 opts = TREE_TARGET_OPTION (target_tree);
36233
36234 if (ix86_builtins_isa[(int) code].isa & opts->x_ix86_isa_flags)
36235 return ix86_builtin_decl (code, true);
36236 else
36237 return NULL_TREE;
36238 }
36239
36240 /* Returns a function decl for a vectorized version of the builtin function
36241 FNDECL, taking vectors of type TYPE_IN and producing vectors of type
36242 TYPE_OUT, or NULL_TREE if it is not available. */
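/* For example, a V2DF sqrt maps to IX86_BUILTIN_SQRTPD below, and a
   V16SF sqrtf maps to IX86_BUILTIN_SQRTPS_NR512, subject to the isa
   checks done by ix86_get_builtin.  */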
36243
36244 static tree
36245 ix86_builtin_vectorized_function (tree fndecl, tree type_out,
36246 tree type_in)
36247 {
36248 enum machine_mode in_mode, out_mode;
36249 int in_n, out_n;
36250 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
36251
36252 if (TREE_CODE (type_out) != VECTOR_TYPE
36253 || TREE_CODE (type_in) != VECTOR_TYPE
36254 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
36255 return NULL_TREE;
36256
36257 out_mode = TYPE_MODE (TREE_TYPE (type_out));
36258 out_n = TYPE_VECTOR_SUBPARTS (type_out);
36259 in_mode = TYPE_MODE (TREE_TYPE (type_in));
36260 in_n = TYPE_VECTOR_SUBPARTS (type_in);
36261
36262 switch (fn)
36263 {
36264 case BUILT_IN_SQRT:
36265 if (out_mode == DFmode && in_mode == DFmode)
36266 {
36267 if (out_n == 2 && in_n == 2)
36268 return ix86_get_builtin (IX86_BUILTIN_SQRTPD);
36269 else if (out_n == 4 && in_n == 4)
36270 return ix86_get_builtin (IX86_BUILTIN_SQRTPD256);
36271 else if (out_n == 8 && in_n == 8)
36272 return ix86_get_builtin (IX86_BUILTIN_SQRTPD512);
36273 }
36274 break;
36275
36276 case BUILT_IN_EXP2F:
36277 if (out_mode == SFmode && in_mode == SFmode)
36278 {
36279 if (out_n == 16 && in_n == 16)
36280 return ix86_get_builtin (IX86_BUILTIN_EXP2PS);
36281 }
36282 break;
36283
36284 case BUILT_IN_SQRTF:
36285 if (out_mode == SFmode && in_mode == SFmode)
36286 {
36287 if (out_n == 4 && in_n == 4)
36288 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR);
36289 else if (out_n == 8 && in_n == 8)
36290 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR256);
36291 else if (out_n == 16 && in_n == 16)
36292 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR512);
36293 }
36294 break;
36295
36296 case BUILT_IN_IFLOOR:
36297 case BUILT_IN_LFLOOR:
36298 case BUILT_IN_LLFLOOR:
36299 /* The round insn does not trap on denormals. */
36300 if (flag_trapping_math || !TARGET_ROUND)
36301 break;
36302
36303 if (out_mode == SImode && in_mode == DFmode)
36304 {
36305 if (out_n == 4 && in_n == 2)
36306 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX);
36307 else if (out_n == 8 && in_n == 4)
36308 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256);
36309 else if (out_n == 16 && in_n == 8)
36310 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512);
36311 }
36312 break;
36313
36314 case BUILT_IN_IFLOORF:
36315 case BUILT_IN_LFLOORF:
36316 case BUILT_IN_LLFLOORF:
36317 /* The round insn does not trap on denormals. */
36318 if (flag_trapping_math || !TARGET_ROUND)
36319 break;
36320
36321 if (out_mode == SImode && in_mode == SFmode)
36322 {
36323 if (out_n == 4 && in_n == 4)
36324 return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX);
36325 else if (out_n == 8 && in_n == 8)
36326 return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX256);
36327 }
36328 break;
36329
36330 case BUILT_IN_ICEIL:
36331 case BUILT_IN_LCEIL:
36332 case BUILT_IN_LLCEIL:
36333 /* The round insn does not trap on denormals. */
36334 if (flag_trapping_math || !TARGET_ROUND)
36335 break;
36336
36337 if (out_mode == SImode && in_mode == DFmode)
36338 {
36339 if (out_n == 4 && in_n == 2)
36340 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX);
36341 else if (out_n == 8 && in_n == 4)
36342 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256);
36343 else if (out_n == 16 && in_n == 8)
36344 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512);
36345 }
36346 break;
36347
36348 case BUILT_IN_ICEILF:
36349 case BUILT_IN_LCEILF:
36350 case BUILT_IN_LLCEILF:
36351 /* The round insn does not trap on denormals. */
36352 if (flag_trapping_math || !TARGET_ROUND)
36353 break;
36354
36355 if (out_mode == SImode && in_mode == SFmode)
36356 {
36357 if (out_n == 4 && in_n == 4)
36358 return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX);
36359 else if (out_n == 8 && in_n == 8)
36360 return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX256);
36361 }
36362 break;
36363
36364 case BUILT_IN_IRINT:
36365 case BUILT_IN_LRINT:
36366 case BUILT_IN_LLRINT:
36367 if (out_mode == SImode && in_mode == DFmode)
36368 {
36369 if (out_n == 4 && in_n == 2)
36370 return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX);
36371 else if (out_n == 8 && in_n == 4)
36372 return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX256);
36373 }
36374 break;
36375
36376 case BUILT_IN_IRINTF:
36377 case BUILT_IN_LRINTF:
36378 case BUILT_IN_LLRINTF:
36379 if (out_mode == SImode && in_mode == SFmode)
36380 {
36381 if (out_n == 4 && in_n == 4)
36382 return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ);
36383 else if (out_n == 8 && in_n == 8)
36384 return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ256);
36385 }
36386 break;
36387
36388 case BUILT_IN_IROUND:
36389 case BUILT_IN_LROUND:
36390 case BUILT_IN_LLROUND:
36391 /* The round insn does not trap on denormals. */
36392 if (flag_trapping_math || !TARGET_ROUND)
36393 break;
36394
36395 if (out_mode == SImode && in_mode == DFmode)
36396 {
36397 if (out_n == 4 && in_n == 2)
36398 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX);
36399 else if (out_n == 8 && in_n == 4)
36400 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256);
36401 else if (out_n == 16 && in_n == 8)
36402 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512);
36403 }
36404 break;
36405
36406 case BUILT_IN_IROUNDF:
36407 case BUILT_IN_LROUNDF:
36408 case BUILT_IN_LLROUNDF:
36409 /* The round insn does not trap on denormals. */
36410 if (flag_trapping_math || !TARGET_ROUND)
36411 break;
36412
36413 if (out_mode == SImode && in_mode == SFmode)
36414 {
36415 if (out_n == 4 && in_n == 4)
36416 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX);
36417 else if (out_n == 8 && in_n == 8)
36418 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX256);
36419 }
36420 break;
36421
36422 case BUILT_IN_COPYSIGN:
36423 if (out_mode == DFmode && in_mode == DFmode)
36424 {
36425 if (out_n == 2 && in_n == 2)
36426 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD);
36427 else if (out_n == 4 && in_n == 4)
36428 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD256);
36429 else if (out_n == 8 && in_n == 8)
36430 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD512);
36431 }
36432 break;
36433
36434 case BUILT_IN_COPYSIGNF:
36435 if (out_mode == SFmode && in_mode == SFmode)
36436 {
36437 if (out_n == 4 && in_n == 4)
36438 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS);
36439 else if (out_n == 8 && in_n == 8)
36440 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS256);
36441 else if (out_n == 16 && in_n == 16)
36442 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS512);
36443 }
36444 break;
36445
36446 case BUILT_IN_FLOOR:
36447 /* The round insn does not trap on denormals. */
36448 if (flag_trapping_math || !TARGET_ROUND)
36449 break;
36450
36451 if (out_mode == DFmode && in_mode == DFmode)
36452 {
36453 if (out_n == 2 && in_n == 2)
36454 return ix86_get_builtin (IX86_BUILTIN_FLOORPD);
36455 else if (out_n == 4 && in_n == 4)
36456 return ix86_get_builtin (IX86_BUILTIN_FLOORPD256);
36457 }
36458 break;
36459
36460 case BUILT_IN_FLOORF:
36461 /* The round insn does not trap on denormals. */
36462 if (flag_trapping_math || !TARGET_ROUND)
36463 break;
36464
36465 if (out_mode == SFmode && in_mode == SFmode)
36466 {
36467 if (out_n == 4 && in_n == 4)
36468 return ix86_get_builtin (IX86_BUILTIN_FLOORPS);
36469 else if (out_n == 8 && in_n == 8)
36470 return ix86_get_builtin (IX86_BUILTIN_FLOORPS256);
36471 }
36472 break;
36473
36474 case BUILT_IN_CEIL:
36475 /* The round insn does not trap on denormals. */
36476 if (flag_trapping_math || !TARGET_ROUND)
36477 break;
36478
36479 if (out_mode == DFmode && in_mode == DFmode)
36480 {
36481 if (out_n == 2 && in_n == 2)
36482 return ix86_get_builtin (IX86_BUILTIN_CEILPD);
36483 else if (out_n == 4 && in_n == 4)
36484 return ix86_get_builtin (IX86_BUILTIN_CEILPD256);
36485 }
36486 break;
36487
36488 case BUILT_IN_CEILF:
36489 /* The round insn does not trap on denormals. */
36490 if (flag_trapping_math || !TARGET_ROUND)
36491 break;
36492
36493 if (out_mode == SFmode && in_mode == SFmode)
36494 {
36495 if (out_n == 4 && in_n == 4)
36496 return ix86_get_builtin (IX86_BUILTIN_CEILPS);
36497 else if (out_n == 8 && in_n == 8)
36498 return ix86_get_builtin (IX86_BUILTIN_CEILPS256);
36499 }
36500 break;
36501
36502 case BUILT_IN_TRUNC:
36503 /* The round insn does not trap on denormals. */
36504 if (flag_trapping_math || !TARGET_ROUND)
36505 break;
36506
36507 if (out_mode == DFmode && in_mode == DFmode)
36508 {
36509 if (out_n == 2 && in_n == 2)
36510 return ix86_get_builtin (IX86_BUILTIN_TRUNCPD);
36511 else if (out_n == 4 && in_n == 4)
36512 return ix86_get_builtin (IX86_BUILTIN_TRUNCPD256);
36513 }
36514 break;
36515
36516 case BUILT_IN_TRUNCF:
36517 /* The round insn does not trap on denormals. */
36518 if (flag_trapping_math || !TARGET_ROUND)
36519 break;
36520
36521 if (out_mode == SFmode && in_mode == SFmode)
36522 {
36523 if (out_n == 4 && in_n == 4)
36524 return ix86_get_builtin (IX86_BUILTIN_TRUNCPS);
36525 else if (out_n == 8 && in_n == 8)
36526 return ix86_get_builtin (IX86_BUILTIN_TRUNCPS256);
36527 }
36528 break;
36529
36530 case BUILT_IN_RINT:
36531 /* The round insn does not trap on denormals. */
36532 if (flag_trapping_math || !TARGET_ROUND)
36533 break;
36534
36535 if (out_mode == DFmode && in_mode == DFmode)
36536 {
36537 if (out_n == 2 && in_n == 2)
36538 return ix86_get_builtin (IX86_BUILTIN_RINTPD);
36539 else if (out_n == 4 && in_n == 4)
36540 return ix86_get_builtin (IX86_BUILTIN_RINTPD256);
36541 }
36542 break;
36543
36544 case BUILT_IN_RINTF:
36545 /* The round insn does not trap on denormals. */
36546 if (flag_trapping_math || !TARGET_ROUND)
36547 break;
36548
36549 if (out_mode == SFmode && in_mode == SFmode)
36550 {
36551 if (out_n == 4 && in_n == 4)
36552 return ix86_get_builtin (IX86_BUILTIN_RINTPS);
36553 else if (out_n == 8 && in_n == 8)
36554 return ix86_get_builtin (IX86_BUILTIN_RINTPS256);
36555 }
36556 break;
36557
36558 case BUILT_IN_ROUND:
36559 /* The round insn does not trap on denormals. */
36560 if (flag_trapping_math || !TARGET_ROUND)
36561 break;
36562
36563 if (out_mode == DFmode && in_mode == DFmode)
36564 {
36565 if (out_n == 2 && in_n == 2)
36566 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ);
36567 else if (out_n == 4 && in_n == 4)
36568 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ256);
36569 }
36570 break;
36571
36572 case BUILT_IN_ROUNDF:
36573 /* The round insn does not trap on denormals. */
36574 if (flag_trapping_math || !TARGET_ROUND)
36575 break;
36576
36577 if (out_mode == SFmode && in_mode == SFmode)
36578 {
36579 if (out_n == 4 && in_n == 4)
36580 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ);
36581 else if (out_n == 8 && in_n == 8)
36582 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ256);
36583 }
36584 break;
36585
36586 case BUILT_IN_FMA:
36587 if (out_mode == DFmode && in_mode == DFmode)
36588 {
36589 if (out_n == 2 && in_n == 2)
36590 return ix86_get_builtin (IX86_BUILTIN_VFMADDPD);
36591 if (out_n == 4 && in_n == 4)
36592 return ix86_get_builtin (IX86_BUILTIN_VFMADDPD256);
36593 }
36594 break;
36595
36596 case BUILT_IN_FMAF:
36597 if (out_mode == SFmode && in_mode == SFmode)
36598 {
36599 if (out_n == 4 && in_n == 4)
36600 return ix86_get_builtin (IX86_BUILTIN_VFMADDPS);
36601 if (out_n == 8 && in_n == 8)
36602 return ix86_get_builtin (IX86_BUILTIN_VFMADDPS256);
36603 }
36604 break;
36605
36606 default:
36607 break;
36608 }
36609
36610 /* Dispatch to a handler for a vectorization library. */
36611 if (ix86_veclib_handler)
36612 return ix86_veclib_handler ((enum built_in_function) fn, type_out,
36613 type_in);
36614
36615 return NULL_TREE;
36616 }
36617
36618 /* Handler for an SVML-style interface to
36619 a library with vectorized intrinsics. */
36620
36621 static tree
36622 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
36623 {
36624 char name[20];
36625 tree fntype, new_fndecl, args;
36626 unsigned arity;
36627 const char *bname;
36628 enum machine_mode el_mode, in_mode;
36629 int n, in_n;
36630
36631 /* The SVML is suitable for unsafe math only. */
36632 if (!flag_unsafe_math_optimizations)
36633 return NULL_TREE;
36634
36635 el_mode = TYPE_MODE (TREE_TYPE (type_out));
36636 n = TYPE_VECTOR_SUBPARTS (type_out);
36637 in_mode = TYPE_MODE (TREE_TYPE (type_in));
36638 in_n = TYPE_VECTOR_SUBPARTS (type_in);
36639 if (el_mode != in_mode
36640 || n != in_n)
36641 return NULL_TREE;
36642
36643 switch (fn)
36644 {
36645 case BUILT_IN_EXP:
36646 case BUILT_IN_LOG:
36647 case BUILT_IN_LOG10:
36648 case BUILT_IN_POW:
36649 case BUILT_IN_TANH:
36650 case BUILT_IN_TAN:
36651 case BUILT_IN_ATAN:
36652 case BUILT_IN_ATAN2:
36653 case BUILT_IN_ATANH:
36654 case BUILT_IN_CBRT:
36655 case BUILT_IN_SINH:
36656 case BUILT_IN_SIN:
36657 case BUILT_IN_ASINH:
36658 case BUILT_IN_ASIN:
36659 case BUILT_IN_COSH:
36660 case BUILT_IN_COS:
36661 case BUILT_IN_ACOSH:
36662 case BUILT_IN_ACOS:
36663 if (el_mode != DFmode || n != 2)
36664 return NULL_TREE;
36665 break;
36666
36667 case BUILT_IN_EXPF:
36668 case BUILT_IN_LOGF:
36669 case BUILT_IN_LOG10F:
36670 case BUILT_IN_POWF:
36671 case BUILT_IN_TANHF:
36672 case BUILT_IN_TANF:
36673 case BUILT_IN_ATANF:
36674 case BUILT_IN_ATAN2F:
36675 case BUILT_IN_ATANHF:
36676 case BUILT_IN_CBRTF:
36677 case BUILT_IN_SINHF:
36678 case BUILT_IN_SINF:
36679 case BUILT_IN_ASINHF:
36680 case BUILT_IN_ASINF:
36681 case BUILT_IN_COSHF:
36682 case BUILT_IN_COSF:
36683 case BUILT_IN_ACOSHF:
36684 case BUILT_IN_ACOSF:
36685 if (el_mode != SFmode || n != 4)
36686 return NULL_TREE;
36687 break;
36688
36689 default:
36690 return NULL_TREE;
36691 }
36692
36693 bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));
36694
36695 if (fn == BUILT_IN_LOGF)
36696 strcpy (name, "vmlsLn4");
36697 else if (fn == BUILT_IN_LOG)
36698 strcpy (name, "vmldLn2");
36699 else if (n == 4)
36700 {
36701 sprintf (name, "vmls%s", bname+10);
36702 name[strlen (name)-1] = '4';
36703 }
36704 else
36705 sprintf (name, "vmld%s2", bname+10);
36706
36707 /* Convert to uppercase. */
36708 name[4] &= ~0x20;
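/* For example, BUILT_IN_SINF ("__builtin_sinf") with n == 4 yields
   "vmlsSin4", and BUILT_IN_SIN with n == 2 yields "vmldSin2"; the
   "bname + 10" above skips the "__builtin_" prefix.  */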
36709
36710 arity = 0;
36711 for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
36712 args;
36713 args = TREE_CHAIN (args))
36714 arity++;
36715
36716 if (arity == 1)
36717 fntype = build_function_type_list (type_out, type_in, NULL);
36718 else
36719 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
36720
36721 /* Build a function declaration for the vectorized function. */
36722 new_fndecl = build_decl (BUILTINS_LOCATION,
36723 FUNCTION_DECL, get_identifier (name), fntype);
36724 TREE_PUBLIC (new_fndecl) = 1;
36725 DECL_EXTERNAL (new_fndecl) = 1;
36726 DECL_IS_NOVOPS (new_fndecl) = 1;
36727 TREE_READONLY (new_fndecl) = 1;
36728
36729 return new_fndecl;
36730 }
36731
36732 /* Handler for an ACML-style interface to
36733 a library with vectorized intrinsics. */
36734
36735 static tree
36736 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
36737 {
36738 char name[20] = "__vr.._";
36739 tree fntype, new_fndecl, args;
36740 unsigned arity;
36741 const char *bname;
36742 enum machine_mode el_mode, in_mode;
36743 int n, in_n;
36744
36745 /* The ACML is 64-bit only and suitable for unsafe math only, as
36746 it does not correctly support parts of IEEE arithmetic with the
36747 required precision, such as denormals. */
36748 if (!TARGET_64BIT
36749 || !flag_unsafe_math_optimizations)
36750 return NULL_TREE;
36751
36752 el_mode = TYPE_MODE (TREE_TYPE (type_out));
36753 n = TYPE_VECTOR_SUBPARTS (type_out);
36754 in_mode = TYPE_MODE (TREE_TYPE (type_in));
36755 in_n = TYPE_VECTOR_SUBPARTS (type_in);
36756 if (el_mode != in_mode
36757 || n != in_n)
36758 return NULL_TREE;
36759
36760 switch (fn)
36761 {
36762 case BUILT_IN_SIN:
36763 case BUILT_IN_COS:
36764 case BUILT_IN_EXP:
36765 case BUILT_IN_LOG:
36766 case BUILT_IN_LOG2:
36767 case BUILT_IN_LOG10:
36768 name[4] = 'd';
36769 name[5] = '2';
36770 if (el_mode != DFmode
36771 || n != 2)
36772 return NULL_TREE;
36773 break;
36774
36775 case BUILT_IN_SINF:
36776 case BUILT_IN_COSF:
36777 case BUILT_IN_EXPF:
36778 case BUILT_IN_POWF:
36779 case BUILT_IN_LOGF:
36780 case BUILT_IN_LOG2F:
36781 case BUILT_IN_LOG10F:
36782 name[4] = 's';
36783 name[5] = '4';
36784 if (el_mode != SFmode
36785 || n != 4)
36786 return NULL_TREE;
36787 break;
36788
36789 default:
36790 return NULL_TREE;
36791 }
36792
36793 bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));
36794 sprintf (name + 7, "%s", bname+10);
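/* For example, BUILT_IN_SIN becomes "__vrd2_sin" and BUILT_IN_SINF
   becomes "__vrs4_sinf".  */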
36795
36796 arity = 0;
36797 for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
36798 args;
36799 args = TREE_CHAIN (args))
36800 arity++;
36801
36802 if (arity == 1)
36803 fntype = build_function_type_list (type_out, type_in, NULL);
36804 else
36805 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
36806
36807 /* Build a function declaration for the vectorized function. */
36808 new_fndecl = build_decl (BUILTINS_LOCATION,
36809 FUNCTION_DECL, get_identifier (name), fntype);
36810 TREE_PUBLIC (new_fndecl) = 1;
36811 DECL_EXTERNAL (new_fndecl) = 1;
36812 DECL_IS_NOVOPS (new_fndecl) = 1;
36813 TREE_READONLY (new_fndecl) = 1;
36814
36815 return new_fndecl;
36816 }
36817
36818 /* Returns a decl of a function that implements a gather load with
36819 memory vector type MEM_VECTYPE, scalar index type INDEX_TYPE and
36820 scale SCALE. Return NULL_TREE if it is not available. */
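/* For example, gathering V4SF elements with an SImode index selects
   IX86_BUILTIN_GATHERSIV4SF below, while a DImode index selects
   IX86_BUILTIN_GATHERDIV4SF.  */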
36821
36822 static tree
36823 ix86_vectorize_builtin_gather (const_tree mem_vectype,
36824 const_tree index_type, int scale)
36825 {
36826 bool si;
36827 enum ix86_builtins code;
36828
36829 if (! TARGET_AVX2)
36830 return NULL_TREE;
36831
36832 if ((TREE_CODE (index_type) != INTEGER_TYPE
36833 && !POINTER_TYPE_P (index_type))
36834 || (TYPE_MODE (index_type) != SImode
36835 && TYPE_MODE (index_type) != DImode))
36836 return NULL_TREE;
36837
36838 if (TYPE_PRECISION (index_type) > POINTER_SIZE)
36839 return NULL_TREE;
36840
36841 /* v*gather* insn sign extends index to pointer mode. */
36842 if (TYPE_PRECISION (index_type) < POINTER_SIZE
36843 && TYPE_UNSIGNED (index_type))
36844 return NULL_TREE;
36845
36846 if (scale <= 0
36847 || scale > 8
36848 || (scale & (scale - 1)) != 0)
36849 return NULL_TREE;
36850
36851 si = TYPE_MODE (index_type) == SImode;
36852 switch (TYPE_MODE (mem_vectype))
36853 {
36854 case V2DFmode:
36855 code = si ? IX86_BUILTIN_GATHERSIV2DF : IX86_BUILTIN_GATHERDIV2DF;
36856 break;
36857 case V4DFmode:
36858 code = si ? IX86_BUILTIN_GATHERALTSIV4DF : IX86_BUILTIN_GATHERDIV4DF;
36859 break;
36860 case V2DImode:
36861 code = si ? IX86_BUILTIN_GATHERSIV2DI : IX86_BUILTIN_GATHERDIV2DI;
36862 break;
36863 case V4DImode:
36864 code = si ? IX86_BUILTIN_GATHERALTSIV4DI : IX86_BUILTIN_GATHERDIV4DI;
36865 break;
36866 case V4SFmode:
36867 code = si ? IX86_BUILTIN_GATHERSIV4SF : IX86_BUILTIN_GATHERDIV4SF;
36868 break;
36869 case V8SFmode:
36870 code = si ? IX86_BUILTIN_GATHERSIV8SF : IX86_BUILTIN_GATHERALTDIV8SF;
36871 break;
36872 case V4SImode:
36873 code = si ? IX86_BUILTIN_GATHERSIV4SI : IX86_BUILTIN_GATHERDIV4SI;
36874 break;
36875 case V8SImode:
36876 code = si ? IX86_BUILTIN_GATHERSIV8SI : IX86_BUILTIN_GATHERALTDIV8SI;
36877 break;
36878 case V8DFmode:
36879 if (TARGET_AVX512F)
36880 code = si ? IX86_BUILTIN_GATHER3ALTSIV8DF : IX86_BUILTIN_GATHER3DIV8DF;
36881 else
36882 return NULL_TREE;
36883 break;
36884 case V8DImode:
36885 if (TARGET_AVX512F)
36886 code = si ? IX86_BUILTIN_GATHER3ALTSIV8DI : IX86_BUILTIN_GATHER3DIV8DI;
36887 else
36888 return NULL_TREE;
36889 break;
36890 case V16SFmode:
36891 if (TARGET_AVX512F)
36892 code = si ? IX86_BUILTIN_GATHER3SIV16SF : IX86_BUILTIN_GATHER3ALTDIV16SF;
36893 else
36894 return NULL_TREE;
36895 break;
36896 case V16SImode:
36897 if (TARGET_AVX512F)
36898 code = si ? IX86_BUILTIN_GATHER3SIV16SI : IX86_BUILTIN_GATHER3ALTDIV16SI;
36899 else
36900 return NULL_TREE;
36901 break;
36902 default:
36903 return NULL_TREE;
36904 }
36905
36906 return ix86_get_builtin (code);
36907 }
36908
36909 /* Returns the decl of a target-specific builtin that implements
36910 the reciprocal of the function FN, or NULL_TREE if not available. */
36911
36912 static tree
36913 ix86_builtin_reciprocal (unsigned int fn, bool md_fn, bool)
36914 {
36915 if (! (TARGET_SSE_MATH && !optimize_insn_for_size_p ()
36916 && flag_finite_math_only && !flag_trapping_math
36917 && flag_unsafe_math_optimizations))
36918 return NULL_TREE;
36919
36920 if (md_fn)
36921 /* Machine dependent builtins. */
36922 switch (fn)
36923 {
36924 /* Vectorized version of sqrt to rsqrt conversion. */
36925 case IX86_BUILTIN_SQRTPS_NR:
36926 return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR);
36927
36928 case IX86_BUILTIN_SQRTPS_NR256:
36929 return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR256);
36930
36931 default:
36932 return NULL_TREE;
36933 }
36934 else
36935 /* Normal builtins. */
36936 switch (fn)
36937 {
36938 /* Sqrt to rsqrt conversion. */
36939 case BUILT_IN_SQRTF:
36940 return ix86_get_builtin (IX86_BUILTIN_RSQRTF);
36941
36942 default:
36943 return NULL_TREE;
36944 }
36945 }
36946 \f
36947 /* Helper for avx_vpermilps256_operand et al. This is also used by
36948 the expansion functions to turn the parallel back into a mask.
36949 The return value is 0 for no match and the imm8+1 for a match. */
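/* For example, for V4SFmode the parallel (2 3 0 1) encodes each index in
   a two-bit field, giving imm8 0x4e, so the return value is 0x4f.  */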
36950
36951 int
36952 avx_vpermilp_parallel (rtx par, enum machine_mode mode)
36953 {
36954 unsigned i, nelt = GET_MODE_NUNITS (mode);
36955 unsigned mask = 0;
36956 unsigned char ipar[16] = {}; /* Silence -Wuninitialized warning. */
36957
36958 if (XVECLEN (par, 0) != (int) nelt)
36959 return 0;
36960
36961 /* Validate that all of the elements are constants, and not totally
36962 out of range. Copy the data into an integral array to make the
36963 subsequent checks easier. */
36964 for (i = 0; i < nelt; ++i)
36965 {
36966 rtx er = XVECEXP (par, 0, i);
36967 unsigned HOST_WIDE_INT ei;
36968
36969 if (!CONST_INT_P (er))
36970 return 0;
36971 ei = INTVAL (er);
36972 if (ei >= nelt)
36973 return 0;
36974 ipar[i] = ei;
36975 }
36976
36977 switch (mode)
36978 {
36979 case V8DFmode:
36980 /* In the 512-bit DFmode case, we can only move elements within
36981 a 128-bit lane. First fill the second part of the mask,
36982 then fallthru. */
36983 for (i = 4; i < 6; ++i)
36984 {
36985 if (ipar[i] < 4 || ipar[i] >= 6)
36986 return 0;
36987 mask |= (ipar[i] - 4) << i;
36988 }
36989 for (i = 6; i < 8; ++i)
36990 {
36991 if (ipar[i] < 6)
36992 return 0;
36993 mask |= (ipar[i] - 6) << i;
36994 }
36995 /* FALLTHRU */
36996
36997 case V4DFmode:
36998 /* In the 256-bit DFmode case, we can only move elements within
36999 a 128-bit lane. */
37000 for (i = 0; i < 2; ++i)
37001 {
37002 if (ipar[i] >= 2)
37003 return 0;
37004 mask |= ipar[i] << i;
37005 }
37006 for (i = 2; i < 4; ++i)
37007 {
37008 if (ipar[i] < 2)
37009 return 0;
37010 mask |= (ipar[i] - 2) << i;
37011 }
37012 break;
37013
37014 case V16SFmode:
37015 /* In the 512-bit SFmode case, the permutation in the upper 256 bits
37016 must mirror the permutation in the lower 256 bits. */
37017 for (i = 0; i < 8; ++i)
37018 if (ipar[i] + 8 != ipar[i + 8])
37019 return 0;
37020 /* FALLTHRU */
37021
37022 case V8SFmode:
37023 /* In the 256-bit SFmode case, we have full freedom of
37024 movement within the low 128-bit lane, but the high 128-bit
37025 lane must mirror the exact same pattern. */
37026 for (i = 0; i < 4; ++i)
37027 if (ipar[i] + 4 != ipar[i + 4])
37028 return 0;
37029 nelt = 4;
37030 /* FALLTHRU */
37031
37032 case V2DFmode:
37033 case V4SFmode:
37034 /* In the 128-bit case, we have full freedom in the placement of
37035 the elements from the source operand. */
37036 for (i = 0; i < nelt; ++i)
37037 mask |= ipar[i] << (i * (nelt / 2));
37038 break;
37039
37040 default:
37041 gcc_unreachable ();
37042 }
37043
37044 /* Make sure success has a non-zero value by adding one. */
37045 return mask + 1;
37046 }
37047
37048 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
37049 the expansion functions to turn the parallel back into a mask.
37050 The return value is 0 for no match and the imm8+1 for a match. */
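/* For example, for V8SFmode the parallel (4 5 6 7 8 9 10 11) selects the
   high lane of the first operand and the low lane of the second, giving
   imm8 0x21, so the return value is 0x22.  */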
37051
37052 int
37053 avx_vperm2f128_parallel (rtx par, enum machine_mode mode)
37054 {
37055 unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
37056 unsigned mask = 0;
37057 unsigned char ipar[8] = {}; /* Silence -Wuninitialized warning. */
37058
37059 if (XVECLEN (par, 0) != (int) nelt)
37060 return 0;
37061
37062 /* Validate that all of the elements are constants, and not totally
37063 out of range. Copy the data into an integral array to make the
37064 subsequent checks easier. */
37065 for (i = 0; i < nelt; ++i)
37066 {
37067 rtx er = XVECEXP (par, 0, i);
37068 unsigned HOST_WIDE_INT ei;
37069
37070 if (!CONST_INT_P (er))
37071 return 0;
37072 ei = INTVAL (er);
37073 if (ei >= 2 * nelt)
37074 return 0;
37075 ipar[i] = ei;
37076 }
37077
37078 /* Validate that the halves of the permute are halves. */
37079 for (i = 0; i < nelt2 - 1; ++i)
37080 if (ipar[i] + 1 != ipar[i + 1])
37081 return 0;
37082 for (i = nelt2; i < nelt - 1; ++i)
37083 if (ipar[i] + 1 != ipar[i + 1])
37084 return 0;
37085
37086 /* Reconstruct the mask. */
37087 for (i = 0; i < 2; ++i)
37088 {
37089 unsigned e = ipar[i * nelt2];
37090 if (e % nelt2)
37091 return 0;
37092 e /= nelt2;
37093 mask |= e << (i * 4);
37094 }
37095
37096 /* Make sure success has a non-zero value by adding one. */
37097 return mask + 1;
37098 }
37099 \f
37100 /* Return a register priority for hard reg REGNO. */
37101 static int
37102 ix86_register_priority (int hard_regno)
37103 {
37104 /* ebp and r13 as the base always want a displacement, and r12 as the
37105 base always wants an index. So discourage their use in an
37106 address. */
37107 if (hard_regno == R12_REG || hard_regno == R13_REG)
37108 return 0;
37109 if (hard_regno == BP_REG)
37110 return 1;
37111 /* New x86-64 int registers result in bigger code size. Discourage
37112 them. */
37113 if (FIRST_REX_INT_REG <= hard_regno && hard_regno <= LAST_REX_INT_REG)
37114 return 2;
37115 /* New x86-64 SSE registers result in bigger code size. Discourage
37116 them. */
37117 if (FIRST_REX_SSE_REG <= hard_regno && hard_regno <= LAST_REX_SSE_REG)
37118 return 2;
37119 /* Usage of AX register results in smaller code. Prefer it. */
37120 if (hard_regno == 0)
37121 return 4;
37122 return 3;
37123 }
37124
37125 /* Implement TARGET_PREFERRED_RELOAD_CLASS.
37126
37127 Put float CONST_DOUBLE in the constant pool instead of fp regs.
37128 QImode must go into class Q_REGS.
37129 Narrow ALL_REGS to GENERAL_REGS. This allows movsf and
37130 movdf to do mem-to-mem moves through integer regs. */
37131
37132 static reg_class_t
37133 ix86_preferred_reload_class (rtx x, reg_class_t regclass)
37134 {
37135 enum machine_mode mode = GET_MODE (x);
37136
37137 /* We're only allowed to return a subclass of CLASS. Many of the
37138 following checks fail for NO_REGS, so eliminate that early. */
37139 if (regclass == NO_REGS)
37140 return NO_REGS;
37141
37142 /* All classes can load zeros. */
37143 if (x == CONST0_RTX (mode))
37144 return regclass;
37145
37146 /* Force constants into memory if we are loading a (nonzero) constant into
37147 an MMX, SSE or MASK register. This is because there are no MMX/SSE/MASK
37148 instructions to load from a constant. */
37149 if (CONSTANT_P (x)
37150 && (MAYBE_MMX_CLASS_P (regclass)
37151 || MAYBE_SSE_CLASS_P (regclass)
37152 || MAYBE_MASK_CLASS_P (regclass)))
37153 return NO_REGS;
37154
37155 /* Prefer SSE regs only, if we can use them for math. */
37156 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
37157 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
37158
37159 /* Floating-point constants need more complex checks. */
37160 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
37161 {
37162 /* General regs can load everything. */
37163 if (reg_class_subset_p (regclass, GENERAL_REGS))
37164 return regclass;
37165
37166 /* Floats can load 0 and 1 plus some others. Note that we eliminated
37167 zero above. We only want to wind up preferring 80387 registers if
37168 we plan on doing computation with them. */
37169 if (TARGET_80387
37170 && standard_80387_constant_p (x) > 0)
37171 {
37172 /* Limit class to non-sse. */
37173 if (regclass == FLOAT_SSE_REGS)
37174 return FLOAT_REGS;
37175 if (regclass == FP_TOP_SSE_REGS)
37176 return FP_TOP_REG;
37177 if (regclass == FP_SECOND_SSE_REGS)
37178 return FP_SECOND_REG;
37179 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
37180 return regclass;
37181 }
37182
37183 return NO_REGS;
37184 }
37185
37186 /* Generally when we see PLUS here, it's the function invariant
37187 (plus soft-fp const_int). Which can only be computed into general
37188 regs. */
37189 if (GET_CODE (x) == PLUS)
37190 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
37191
37192 /* QImode constants are easy to load, but non-constant QImode data
37193 must go into Q_REGS. */
37194 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
37195 {
37196 if (reg_class_subset_p (regclass, Q_REGS))
37197 return regclass;
37198 if (reg_class_subset_p (Q_REGS, regclass))
37199 return Q_REGS;
37200 return NO_REGS;
37201 }
37202
37203 return regclass;
37204 }
37205
37206 /* Discourage putting floating-point values in SSE registers unless
37207 SSE math is being used, and likewise for the 387 registers. */
37208 static reg_class_t
37209 ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
37210 {
37211 enum machine_mode mode = GET_MODE (x);
37212
37213 /* Restrict the output reload class to the register bank that we are doing
37214 math on. If we would like not to return a subset of CLASS, reject this
37215 alternative: if reload cannot do this, it will still use its choice. */
37217 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
37218 return MAYBE_SSE_CLASS_P (regclass) ? ALL_SSE_REGS : NO_REGS;
37219
37220 if (X87_FLOAT_MODE_P (mode))
37221 {
37222 if (regclass == FP_TOP_SSE_REGS)
37223 return FP_TOP_REG;
37224 else if (regclass == FP_SECOND_SSE_REGS)
37225 return FP_SECOND_REG;
37226 else
37227 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
37228 }
37229
37230 return regclass;
37231 }
37232
37233 static reg_class_t
37234 ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
37235 enum machine_mode mode, secondary_reload_info *sri)
37236 {
37237 /* Double-word spills from general registers to non-offsettable memory
37238 references (zero-extended addresses) require special handling. */
37239 if (TARGET_64BIT
37240 && MEM_P (x)
37241 && GET_MODE_SIZE (mode) > UNITS_PER_WORD
37242 && INTEGER_CLASS_P (rclass)
37243 && !offsettable_memref_p (x))
37244 {
37245 sri->icode = (in_p
37246 ? CODE_FOR_reload_noff_load
37247 : CODE_FOR_reload_noff_store);
37248 /* Add the cost of moving address to a temporary. */
37249 sri->extra_cost = 1;
37250
37251 return NO_REGS;
37252 }
37253
37254 /* QImode spills from non-QI registers require an
37255 intermediate register on 32-bit targets. */
37256 if (mode == QImode
37257 && (MAYBE_MASK_CLASS_P (rclass)
37258 || (!TARGET_64BIT && !in_p
37259 && INTEGER_CLASS_P (rclass)
37260 && MAYBE_NON_Q_CLASS_P (rclass))))
37261 {
37262 int regno;
37263
37264 if (REG_P (x))
37265 regno = REGNO (x);
37266 else
37267 regno = -1;
37268
37269 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
37270 regno = true_regnum (x);
37271
37272 /* Return Q_REGS if the operand is in memory. */
37273 if (regno == -1)
37274 return Q_REGS;
37275 }
37276
37277 /* This condition handles corner case where an expression involving
37278 pointers gets vectorized. We're trying to use the address of a
37279 stack slot as a vector initializer.
37280
37281 (set (reg:V2DI 74 [ vect_cst_.2 ])
37282 (vec_duplicate:V2DI (reg/f:DI 20 frame)))
37283
37284 Eventually frame gets turned into sp+offset like this:
37285
37286 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
37287 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
37288 (const_int 392 [0x188]))))
37289
37290 That later gets turned into:
37291
37292 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
37293 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
37294 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))
37295
37296 We'll have the following reload recorded:
37297
37298 Reload 0: reload_in (DI) =
37299 (plus:DI (reg/f:DI 7 sp)
37300 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
37301 reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
37302 SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
37303 reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
37304 reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
37305 reload_reg_rtx: (reg:V2DI 22 xmm1)
37306
37307 Which isn't going to work since SSE instructions can't handle scalar
37308 additions. Returning GENERAL_REGS forces the addition into integer
37309 register and reload can handle subsequent reloads without problems. */
37310
37311 if (in_p && GET_CODE (x) == PLUS
37312 && SSE_CLASS_P (rclass)
37313 && SCALAR_INT_MODE_P (mode))
37314 return GENERAL_REGS;
37315
37316 return NO_REGS;
37317 }
37318
37319 /* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
37320
37321 static bool
37322 ix86_class_likely_spilled_p (reg_class_t rclass)
37323 {
37324 switch (rclass)
37325 {
37326 case AREG:
37327 case DREG:
37328 case CREG:
37329 case BREG:
37330 case AD_REGS:
37331 case SIREG:
37332 case DIREG:
37333 case SSE_FIRST_REG:
37334 case FP_TOP_REG:
37335 case FP_SECOND_REG:
37336 return true;
37337
37338 default:
37339 break;
37340 }
37341
37342 return false;
37343 }
37344
37345 /* If we are copying between general and FP registers, we need a memory
37346 location. The same is true for SSE and MMX registers.
37347
37348 To optimize register_move_cost performance, allow inline variant.
37349
37350 The macro can't work reliably when one of the CLASSES is a class containing
37351 registers from multiple units (SSE, MMX, integer). We avoid this by never
37352 combining those units in a single alternative in the machine description.
37353 Ensure that this constraint holds to avoid unexpected surprises.
37354
37355 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
37356 enforce these sanity checks. */
37357
37358 static inline bool
37359 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
37360 enum machine_mode mode, int strict)
37361 {
37362 if (lra_in_progress && (class1 == NO_REGS || class2 == NO_REGS))
37363 return false;
37364 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
37365 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
37366 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
37367 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
37368 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
37369 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
37370 {
37371 gcc_assert (!strict || lra_in_progress);
37372 return true;
37373 }
37374
37375 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
37376 return true;
37377
37378 /* ??? This is a lie. We do have moves between mmx/general and between
37379 mmx/sse2. But by saying we need secondary memory we discourage the
37380 register allocator from using the mmx registers unless needed. */
37381 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
37382 return true;
37383
37384 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
37385 {
37386 /* SSE1 doesn't have any direct moves from other classes. */
37387 if (!TARGET_SSE2)
37388 return true;
37389
37390 /* If the target says that inter-unit moves are more expensive
37391 than moving through memory, then don't generate them. */
37392 if ((SSE_CLASS_P (class1) && !TARGET_INTER_UNIT_MOVES_FROM_VEC)
37393 || (SSE_CLASS_P (class2) && !TARGET_INTER_UNIT_MOVES_TO_VEC))
37394 return true;
37395
37396 /* Between SSE and general, we have moves no larger than word size. */
37397 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
37398 return true;
37399 }
37400
37401 return false;
37402 }
37403
37404 bool
37405 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
37406 enum machine_mode mode, int strict)
37407 {
37408 return inline_secondary_memory_needed (class1, class2, mode, strict);
37409 }
37410
37411 /* Implement the TARGET_CLASS_MAX_NREGS hook.
37412
37413 On the 80386, this is the size of MODE in words,
37414 except in the FP regs, where a single reg is always enough. */
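/* For example, DImode needs two general registers on ia32 but only one
   in 64-bit mode, while XFmode always fits in a single FP register.  */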
37415
37416 static unsigned char
37417 ix86_class_max_nregs (reg_class_t rclass, enum machine_mode mode)
37418 {
37419 if (MAYBE_INTEGER_CLASS_P (rclass))
37420 {
37421 if (mode == XFmode)
37422 return (TARGET_64BIT ? 2 : 3);
37423 else if (mode == XCmode)
37424 return (TARGET_64BIT ? 4 : 6);
37425 else
37426 return ((GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD);
37427 }
37428 else
37429 {
37430 if (COMPLEX_MODE_P (mode))
37431 return 2;
37432 else
37433 return 1;
37434 }
37435 }
37436
37437 /* Return true if the registers in CLASS cannot represent the change from
37438 modes FROM to TO. */
37439
37440 bool
37441 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
37442 enum reg_class regclass)
37443 {
37444 if (from == to)
37445 return false;
37446
37447 /* x87 registers can't do subreg at all, as all values are reformatted
37448 to extended precision. */
37449 if (MAYBE_FLOAT_CLASS_P (regclass))
37450 return true;
37451
37452 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
37453 {
37454 /* Vector registers do not support QI or HImode loads. If we don't
37455 disallow a change to these modes, reload will assume it's ok to
37456 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
37457 the vec_dupv4hi pattern. */
37458 if (GET_MODE_SIZE (from) < 4)
37459 return true;
37460
37461 /* Vector registers do not support subreg with nonzero offsets, which
37462 are otherwise valid for integer registers. Since we can't see
37463 whether we have a nonzero offset from here, prohibit all
37464 nonparadoxical subregs changing size. */
37465 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
37466 return true;
37467 }
37468
37469 return false;
37470 }
37471
37472 /* Return the cost of moving data of mode M between a
37473 register and memory. A value of 2 is the default; this cost is
37474 relative to those in `REGISTER_MOVE_COST'.
37475
37476 This function is used extensively by register_move_cost that is used to
37477 build tables at startup. Make it inline in this case.
37478 When IN is 2, return maximum of in and out move cost.
37479
37480 If moving between registers and memory is more expensive than
37481 between two registers, you should define this macro to express the
37482 relative cost.
37483
37484 Also model the increased cost of moving QImode registers in
37485 non-Q_REGS classes.
37486 */
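/* For example, with IN == 2 an SFmode value in FLOAT_REGS costs
   MAX (ix86_cost->fp_load[0], ix86_cost->fp_store[0]).  */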
37487 static inline int
37488 inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
37489 int in)
37490 {
37491 int cost;
37492 if (FLOAT_CLASS_P (regclass))
37493 {
37494 int index;
37495 switch (mode)
37496 {
37497 case SFmode:
37498 index = 0;
37499 break;
37500 case DFmode:
37501 index = 1;
37502 break;
37503 case XFmode:
37504 index = 2;
37505 break;
37506 default:
37507 return 100;
37508 }
37509 if (in == 2)
37510 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
37511 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
37512 }
37513 if (SSE_CLASS_P (regclass))
37514 {
37515 int index;
37516 switch (GET_MODE_SIZE (mode))
37517 {
37518 case 4:
37519 index = 0;
37520 break;
37521 case 8:
37522 index = 1;
37523 break;
37524 case 16:
37525 index = 2;
37526 break;
37527 default:
37528 return 100;
37529 }
37530 if (in == 2)
37531 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
37532 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
37533 }
37534 if (MMX_CLASS_P (regclass))
37535 {
37536 int index;
37537 switch (GET_MODE_SIZE (mode))
37538 {
37539 case 4:
37540 index = 0;
37541 break;
37542 case 8:
37543 index = 1;
37544 break;
37545 default:
37546 return 100;
37547 }
37548 if (in == 2)
37549 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
37550 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
37551 }
37552 switch (GET_MODE_SIZE (mode))
37553 {
37554 case 1:
37555 if (Q_CLASS_P (regclass) || TARGET_64BIT)
37556 {
37557 if (!in)
37558 return ix86_cost->int_store[0];
37559 if (TARGET_PARTIAL_REG_DEPENDENCY
37560 && optimize_function_for_speed_p (cfun))
37561 cost = ix86_cost->movzbl_load;
37562 else
37563 cost = ix86_cost->int_load[0];
37564 if (in == 2)
37565 return MAX (cost, ix86_cost->int_store[0]);
37566 return cost;
37567 }
37568 else
37569 {
37570 if (in == 2)
37571 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
37572 if (in)
37573 return ix86_cost->movzbl_load;
37574 else
37575 return ix86_cost->int_store[0] + 4;
37576 }
37577 break;
37578 case 2:
37579 if (in == 2)
37580 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
37581 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
37582 default:
37583 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
37584 if (mode == TFmode)
37585 mode = XFmode;
37586 if (in == 2)
37587 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
37588 else if (in)
37589 cost = ix86_cost->int_load[2];
37590 else
37591 cost = ix86_cost->int_store[2];
37592 return (cost * (((int) GET_MODE_SIZE (mode)
37593 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
37594 }
37595 }
37596
37597 static int
37598 ix86_memory_move_cost (enum machine_mode mode, reg_class_t regclass,
37599 bool in)
37600 {
37601 return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
37602 }
37603
37604
37605 /* Return the cost of moving data from a register in class CLASS1 to
37606 one in class CLASS2.
37607
37608 It is not required that the cost always equal 2 when FROM is the same as TO;
37609 on some machines it is expensive to move between registers if they are not
37610 general registers. */
37611
37612 static int
37613 ix86_register_move_cost (enum machine_mode mode, reg_class_t class1_i,
37614 reg_class_t class2_i)
37615 {
37616 enum reg_class class1 = (enum reg_class) class1_i;
37617 enum reg_class class2 = (enum reg_class) class2_i;
37618
37619 /* In case we require secondary memory, compute cost of the store followed
37620 by load. In order to avoid bad register allocation choices, we need
37621 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
37622
37623 if (inline_secondary_memory_needed (class1, class2, mode, 0))
37624 {
37625 int cost = 1;
37626
37627 cost += inline_memory_move_cost (mode, class1, 2);
37628 cost += inline_memory_move_cost (mode, class2, 2);
37629
37630 /* In case of copying from general_purpose_register we may emit multiple
37631 stores followed by single load causing memory size mismatch stall.
37632 Count this as arbitrarily high cost of 20. */
37633 if (targetm.class_max_nregs (class1, mode)
37634 > targetm.class_max_nregs (class2, mode))
37635 cost += 20;
37636
37637 /* In the case of FP/MMX moves, the registers actually overlap, and we
37638 have to switch modes in order to treat them differently. */
37639 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
37640 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
37641 cost += 20;
37642
37643 return cost;
37644 }
37645
37646 /* Moves between SSE/MMX and integer unit are expensive. */
37647 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
37648 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
37649
37650 /* ??? By keeping returned value relatively high, we limit the number
37651 of moves between integer and MMX/SSE registers for all targets.
37652 Additionally, high value prevents problem with x86_modes_tieable_p(),
37653 where integer modes in MMX/SSE registers are not tieable
37654 because of missing QImode and HImode moves to, from or between
37655 MMX/SSE registers. */
37656 return MAX (8, ix86_cost->mmxsse_to_integer);
37657
37658 if (MAYBE_FLOAT_CLASS_P (class1))
37659 return ix86_cost->fp_move;
37660 if (MAYBE_SSE_CLASS_P (class1))
37661 return ix86_cost->sse_move;
37662 if (MAYBE_MMX_CLASS_P (class1))
37663 return ix86_cost->mmx_move;
37664 return 2;
37665 }
37666
37667 /* Return TRUE if hard register REGNO can hold a value of machine-mode
37668 MODE. */
37669
37670 bool
37671 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
37672 {
37673 /* Flags and only flags can only hold CCmode values. */
37674 if (CC_REGNO_P (regno))
37675 return GET_MODE_CLASS (mode) == MODE_CC;
37676 if (GET_MODE_CLASS (mode) == MODE_CC
37677 || GET_MODE_CLASS (mode) == MODE_RANDOM
37678 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
37679 return false;
37680 if (STACK_REGNO_P (regno))
37681 return VALID_FP_MODE_P (mode);
37682 if (MASK_REGNO_P (regno))
37683 return VALID_MASK_REG_MODE (mode);
37684 if (SSE_REGNO_P (regno))
37685 {
37686 /* We implement the move patterns for all vector modes into and
37687 out of SSE registers, even when no operation instructions
37688 are available. */
37689
37690 /* For AVX-512 we allow, regardless of regno:
37691 - XI mode
37692 - any of 512-bit wide vector mode
37693 - any scalar mode. */
37694 if (TARGET_AVX512F
37695 && (mode == XImode
37696 || VALID_AVX512F_REG_MODE (mode)
37697 || VALID_AVX512F_SCALAR_MODE (mode)))
37698 return true;
37699
37700 /* xmm16-xmm31 are only available for AVX-512. */
37701 if (EXT_REX_SSE_REGNO_P (regno))
37702 return false;
37703
37704 /* OImode and AVX modes are available only when AVX is enabled. */
37705 return ((TARGET_AVX
37706 && VALID_AVX256_REG_OR_OI_MODE (mode))
37707 || VALID_SSE_REG_MODE (mode)
37708 || VALID_SSE2_REG_MODE (mode)
37709 || VALID_MMX_REG_MODE (mode)
37710 || VALID_MMX_REG_MODE_3DNOW (mode));
37711 }
37712 if (MMX_REGNO_P (regno))
37713 {
37714 /* We implement the move patterns for 3DNOW modes even in MMX mode,
37715 so if the register is available at all, then we can move data of
37716 the given mode into or out of it. */
37717 return (VALID_MMX_REG_MODE (mode)
37718 || VALID_MMX_REG_MODE_3DNOW (mode));
37719 }
37720
37721 if (mode == QImode)
37722 {
37723 /* Take care for QImode values - they can be in non-QI regs,
37724 but then they do cause partial register stalls. */
37725 if (ANY_QI_REGNO_P (regno))
37726 return true;
37727 if (!TARGET_PARTIAL_REG_STALL)
37728 return true;
37729 /* LRA checks if the hard register is OK for the given mode.
37730 QImode values can live in non-QI regs, so we allow all
37731 registers here. */
37732 if (lra_in_progress)
37733 return true;
37734 return !can_create_pseudo_p ();
37735 }
37736 /* We handle both integer and floats in the general purpose registers. */
37737 else if (VALID_INT_MODE_P (mode))
37738 return true;
37739 else if (VALID_FP_MODE_P (mode))
37740 return true;
37741 else if (VALID_DFP_MODE_P (mode))
37742 return true;
37743 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
37744 on to use that value in smaller contexts, this can easily force a
37745 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
37746 supporting DImode, allow it. */
37747 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
37748 return true;
37749
37750 return false;
37751 }
37752
37753 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
37754 tieable integer mode. */
37755
37756 static bool
37757 ix86_tieable_integer_mode_p (enum machine_mode mode)
37758 {
37759 switch (mode)
37760 {
37761 case HImode:
37762 case SImode:
37763 return true;
37764
37765 case QImode:
37766 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
37767
37768 case DImode:
37769 return TARGET_64BIT;
37770
37771 default:
37772 return false;
37773 }
37774 }
37775
37776 /* Return true if MODE1 is accessible in a register that can hold MODE2
37777 without copying. That is, all register classes that can hold MODE2
37778 can also hold MODE1. */
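/* For example, HImode and SImode registers always tie with each other,
   QImode joins them in 64-bit mode or when partial register stalls are not
   a concern, and DImode ties only on 64-bit targets.  */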
37779
37780 bool
37781 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
37782 {
37783 if (mode1 == mode2)
37784 return true;
37785
37786 if (ix86_tieable_integer_mode_p (mode1)
37787 && ix86_tieable_integer_mode_p (mode2))
37788 return true;
37789
37790 /* MODE2 being XFmode implies fp stack or general regs, which means we
37791 can tie any smaller floating point modes to it. Note that we do not
37792 tie this with TFmode. */
37793 if (mode2 == XFmode)
37794 return mode1 == SFmode || mode1 == DFmode;
37795
37796 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
37797 that we can tie it with SFmode. */
37798 if (mode2 == DFmode)
37799 return mode1 == SFmode;
37800
37801 /* If MODE2 is only appropriate for an SSE register, then tie with
37802 any other mode acceptable to SSE registers. */
37803 if (GET_MODE_SIZE (mode2) == 32
37804 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
37805 return (GET_MODE_SIZE (mode1) == 32
37806 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
37807 if (GET_MODE_SIZE (mode2) == 16
37808 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
37809 return (GET_MODE_SIZE (mode1) == 16
37810 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
37811
37812 /* If MODE2 is appropriate for an MMX register, then tie
37813 with any other mode acceptable to MMX registers. */
37814 if (GET_MODE_SIZE (mode2) == 8
37815 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
37816 return (GET_MODE_SIZE (mode1) == 8
37817 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
37818
37819 return false;
37820 }
37821
37822 /* Return the cost of moving between two registers of mode MODE. */
37823
37824 static int
37825 ix86_set_reg_reg_cost (enum machine_mode mode)
37826 {
37827 unsigned int units = UNITS_PER_WORD;
37828
37829 switch (GET_MODE_CLASS (mode))
37830 {
37831 default:
37832 break;
37833
37834 case MODE_CC:
37835 units = GET_MODE_SIZE (CCmode);
37836 break;
37837
37838 case MODE_FLOAT:
37839 if ((TARGET_SSE && mode == TFmode)
37840 || (TARGET_80387 && mode == XFmode)
37841 || ((TARGET_80387 || TARGET_SSE2) && mode == DFmode)
37842 || ((TARGET_80387 || TARGET_SSE) && mode == SFmode))
37843 units = GET_MODE_SIZE (mode);
37844 break;
37845
37846 case MODE_COMPLEX_FLOAT:
37847 if ((TARGET_SSE && mode == TCmode)
37848 || (TARGET_80387 && mode == XCmode)
37849 || ((TARGET_80387 || TARGET_SSE2) && mode == DCmode)
37850 || ((TARGET_80387 || TARGET_SSE) && mode == SCmode))
37851 units = GET_MODE_SIZE (mode);
37852 break;
37853
37854 case MODE_VECTOR_INT:
37855 case MODE_VECTOR_FLOAT:
37856 if ((TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
37857 || (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
37858 || (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
37859 || (TARGET_SSE && VALID_SSE_REG_MODE (mode))
37860 || (TARGET_MMX && VALID_MMX_REG_MODE (mode)))
37861 units = GET_MODE_SIZE (mode);
37862 }
37863
37864 /* Return the cost of moving between two registers of mode MODE,
37865 assuming that the move will be in pieces of at most UNITS bytes. */
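/* For instance, a 16-byte TImode move on a 32-bit target (UNITS_PER_WORD == 4)
   is costed as four pieces, COSTS_N_INSNS (4), while a V4SFmode move with SSE
   enabled uses UNITS == 16 and is costed as a single piece, COSTS_N_INSNS (1).  */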
37866 return COSTS_N_INSNS ((GET_MODE_SIZE (mode) + units - 1) / units);
37867 }
37868
37869 /* Compute a (partial) cost for rtx X. Return true if the complete
37870 cost has been computed, and false if subexpressions should be
37871 scanned. In either case, *TOTAL contains the cost result. */
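/* For example, the SET case below computes a complete cost for a
   register-to-register move and returns true, whereas SIGN_EXTEND only sets
   *TOTAL to cost->movsx and returns false so the operand is costed
   separately by the caller.  */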
37872
37873 static bool
37874 ix86_rtx_costs (rtx x, int code_i, int outer_code_i, int opno, int *total,
37875 bool speed)
37876 {
37877 rtx mask;
37878 enum rtx_code code = (enum rtx_code) code_i;
37879 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
37880 enum machine_mode mode = GET_MODE (x);
37881 const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
37882
37883 switch (code)
37884 {
37885 case SET:
37886 if (register_operand (SET_DEST (x), VOIDmode)
37887 && reg_or_0_operand (SET_SRC (x), VOIDmode))
37888 {
37889 *total = ix86_set_reg_reg_cost (GET_MODE (SET_DEST (x)));
37890 return true;
37891 }
37892 return false;
37893
37894 case CONST_INT:
37895 case CONST:
37896 case LABEL_REF:
37897 case SYMBOL_REF:
37898 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
37899 *total = 3;
37900 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
37901 *total = 2;
37902 else if (flag_pic && SYMBOLIC_CONST (x)
37903 && !(TARGET_64BIT
37904 && (GET_CODE (x) == LABEL_REF
37905 || (GET_CODE (x) == SYMBOL_REF
37906 && SYMBOL_REF_LOCAL_P (x)))))
37907 *total = 1;
37908 else
37909 *total = 0;
37910 return true;
37911
37912 case CONST_DOUBLE:
37913 if (mode == VOIDmode)
37914 {
37915 *total = 0;
37916 return true;
37917 }
37918 switch (standard_80387_constant_p (x))
37919 {
37920 case 1: /* 0.0 */
37921 *total = 1;
37922 return true;
37923 default: /* Other constants */
37924 *total = 2;
37925 return true;
37926 case 0:
37927 case -1:
37928 break;
37929 }
37930 if (SSE_FLOAT_MODE_P (mode))
37931 {
37932 case CONST_VECTOR:
37933 switch (standard_sse_constant_p (x))
37934 {
37935 case 0:
37936 break;
37937 case 1: /* 0: xor eliminates false dependency */
37938 *total = 0;
37939 return true;
37940 default: /* -1: cmp contains false dependency */
37941 *total = 1;
37942 return true;
37943 }
37944 }
37945 /* Fall back to (MEM (SYMBOL_REF)), since that's where
37946 it'll probably end up. Add a penalty for size. */
37947 *total = (COSTS_N_INSNS (1)
37948 + (flag_pic != 0 && !TARGET_64BIT)
37949 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
37950 return true;
37951
37952 case ZERO_EXTEND:
37953 /* The zero extension is often completely free on x86_64, so make
37954 it as cheap as possible. */
37955 if (TARGET_64BIT && mode == DImode
37956 && GET_MODE (XEXP (x, 0)) == SImode)
37957 *total = 1;
37958 else if (TARGET_ZERO_EXTEND_WITH_AND)
37959 *total = cost->add;
37960 else
37961 *total = cost->movzx;
37962 return false;
37963
37964 case SIGN_EXTEND:
37965 *total = cost->movsx;
37966 return false;
37967
37968 case ASHIFT:
37969 if (SCALAR_INT_MODE_P (mode)
37970 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
37971 && CONST_INT_P (XEXP (x, 1)))
37972 {
37973 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
37974 if (value == 1)
37975 {
37976 *total = cost->add;
37977 return false;
37978 }
37979 if ((value == 2 || value == 3)
37980 && cost->lea <= cost->shift_const)
37981 {
37982 *total = cost->lea;
37983 return false;
37984 }
37985 }
37986 /* FALLTHRU */
37987
37988 case ROTATE:
37989 case ASHIFTRT:
37990 case LSHIFTRT:
37991 case ROTATERT:
37992 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
37993 {
37994 /* ??? Should be SSE vector operation cost. */
37995 /* At least for published AMD latencies, this really is the same
37996 as the latency for a simple fpu operation like fabs. */
37997 /* V*QImode is emulated with 1-11 insns. */
37998 if (mode == V16QImode || mode == V32QImode)
37999 {
38000 int count = 11;
38001 if (TARGET_XOP && mode == V16QImode)
38002 {
38003 /* For XOP we use vpshab, which requires a broadcast of the
38004 value to the variable shift insn. For constants this
38005 means a V16Q const in mem; even when we can perform the
38006 shift with one insn set the cost to prefer paddb. */
38007 if (CONSTANT_P (XEXP (x, 1)))
38008 {
38009 *total = (cost->fabs
38010 + rtx_cost (XEXP (x, 0), code, 0, speed)
38011 + (speed ? 2 : COSTS_N_BYTES (16)));
38012 return true;
38013 }
38014 count = 3;
38015 }
38016 else if (TARGET_SSSE3)
38017 count = 7;
38018 *total = cost->fabs * count;
38019 }
38020 else
38021 *total = cost->fabs;
38022 }
38023 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
38024 {
38025 if (CONST_INT_P (XEXP (x, 1)))
38026 {
38027 if (INTVAL (XEXP (x, 1)) > 32)
38028 *total = cost->shift_const + COSTS_N_INSNS (2);
38029 else
38030 *total = cost->shift_const * 2;
38031 }
38032 else
38033 {
38034 if (GET_CODE (XEXP (x, 1)) == AND)
38035 *total = cost->shift_var * 2;
38036 else
38037 *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
38038 }
38039 }
38040 else
38041 {
38042 if (CONST_INT_P (XEXP (x, 1)))
38043 *total = cost->shift_const;
38044 else if (GET_CODE (XEXP (x, 1)) == SUBREG
38045 && GET_CODE (XEXP (XEXP (x, 1), 0)) == AND)
38046 {
38047 /* Return the cost after shift-and truncation. */
38048 *total = cost->shift_var;
38049 return true;
38050 }
38051 else
38052 *total = cost->shift_var;
38053 }
38054 return false;
38055
38056 case FMA:
38057 {
38058 rtx sub;
38059
38060 gcc_assert (FLOAT_MODE_P (mode));
38061 gcc_assert (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F);
38062
38063 /* ??? SSE scalar/vector cost should be used here. */
38064 /* ??? Bald assumption that fma has the same cost as fmul. */
38065 *total = cost->fmul;
38066 *total += rtx_cost (XEXP (x, 1), FMA, 1, speed);
38067
38068 /* Negate in op0 or op2 is free: FMS, FNMA, FNMS. */
38069 sub = XEXP (x, 0);
38070 if (GET_CODE (sub) == NEG)
38071 sub = XEXP (sub, 0);
38072 *total += rtx_cost (sub, FMA, 0, speed);
38073
38074 sub = XEXP (x, 2);
38075 if (GET_CODE (sub) == NEG)
38076 sub = XEXP (sub, 0);
38077 *total += rtx_cost (sub, FMA, 2, speed);
38078 return true;
38079 }
38080
38081 case MULT:
38082 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38083 {
38084 /* ??? SSE scalar cost should be used here. */
38085 *total = cost->fmul;
38086 return false;
38087 }
38088 else if (X87_FLOAT_MODE_P (mode))
38089 {
38090 *total = cost->fmul;
38091 return false;
38092 }
38093 else if (FLOAT_MODE_P (mode))
38094 {
38095 /* ??? SSE vector cost should be used here. */
38096 *total = cost->fmul;
38097 return false;
38098 }
38099 else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
38100 {
38101 /* V*QImode is emulated with 7-13 insns. */
38102 if (mode == V16QImode || mode == V32QImode)
38103 {
38104 int extra = 11;
38105 if (TARGET_XOP && mode == V16QImode)
38106 extra = 5;
38107 else if (TARGET_SSSE3)
38108 extra = 6;
38109 *total = cost->fmul * 2 + cost->fabs * extra;
38110 }
38111 /* V*DImode is emulated with 5-8 insns. */
38112 else if (mode == V2DImode || mode == V4DImode)
38113 {
38114 if (TARGET_XOP && mode == V2DImode)
38115 *total = cost->fmul * 2 + cost->fabs * 3;
38116 else
38117 *total = cost->fmul * 3 + cost->fabs * 5;
38118 }
38119 /* Without sse4.1, we don't have PMULLD; it's emulated with 7
38120 insns, including two PMULUDQ. */
38121 else if (mode == V4SImode && !(TARGET_SSE4_1 || TARGET_AVX))
38122 *total = cost->fmul * 2 + cost->fabs * 5;
38123 else
38124 *total = cost->fmul;
38125 return false;
38126 }
38127 else
38128 {
38129 rtx op0 = XEXP (x, 0);
38130 rtx op1 = XEXP (x, 1);
38131 int nbits;
38132 if (CONST_INT_P (XEXP (x, 1)))
38133 {
38134 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
38135 for (nbits = 0; value != 0; value &= value - 1)
38136 nbits++;
38137 }
38138 else
38139 /* This is arbitrary. */
38140 nbits = 7;
38141
38142 /* Compute costs correctly for widening multiplication. */
38143 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
38144 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
38145 == GET_MODE_SIZE (mode))
38146 {
38147 int is_mulwiden = 0;
38148 enum machine_mode inner_mode = GET_MODE (op0);
38149
38150 if (GET_CODE (op0) == GET_CODE (op1))
38151 is_mulwiden = 1, op1 = XEXP (op1, 0);
38152 else if (CONST_INT_P (op1))
38153 {
38154 if (GET_CODE (op0) == SIGN_EXTEND)
38155 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
38156 == INTVAL (op1);
38157 else
38158 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
38159 }
38160
38161 if (is_mulwiden)
38162 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
38163 }
38164
38165 *total = (cost->mult_init[MODE_INDEX (mode)]
38166 + nbits * cost->mult_bit
38167 + rtx_cost (op0, outer_code, opno, speed)
38168 + rtx_cost (op1, outer_code, opno, speed));
38169
38170 return true;
38171 }
38172
38173 case DIV:
38174 case UDIV:
38175 case MOD:
38176 case UMOD:
38177 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38178 /* ??? SSE cost should be used here. */
38179 *total = cost->fdiv;
38180 else if (X87_FLOAT_MODE_P (mode))
38181 *total = cost->fdiv;
38182 else if (FLOAT_MODE_P (mode))
38183 /* ??? SSE vector cost should be used here. */
38184 *total = cost->fdiv;
38185 else
38186 *total = cost->divide[MODE_INDEX (mode)];
38187 return false;
38188
38189 case PLUS:
38190 if (GET_MODE_CLASS (mode) == MODE_INT
38191 && GET_MODE_SIZE (mode) <= UNITS_PER_WORD)
38192 {
38193 if (GET_CODE (XEXP (x, 0)) == PLUS
38194 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
38195 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
38196 && CONSTANT_P (XEXP (x, 1)))
38197 {
38198 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
38199 if (val == 2 || val == 4 || val == 8)
38200 {
38201 *total = cost->lea;
38202 *total += rtx_cost (XEXP (XEXP (x, 0), 1),
38203 outer_code, opno, speed);
38204 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
38205 outer_code, opno, speed);
38206 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
38207 return true;
38208 }
38209 }
38210 else if (GET_CODE (XEXP (x, 0)) == MULT
38211 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
38212 {
38213 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
38214 if (val == 2 || val == 4 || val == 8)
38215 {
38216 *total = cost->lea;
38217 *total += rtx_cost (XEXP (XEXP (x, 0), 0),
38218 outer_code, opno, speed);
38219 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
38220 return true;
38221 }
38222 }
38223 else if (GET_CODE (XEXP (x, 0)) == PLUS)
38224 {
38225 *total = cost->lea;
38226 *total += rtx_cost (XEXP (XEXP (x, 0), 0),
38227 outer_code, opno, speed);
38228 *total += rtx_cost (XEXP (XEXP (x, 0), 1),
38229 outer_code, opno, speed);
38230 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
38231 return true;
38232 }
38233 }
38234 /* FALLTHRU */
38235
38236 case MINUS:
38237 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38238 {
38239 /* ??? SSE cost should be used here. */
38240 *total = cost->fadd;
38241 return false;
38242 }
38243 else if (X87_FLOAT_MODE_P (mode))
38244 {
38245 *total = cost->fadd;
38246 return false;
38247 }
38248 else if (FLOAT_MODE_P (mode))
38249 {
38250 /* ??? SSE vector cost should be used here. */
38251 *total = cost->fadd;
38252 return false;
38253 }
38254 /* FALLTHRU */
38255
38256 case AND:
38257 case IOR:
38258 case XOR:
38259 if (GET_MODE_CLASS (mode) == MODE_INT
38260 && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
38261 {
38262 *total = (cost->add * 2
38263 + (rtx_cost (XEXP (x, 0), outer_code, opno, speed)
38264 << (GET_MODE (XEXP (x, 0)) != DImode))
38265 + (rtx_cost (XEXP (x, 1), outer_code, opno, speed)
38266 << (GET_MODE (XEXP (x, 1)) != DImode)));
38267 return true;
38268 }
38269 /* FALLTHRU */
38270
38271 case NEG:
38272 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38273 {
38274 /* ??? SSE cost should be used here. */
38275 *total = cost->fchs;
38276 return false;
38277 }
38278 else if (X87_FLOAT_MODE_P (mode))
38279 {
38280 *total = cost->fchs;
38281 return false;
38282 }
38283 else if (FLOAT_MODE_P (mode))
38284 {
38285 /* ??? SSE vector cost should be used here. */
38286 *total = cost->fchs;
38287 return false;
38288 }
38289 /* FALLTHRU */
38290
38291 case NOT:
38292 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
38293 {
38294 /* ??? Should be SSE vector operation cost. */
38295 /* At least for published AMD latencies, this really is the same
38296 as the latency for a simple fpu operation like fabs. */
38297 *total = cost->fabs;
38298 }
38299 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
38300 *total = cost->add * 2;
38301 else
38302 *total = cost->add;
38303 return false;
38304
38305 case COMPARE:
38306 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
38307 && XEXP (XEXP (x, 0), 1) == const1_rtx
38308 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
38309 && XEXP (x, 1) == const0_rtx)
38310 {
38311 /* This kind of construct is implemented using test[bwl].
38312 Treat it as if we had an AND. */
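/* E.g., (compare (zero_extract X (const_int 1) (const_int 3)) (const_int 0))
   tests a single bit and can be emitted as `testb $8, ...', so it is costed
   like an AND with a constant.  */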
38313 *total = (cost->add
38314 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, opno, speed)
38315 + rtx_cost (const1_rtx, outer_code, opno, speed));
38316 return true;
38317 }
38318 return false;
38319
38320 case FLOAT_EXTEND:
38321 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
38322 *total = 0;
38323 return false;
38324
38325 case ABS:
38326 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38327 /* ??? SSE cost should be used here. */
38328 *total = cost->fabs;
38329 else if (X87_FLOAT_MODE_P (mode))
38330 *total = cost->fabs;
38331 else if (FLOAT_MODE_P (mode))
38332 /* ??? SSE vector cost should be used here. */
38333 *total = cost->fabs;
38334 return false;
38335
38336 case SQRT:
38337 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38338 /* ??? SSE cost should be used here. */
38339 *total = cost->fsqrt;
38340 else if (X87_FLOAT_MODE_P (mode))
38341 *total = cost->fsqrt;
38342 else if (FLOAT_MODE_P (mode))
38343 /* ??? SSE vector cost should be used here. */
38344 *total = cost->fsqrt;
38345 return false;
38346
38347 case UNSPEC:
38348 if (XINT (x, 1) == UNSPEC_TP)
38349 *total = 0;
38350 return false;
38351
38352 case VEC_SELECT:
38353 case VEC_CONCAT:
38354 case VEC_DUPLICATE:
38355 /* ??? Assume all of these vector manipulation patterns are
38356 recognizable, in which case they all have pretty much the
38357 same cost. */
38358 *total = cost->fabs;
38359 return true;
38360 case VEC_MERGE:
38361 mask = XEXP (x, 2);
38362 /* This is a masked instruction; assume the same cost
38363 as the nonmasked variant. */
38364 if (TARGET_AVX512F && register_operand (mask, GET_MODE (mask)))
38365 *total = rtx_cost (XEXP (x, 0), outer_code, opno, speed);
38366 else
38367 *total = cost->fabs;
38368 return true;
38369
38370 default:
38371 return false;
38372 }
38373 }
38374
38375 #if TARGET_MACHO
38376
38377 static int current_machopic_label_num;
38378
38379 /* Given a symbol name and its associated stub, write out the
38380 definition of the stub. */
38381
38382 void
38383 machopic_output_stub (FILE *file, const char *symb, const char *stub)
38384 {
38385 unsigned int length;
38386 char *binder_name, *symbol_name, lazy_ptr_name[32];
38387 int label = ++current_machopic_label_num;
38388
38389 /* For 64-bit we shouldn't get here. */
38390 gcc_assert (!TARGET_64BIT);
38391
38392 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
38393 symb = targetm.strip_name_encoding (symb);
38394
38395 length = strlen (stub);
38396 binder_name = XALLOCAVEC (char, length + 32);
38397 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
38398
38399 length = strlen (symb);
38400 symbol_name = XALLOCAVEC (char, length + 32);
38401 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
38402
38403 sprintf (lazy_ptr_name, "L%d$lz", label);
38404
38405 if (MACHOPIC_ATT_STUB)
38406 switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
38407 else if (MACHOPIC_PURE)
38408 switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
38409 else
38410 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
38411
38412 fprintf (file, "%s:\n", stub);
38413 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
38414
38415 if (MACHOPIC_ATT_STUB)
38416 {
38417 fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
38418 }
38419 else if (MACHOPIC_PURE)
38420 {
38421 /* PIC stub. */
38422 /* 25-byte PIC stub using "CALL get_pc_thunk". */
38423 rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
38424 output_set_got (tmp, NULL_RTX); /* "CALL ___<cpu>.get_pc_thunk.cx". */
38425 fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
38426 label, lazy_ptr_name, label);
38427 fprintf (file, "\tjmp\t*%%ecx\n");
38428 }
38429 else
38430 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
38431
38432 /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
38433 it needs no stub-binding-helper. */
38434 if (MACHOPIC_ATT_STUB)
38435 return;
38436
38437 fprintf (file, "%s:\n", binder_name);
38438
38439 if (MACHOPIC_PURE)
38440 {
38441 fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name);
38442 fprintf (file, "\tpushl\t%%ecx\n");
38443 }
38444 else
38445 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
38446
38447 fputs ("\tjmp\tdyld_stub_binding_helper\n", file);
38448
38449 /* N.B. Keep the correspondence of these
38450 'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
38451 old-pic/new-pic/non-pic stubs; altering this will break
38452 compatibility with existing dylibs. */
38453 if (MACHOPIC_PURE)
38454 {
38455 /* 25-byte PIC stub using "CALL get_pc_thunk". */
38456 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
38457 }
38458 else
38459 /* 16-byte -mdynamic-no-pic stub. */
38460 switch_to_section(darwin_sections[machopic_lazy_symbol_ptr3_section]);
38461
38462 fprintf (file, "%s:\n", lazy_ptr_name);
38463 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
38464 fprintf (file, ASM_LONG "%s\n", binder_name);
38465 }
38466 #endif /* TARGET_MACHO */
38467
38468 /* Order the registers for register allocator. */
38469
38470 void
38471 x86_order_regs_for_local_alloc (void)
38472 {
38473 int pos = 0;
38474 int i;
38475
38476 /* First allocate the local general purpose registers. */
38477 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
38478 if (GENERAL_REGNO_P (i) && call_used_regs[i])
38479 reg_alloc_order [pos++] = i;
38480
38481 /* Global general purpose registers. */
38482 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
38483 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
38484 reg_alloc_order [pos++] = i;
38485
38486 /* x87 registers come first in case we are doing FP math
38487 using them. */
38488 if (!TARGET_SSE_MATH)
38489 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
38490 reg_alloc_order [pos++] = i;
38491
38492 /* SSE registers. */
38493 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
38494 reg_alloc_order [pos++] = i;
38495 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
38496 reg_alloc_order [pos++] = i;
38497
38498 /* Extended REX SSE registers. */
38499 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
38500 reg_alloc_order [pos++] = i;
38501
38502 /* Mask register. */
38503 for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
38504 reg_alloc_order [pos++] = i;
38505
38506 /* x87 registers. */
38507 if (TARGET_SSE_MATH)
38508 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
38509 reg_alloc_order [pos++] = i;
38510
38511 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
38512 reg_alloc_order [pos++] = i;
38513
38514 /* Initialize the rest of the array, as we do not allocate some registers
38515 at all. */
38516 while (pos < FIRST_PSEUDO_REGISTER)
38517 reg_alloc_order [pos++] = 0;
38518 }
38519
38520 /* Handle a "callee_pop_aggregate_return" attribute; arguments as
38521 in struct attribute_spec.handler. */
38522 static tree
38523 ix86_handle_callee_pop_aggregate_return (tree *node, tree name,
38524 tree args,
38525 int,
38526 bool *no_add_attrs)
38527 {
38528 if (TREE_CODE (*node) != FUNCTION_TYPE
38529 && TREE_CODE (*node) != METHOD_TYPE
38530 && TREE_CODE (*node) != FIELD_DECL
38531 && TREE_CODE (*node) != TYPE_DECL)
38532 {
38533 warning (OPT_Wattributes, "%qE attribute only applies to functions",
38534 name);
38535 *no_add_attrs = true;
38536 return NULL_TREE;
38537 }
38538 if (TARGET_64BIT)
38539 {
38540 warning (OPT_Wattributes, "%qE attribute only available for 32-bit",
38541 name);
38542 *no_add_attrs = true;
38543 return NULL_TREE;
38544 }
38545 if (is_attribute_p ("callee_pop_aggregate_return", name))
38546 {
38547 tree cst;
38548
38549 cst = TREE_VALUE (args);
38550 if (TREE_CODE (cst) != INTEGER_CST)
38551 {
38552 warning (OPT_Wattributes,
38553 "%qE attribute requires an integer constant argument",
38554 name);
38555 *no_add_attrs = true;
38556 }
38557 else if (compare_tree_int (cst, 0) != 0
38558 && compare_tree_int (cst, 1) != 0)
38559 {
38560 warning (OPT_Wattributes,
38561 "argument to %qE attribute is neither zero, nor one",
38562 name);
38563 *no_add_attrs = true;
38564 }
38565
38566 return NULL_TREE;
38567 }
38568
38569 return NULL_TREE;
38570 }
38571
38572 /* Handle an "ms_abi" or "sysv_abi" attribute; arguments as in
38573 struct attribute_spec.handler. */
38574 static tree
38575 ix86_handle_abi_attribute (tree *node, tree name, tree, int,
38576 bool *no_add_attrs)
38577 {
38578 if (TREE_CODE (*node) != FUNCTION_TYPE
38579 && TREE_CODE (*node) != METHOD_TYPE
38580 && TREE_CODE (*node) != FIELD_DECL
38581 && TREE_CODE (*node) != TYPE_DECL)
38582 {
38583 warning (OPT_Wattributes, "%qE attribute only applies to functions",
38584 name);
38585 *no_add_attrs = true;
38586 return NULL_TREE;
38587 }
38588
38589 /* Can combine regparm with all attributes but fastcall. */
38590 if (is_attribute_p ("ms_abi", name))
38591 {
38592 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
38593 {
38594 error ("ms_abi and sysv_abi attributes are not compatible");
38595 }
38596
38597 return NULL_TREE;
38598 }
38599 else if (is_attribute_p ("sysv_abi", name))
38600 {
38601 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
38602 {
38603 error ("ms_abi and sysv_abi attributes are not compatible");
38604 }
38605
38606 return NULL_TREE;
38607 }
38608
38609 return NULL_TREE;
38610 }
38611
38612 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
38613 struct attribute_spec.handler. */
38614 static tree
38615 ix86_handle_struct_attribute (tree *node, tree name, tree, int,
38616 bool *no_add_attrs)
38617 {
38618 tree *type = NULL;
38619 if (DECL_P (*node))
38620 {
38621 if (TREE_CODE (*node) == TYPE_DECL)
38622 type = &TREE_TYPE (*node);
38623 }
38624 else
38625 type = node;
38626
38627 if (!(type && RECORD_OR_UNION_TYPE_P (*type)))
38628 {
38629 warning (OPT_Wattributes, "%qE attribute ignored",
38630 name);
38631 *no_add_attrs = true;
38632 }
38633
38634 else if ((is_attribute_p ("ms_struct", name)
38635 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
38636 || ((is_attribute_p ("gcc_struct", name)
38637 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
38638 {
38639 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
38640 name);
38641 *no_add_attrs = true;
38642 }
38643
38644 return NULL_TREE;
38645 }
38646
38647 static tree
38648 ix86_handle_fndecl_attribute (tree *node, tree name, tree, int,
38649 bool *no_add_attrs)
38650 {
38651 if (TREE_CODE (*node) != FUNCTION_DECL)
38652 {
38653 warning (OPT_Wattributes, "%qE attribute only applies to functions",
38654 name);
38655 *no_add_attrs = true;
38656 }
38657 return NULL_TREE;
38658 }
38659
38660 static bool
38661 ix86_ms_bitfield_layout_p (const_tree record_type)
38662 {
38663 return ((TARGET_MS_BITFIELD_LAYOUT
38664 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
38665 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
38666 }
38667
38668 /* Returns an expression indicating where the this parameter is
38669 located on entry to the FUNCTION. */
38670
38671 static rtx
38672 x86_this_parameter (tree function)
38673 {
38674 tree type = TREE_TYPE (function);
38675 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
38676 int nregs;
38677
38678 if (TARGET_64BIT)
38679 {
38680 const int *parm_regs;
38681
38682 if (ix86_function_type_abi (type) == MS_ABI)
38683 parm_regs = x86_64_ms_abi_int_parameter_registers;
38684 else
38685 parm_regs = x86_64_int_parameter_registers;
38686 return gen_rtx_REG (Pmode, parm_regs[aggr]);
38687 }
38688
38689 nregs = ix86_function_regparm (type, function);
38690
38691 if (nregs > 0 && !stdarg_p (type))
38692 {
38693 int regno;
38694 unsigned int ccvt = ix86_get_callcvt (type);
38695
38696 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
38697 regno = aggr ? DX_REG : CX_REG;
38698 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
38699 {
38700 regno = CX_REG;
38701 if (aggr)
38702 return gen_rtx_MEM (SImode,
38703 plus_constant (Pmode, stack_pointer_rtx, 4));
38704 }
38705 else
38706 {
38707 regno = AX_REG;
38708 if (aggr)
38709 {
38710 regno = DX_REG;
38711 if (nregs == 1)
38712 return gen_rtx_MEM (SImode,
38713 plus_constant (Pmode,
38714 stack_pointer_rtx, 4));
38715 }
38716 }
38717 return gen_rtx_REG (SImode, regno);
38718 }
38719
38720 return gen_rtx_MEM (SImode, plus_constant (Pmode, stack_pointer_rtx,
38721 aggr ? 8 : 4));
38722 }
38723
38724 /* Determine whether x86_output_mi_thunk can succeed. */
38725
38726 static bool
38727 x86_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
38728 const_tree function)
38729 {
38730 /* 64-bit can handle anything. */
38731 if (TARGET_64BIT)
38732 return true;
38733
38734 /* For 32-bit, everything's fine if we have one free register. */
38735 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
38736 return true;
38737
38738 /* Need a free register for vcall_offset. */
38739 if (vcall_offset)
38740 return false;
38741
38742 /* Need a free register for GOT references. */
38743 if (flag_pic && !targetm.binds_local_p (function))
38744 return false;
38745
38746 /* Otherwise ok. */
38747 return true;
38748 }
38749
38750 /* Output the assembler code for a thunk function. THUNK_DECL is the
38751 declaration for the thunk function itself, FUNCTION is the decl for
38752 the target function. DELTA is an immediate constant offset to be
38753 added to THIS. If VCALL_OFFSET is nonzero, the word at
38754 *(*this + vcall_offset) should be added to THIS. */
38755
38756 static void
38757 x86_output_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
38758 HOST_WIDE_INT vcall_offset, tree function)
38759 {
38760 rtx this_param = x86_this_parameter (function);
38761 rtx this_reg, tmp, fnaddr;
38762 unsigned int tmp_regno;
38763
38764 if (TARGET_64BIT)
38765 tmp_regno = R10_REG;
38766 else
38767 {
38768 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function));
38769 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
38770 tmp_regno = AX_REG;
38771 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
38772 tmp_regno = DX_REG;
38773 else
38774 tmp_regno = CX_REG;
38775 }
38776
38777 emit_note (NOTE_INSN_PROLOGUE_END);
38778
38779 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
38780 pull it in now and let DELTA benefit. */
38781 if (REG_P (this_param))
38782 this_reg = this_param;
38783 else if (vcall_offset)
38784 {
38785 /* Put the this parameter into %eax. */
38786 this_reg = gen_rtx_REG (Pmode, AX_REG);
38787 emit_move_insn (this_reg, this_param);
38788 }
38789 else
38790 this_reg = NULL_RTX;
38791
38792 /* Adjust the this parameter by a fixed constant. */
38793 if (delta)
38794 {
38795 rtx delta_rtx = GEN_INT (delta);
38796 rtx delta_dst = this_reg ? this_reg : this_param;
38797
38798 if (TARGET_64BIT)
38799 {
38800 if (!x86_64_general_operand (delta_rtx, Pmode))
38801 {
38802 tmp = gen_rtx_REG (Pmode, tmp_regno);
38803 emit_move_insn (tmp, delta_rtx);
38804 delta_rtx = tmp;
38805 }
38806 }
38807
38808 ix86_emit_binop (PLUS, Pmode, delta_dst, delta_rtx);
38809 }
38810
38811 /* Adjust the this parameter by a value stored in the vtable. */
38812 if (vcall_offset)
38813 {
38814 rtx vcall_addr, vcall_mem, this_mem;
38815
38816 tmp = gen_rtx_REG (Pmode, tmp_regno);
38817
38818 this_mem = gen_rtx_MEM (ptr_mode, this_reg);
38819 if (Pmode != ptr_mode)
38820 this_mem = gen_rtx_ZERO_EXTEND (Pmode, this_mem);
38821 emit_move_insn (tmp, this_mem);
38822
38823 /* Adjust the this parameter. */
38824 vcall_addr = plus_constant (Pmode, tmp, vcall_offset);
38825 if (TARGET_64BIT
38826 && !ix86_legitimate_address_p (ptr_mode, vcall_addr, true))
38827 {
38828 rtx tmp2 = gen_rtx_REG (Pmode, R11_REG);
38829 emit_move_insn (tmp2, GEN_INT (vcall_offset));
38830 vcall_addr = gen_rtx_PLUS (Pmode, tmp, tmp2);
38831 }
38832
38833 vcall_mem = gen_rtx_MEM (ptr_mode, vcall_addr);
38834 if (Pmode != ptr_mode)
38835 emit_insn (gen_addsi_1_zext (this_reg,
38836 gen_rtx_REG (ptr_mode,
38837 REGNO (this_reg)),
38838 vcall_mem));
38839 else
38840 ix86_emit_binop (PLUS, Pmode, this_reg, vcall_mem);
38841 }
38842
38843 /* If necessary, drop THIS back to its stack slot. */
38844 if (this_reg && this_reg != this_param)
38845 emit_move_insn (this_param, this_reg);
38846
38847 fnaddr = XEXP (DECL_RTL (function), 0);
38848 if (TARGET_64BIT)
38849 {
38850 if (!flag_pic || targetm.binds_local_p (function)
38851 || TARGET_PECOFF)
38852 ;
38853 else
38854 {
38855 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOTPCREL);
38856 tmp = gen_rtx_CONST (Pmode, tmp);
38857 fnaddr = gen_const_mem (Pmode, tmp);
38858 }
38859 }
38860 else
38861 {
38862 if (!flag_pic || targetm.binds_local_p (function))
38863 ;
38864 #if TARGET_MACHO
38865 else if (TARGET_MACHO)
38866 {
38867 fnaddr = machopic_indirect_call_target (DECL_RTL (function));
38868 fnaddr = XEXP (fnaddr, 0);
38869 }
38870 #endif /* TARGET_MACHO */
38871 else
38872 {
38873 tmp = gen_rtx_REG (Pmode, CX_REG);
38874 output_set_got (tmp, NULL_RTX);
38875
38876 fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOT);
38877 fnaddr = gen_rtx_CONST (Pmode, fnaddr);
38878 fnaddr = gen_rtx_PLUS (Pmode, tmp, fnaddr);
38879 fnaddr = gen_const_mem (Pmode, fnaddr);
38880 }
38881 }
38882
38883 /* Our sibling call patterns do not allow memories, because we have no
38884 predicate that can distinguish between frame and non-frame memory.
38885 For our purposes here, we can get away with (ab)using a jump pattern,
38886 because we're going to do no optimization. */
38887 if (MEM_P (fnaddr))
38888 {
38889 if (sibcall_insn_operand (fnaddr, word_mode))
38890 {
38891 tmp = gen_rtx_CALL (VOIDmode, fnaddr, const0_rtx);
38892 tmp = emit_call_insn (tmp);
38893 SIBLING_CALL_P (tmp) = 1;
38894 }
38895 else
38896 emit_jump_insn (gen_indirect_jump (fnaddr));
38897 }
38898 else
38899 {
38900 if (ix86_cmodel == CM_LARGE_PIC && SYMBOLIC_CONST (fnaddr))
38901 fnaddr = legitimize_pic_address (fnaddr,
38902 gen_rtx_REG (Pmode, tmp_regno));
38903
38904 if (!sibcall_insn_operand (fnaddr, word_mode))
38905 {
38906 tmp = gen_rtx_REG (word_mode, tmp_regno);
38907 if (GET_MODE (fnaddr) != word_mode)
38908 fnaddr = gen_rtx_ZERO_EXTEND (word_mode, fnaddr);
38909 emit_move_insn (tmp, fnaddr);
38910 fnaddr = tmp;
38911 }
38912
38913 tmp = gen_rtx_MEM (QImode, fnaddr);
38914 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
38915 tmp = emit_call_insn (tmp);
38916 SIBLING_CALL_P (tmp) = 1;
38917 }
38918 emit_barrier ();
38919
38920 /* Emit just enough of rest_of_compilation to get the insns emitted.
38921 Note that use_thunk calls assemble_start_function et al. */
38922 tmp = get_insns ();
38923 shorten_branches (tmp);
38924 final_start_function (tmp, file, 1);
38925 final (tmp, file, 1);
38926 final_end_function ();
38927 }
38928
38929 static void
38930 x86_file_start (void)
38931 {
38932 default_file_start ();
38933 if (TARGET_16BIT)
38934 fputs ("\t.code16gcc\n", asm_out_file);
38935 #if TARGET_MACHO
38936 darwin_file_start ();
38937 #endif
38938 if (X86_FILE_START_VERSION_DIRECTIVE)
38939 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
38940 if (X86_FILE_START_FLTUSED)
38941 fputs ("\t.global\t__fltused\n", asm_out_file);
38942 if (ix86_asm_dialect == ASM_INTEL)
38943 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
38944 }
38945
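/* Compute the alignment of FIELD given the alignment COMPUTED by the
   front end.  On 32-bit targets without -malign-double, double, long long
   and complex fields are capped at 32-bit alignment, matching the
   traditional ia32 struct layout.  */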
38946 int
38947 x86_field_alignment (tree field, int computed)
38948 {
38949 enum machine_mode mode;
38950 tree type = TREE_TYPE (field);
38951
38952 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
38953 return computed;
38954 mode = TYPE_MODE (strip_array_types (type));
38955 if (mode == DFmode || mode == DCmode
38956 || GET_MODE_CLASS (mode) == MODE_INT
38957 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
38958 return MIN (32, computed);
38959 return computed;
38960 }
38961
38962 /* Output assembler code to FILE to increment profiler label # LABELNO
38963 for profiling a function entry. */
38964 void
38965 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
38966 {
38967 const char *mcount_name = (flag_fentry ? MCOUNT_NAME_BEFORE_PROLOGUE
38968 : MCOUNT_NAME);
38969
38970 if (TARGET_64BIT)
38971 {
38972 #ifndef NO_PROFILE_COUNTERS
38973 fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
38974 #endif
38975
38976 if (!TARGET_PECOFF && flag_pic)
38977 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
38978 else
38979 fprintf (file, "\tcall\t%s\n", mcount_name);
38980 }
38981 else if (flag_pic)
38982 {
38983 #ifndef NO_PROFILE_COUNTERS
38984 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
38985 LPREFIX, labelno);
38986 #endif
38987 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
38988 }
38989 else
38990 {
38991 #ifndef NO_PROFILE_COUNTERS
38992 fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
38993 LPREFIX, labelno);
38994 #endif
38995 fprintf (file, "\tcall\t%s\n", mcount_name);
38996 }
38997 }
38998
38999 /* We don't have exact information about insn sizes, but we may quite
39000 safely assume that we know about all 1-byte insns and about memory
39001 address sizes. This is enough to eliminate unnecessary padding in
39002 99% of cases. */
39003
39004 static int
39005 min_insn_size (rtx insn)
39006 {
39007 int l = 0, len;
39008
39009 if (!INSN_P (insn) || !active_insn_p (insn))
39010 return 0;
39011
39012 /* Discard alignments we've emitted, and jump instructions. */
39013 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
39014 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
39015 return 0;
39016
39017 /* Important case - calls are always 5 bytes.
39018 It is common to have many calls in a row. */
39019 if (CALL_P (insn)
39020 && symbolic_reference_mentioned_p (PATTERN (insn))
39021 && !SIBLING_CALL_P (insn))
39022 return 5;
39023 len = get_attr_length (insn);
39024 if (len <= 1)
39025 return 1;
39026
39027 /* For normal instructions we rely on get_attr_length being exact,
39028 with a few exceptions. */
39029 if (!JUMP_P (insn))
39030 {
39031 enum attr_type type = get_attr_type (insn);
39032
39033 switch (type)
39034 {
39035 case TYPE_MULTI:
39036 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
39037 || asm_noperands (PATTERN (insn)) >= 0)
39038 return 0;
39039 break;
39040 case TYPE_OTHER:
39041 case TYPE_FCMP:
39042 break;
39043 default:
39044 /* Otherwise trust get_attr_length. */
39045 return len;
39046 }
39047
39048 l = get_attr_length_address (insn);
39049 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
39050 l = 4;
39051 }
39052 if (l)
39053 return 1+l;
39054 else
39055 return 2;
39056 }
39057
39058 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
39059
39060 /* The AMD K8 core mispredicts jumps when there are more than 3 jumps in a
39061 16-byte window. */
39062
39063 static void
39064 ix86_avoid_jump_mispredicts (void)
39065 {
39066 rtx insn, start = get_insns ();
39067 int nbytes = 0, njumps = 0;
39068 int isjump = 0;
39069
39070 /* Look for all minimal intervals of instructions containing 4 jumps.
39071 The intervals are bounded by START and INSN. NBYTES is the total
39072 size of the instructions in the interval, including INSN and not
39073 including START. When NBYTES is smaller than 16 bytes, it is possible
39074 that START and INSN end up in the same 16-byte page.
39075
39076 The smallest offset in the page at which INSN can start is the case where
39077 START ends at offset 0. The offset of INSN is then NBYTES - sizeof (INSN).
39078 We add a p2align to the 16-byte window with maxskip 15 - NBYTES + sizeof (INSN).
39079
39080 Don't consider an asm goto as a jump: while it can contain a jump, it doesn't
39081 have to - control transfer to its label(s) can be performed through other
39082 means - and we also estimate the minimum length of all asm stmts as 0. */
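/* A sketch of the loop below: NBYTES and NJUMPS describe the current window
   [START, INSN].  Whenever a fourth jump or call is seen, START is advanced
   until at most three remain; if the insn just dropped from the front was
   itself a jump and the remaining window is under 16 bytes, INSN is padded
   by 15 - NBYTES + sizeof (INSN) bytes so that four branches cannot share a
   single 16-byte block.  */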
39083 for (insn = start; insn; insn = NEXT_INSN (insn))
39084 {
39085 int min_size;
39086
39087 if (LABEL_P (insn))
39088 {
39089 int align = label_to_alignment (insn);
39090 int max_skip = label_to_max_skip (insn);
39091
39092 if (max_skip > 15)
39093 max_skip = 15;
39094 /* If align > 3, only up to 16 - max_skip - 1 bytes can be
39095 already in the current 16 byte page, because otherwise
39096 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
39097 bytes to reach 16 byte boundary. */
39098 if (align <= 0
39099 || (align <= 3 && max_skip != (1 << align) - 1))
39100 max_skip = 0;
39101 if (dump_file)
39102 fprintf (dump_file, "Label %i with max_skip %i\n",
39103 INSN_UID (insn), max_skip);
39104 if (max_skip)
39105 {
39106 while (nbytes + max_skip >= 16)
39107 {
39108 start = NEXT_INSN (start);
39109 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
39110 || CALL_P (start))
39111 njumps--, isjump = 1;
39112 else
39113 isjump = 0;
39114 nbytes -= min_insn_size (start);
39115 }
39116 }
39117 continue;
39118 }
39119
39120 min_size = min_insn_size (insn);
39121 nbytes += min_size;
39122 if (dump_file)
39123 fprintf (dump_file, "Insn %i estimated to %i bytes\n",
39124 INSN_UID (insn), min_size);
39125 if ((JUMP_P (insn) && asm_noperands (PATTERN (insn)) < 0)
39126 || CALL_P (insn))
39127 njumps++;
39128 else
39129 continue;
39130
39131 while (njumps > 3)
39132 {
39133 start = NEXT_INSN (start);
39134 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
39135 || CALL_P (start))
39136 njumps--, isjump = 1;
39137 else
39138 isjump = 0;
39139 nbytes -= min_insn_size (start);
39140 }
39141 gcc_assert (njumps >= 0);
39142 if (dump_file)
39143 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
39144 INSN_UID (start), INSN_UID (insn), nbytes);
39145
39146 if (njumps == 3 && isjump && nbytes < 16)
39147 {
39148 int padsize = 15 - nbytes + min_insn_size (insn);
39149
39150 if (dump_file)
39151 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
39152 INSN_UID (insn), padsize);
39153 emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
39154 }
39155 }
39156 }
39157 #endif
39158
39159 /* AMD Athlon works faster
39160 when RET is not the destination of a conditional jump or directly preceded
39161 by another jump instruction. We avoid the penalty by inserting a NOP just
39162 before the RET instructions in such cases. */
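/* The replacement below uses gen_simple_return_internal_long, a longer
   encoding of the return (effectively a rep-prefixed ret) that sidesteps
   the predictor penalty.  */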
39163 static void
39164 ix86_pad_returns (void)
39165 {
39166 edge e;
39167 edge_iterator ei;
39168
39169 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
39170 {
39171 basic_block bb = e->src;
39172 rtx ret = BB_END (bb);
39173 rtx prev;
39174 bool replace = false;
39175
39176 if (!JUMP_P (ret) || !ANY_RETURN_P (PATTERN (ret))
39177 || optimize_bb_for_size_p (bb))
39178 continue;
39179 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
39180 if (active_insn_p (prev) || LABEL_P (prev))
39181 break;
39182 if (prev && LABEL_P (prev))
39183 {
39184 edge e;
39185 edge_iterator ei;
39186
39187 FOR_EACH_EDGE (e, ei, bb->preds)
39188 if (EDGE_FREQUENCY (e) && e->src->index >= 0
39189 && !(e->flags & EDGE_FALLTHRU))
39190 {
39191 replace = true;
39192 break;
39193 }
39194 }
39195 if (!replace)
39196 {
39197 prev = prev_active_insn (ret);
39198 if (prev
39199 && ((JUMP_P (prev) && any_condjump_p (prev))
39200 || CALL_P (prev)))
39201 replace = true;
39202 /* Empty functions get a branch mispredict even when
39203 the jump destination is not visible to us. */
39204 if (!prev && !optimize_function_for_size_p (cfun))
39205 replace = true;
39206 }
39207 if (replace)
39208 {
39209 emit_jump_insn_before (gen_simple_return_internal_long (), ret);
39210 delete_insn (ret);
39211 }
39212 }
39213 }
39214
39215 /* Count the minimum number of instructions in BB. Return 4 if the
39216 number of instructions >= 4. */
39217
39218 static int
39219 ix86_count_insn_bb (basic_block bb)
39220 {
39221 rtx insn;
39222 int insn_count = 0;
39223
39224 /* Count number of instructions in this block. Return 4 if the number
39225 of instructions >= 4. */
39226 FOR_BB_INSNS (bb, insn)
39227 {
39228 /* This only happens in exit blocks. */
39229 if (JUMP_P (insn)
39230 && ANY_RETURN_P (PATTERN (insn)))
39231 break;
39232
39233 if (NONDEBUG_INSN_P (insn)
39234 && GET_CODE (PATTERN (insn)) != USE
39235 && GET_CODE (PATTERN (insn)) != CLOBBER)
39236 {
39237 insn_count++;
39238 if (insn_count >= 4)
39239 return insn_count;
39240 }
39241 }
39242
39243 return insn_count;
39244 }
39245
39246
39247 /* Count the minimum number of instructions on a code path through BB.
39248 Return 4 if the number of instructions >= 4. */
39249
39250 static int
39251 ix86_count_insn (basic_block bb)
39252 {
39253 edge e;
39254 edge_iterator ei;
39255 int min_prev_count;
39256
39257 /* Only bother counting instructions along paths with no
39258 more than 2 basic blocks between entry and exit. Given
39259 that BB has an edge to exit, determine if a predecessor
39260 of BB has an edge from entry. If so, compute the number
39261 of instructions in the predecessor block. If there
39262 happen to be multiple such blocks, compute the minimum. */
39263 min_prev_count = 4;
39264 FOR_EACH_EDGE (e, ei, bb->preds)
39265 {
39266 edge prev_e;
39267 edge_iterator prev_ei;
39268
39269 if (e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
39270 {
39271 min_prev_count = 0;
39272 break;
39273 }
39274 FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
39275 {
39276 if (prev_e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
39277 {
39278 int count = ix86_count_insn_bb (e->src);
39279 if (count < min_prev_count)
39280 min_prev_count = count;
39281 break;
39282 }
39283 }
39284 }
39285
39286 if (min_prev_count < 4)
39287 min_prev_count += ix86_count_insn_bb (bb);
39288
39289 return min_prev_count;
39290 }
39291
39292 /* Pad short functions to 4 instructions. */
39293
39294 static void
39295 ix86_pad_short_function (void)
39296 {
39297 edge e;
39298 edge_iterator ei;
39299
39300 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
39301 {
39302 rtx ret = BB_END (e->src);
39303 if (JUMP_P (ret) && ANY_RETURN_P (PATTERN (ret)))
39304 {
39305 int insn_count = ix86_count_insn (e->src);
39306
39307 /* Pad short function. */
39308 if (insn_count < 4)
39309 {
39310 rtx insn = ret;
39311
39312 /* Find epilogue. */
39313 while (insn
39314 && (!NOTE_P (insn)
39315 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
39316 insn = PREV_INSN (insn);
39317
39318 if (!insn)
39319 insn = ret;
39320
39321 /* Two NOPs count as one instruction. */
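/* For instance, a function with only 2 instructions receives 4 NOPs here;
   counting each pair of NOPs as one instruction brings the total to 4.  */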
39322 insn_count = 2 * (4 - insn_count);
39323 emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
39324 }
39325 }
39326 }
39327 }
39328
39329 /* Fix up a Windows system unwinder issue. If an EH region falls through into
39330 the epilogue, the Windows system unwinder will apply epilogue logic and
39331 produce incorrect offsets. This can be avoided by adding a nop between
39332 the last insn that can throw and the first insn of the epilogue. */
39333
39334 static void
39335 ix86_seh_fixup_eh_fallthru (void)
39336 {
39337 edge e;
39338 edge_iterator ei;
39339
39340 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
39341 {
39342 rtx insn, next;
39343
39344 /* Find the beginning of the epilogue. */
39345 for (insn = BB_END (e->src); insn != NULL; insn = PREV_INSN (insn))
39346 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_EPILOGUE_BEG)
39347 break;
39348 if (insn == NULL)
39349 continue;
39350
39351 /* We only care about preceding insns that can throw. */
39352 insn = prev_active_insn (insn);
39353 if (insn == NULL || !can_throw_internal (insn))
39354 continue;
39355
39356 /* Do not separate calls from their debug information. */
39357 for (next = NEXT_INSN (insn); next != NULL; next = NEXT_INSN (next))
39358 if (NOTE_P (next)
39359 && (NOTE_KIND (next) == NOTE_INSN_VAR_LOCATION
39360 || NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION))
39361 insn = next;
39362 else
39363 break;
39364
39365 emit_insn_after (gen_nops (const1_rtx), insn);
39366 }
39367 }
39368
39369 /* Implement machine specific optimizations. We implement padding of returns
39370 for K8 CPUs and a pass to avoid 4 jumps in a single 16-byte window. */
39371 static void
39372 ix86_reorg (void)
39373 {
39374 /* We are freeing block_for_insn in the toplev to keep compatibility
39375 with old MDEP_REORGS that are not CFG based. Recompute it now. */
39376 compute_bb_for_insn ();
39377
39378 if (TARGET_SEH && current_function_has_exception_handlers ())
39379 ix86_seh_fixup_eh_fallthru ();
39380
39381 if (optimize && optimize_function_for_speed_p (cfun))
39382 {
39383 if (TARGET_PAD_SHORT_FUNCTION)
39384 ix86_pad_short_function ();
39385 else if (TARGET_PAD_RETURNS)
39386 ix86_pad_returns ();
39387 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
39388 if (TARGET_FOUR_JUMP_LIMIT)
39389 ix86_avoid_jump_mispredicts ();
39390 #endif
39391 }
39392 }
39393
39394 /* Return nonzero when a QImode register that must be represented via a REX
39395 prefix is used. */
39396 bool
39397 x86_extended_QIreg_mentioned_p (rtx insn)
39398 {
39399 int i;
39400 extract_insn_cached (insn);
39401 for (i = 0; i < recog_data.n_operands; i++)
39402 if (GENERAL_REG_P (recog_data.operand[i])
39403 && !QI_REGNO_P (REGNO (recog_data.operand[i])))
39404 return true;
39405 return false;
39406 }
39407
39408 /* Return nonzero when P points to a register encoded via a REX prefix.
39409 Called via for_each_rtx. */
39410 static int
39411 extended_reg_mentioned_1 (rtx *p, void *)
39412 {
39413 unsigned int regno;
39414 if (!REG_P (*p))
39415 return 0;
39416 regno = REGNO (*p);
39417 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
39418 }
39419
39420 /* Return true when INSN mentions a register that must be encoded using a
39421 REX prefix. */
39422 bool
39423 x86_extended_reg_mentioned_p (rtx insn)
39424 {
39425 return for_each_rtx (INSN_P (insn) ? &PATTERN (insn) : &insn,
39426 extended_reg_mentioned_1, NULL);
39427 }
39428
39429 /* If profitable, negate (without causing overflow) integer constant
39430 of mode MODE at location LOC. Return true in this case. */
39431 bool
39432 x86_maybe_negate_const_int (rtx *loc, enum machine_mode mode)
39433 {
39434 HOST_WIDE_INT val;
39435
39436 if (!CONST_INT_P (*loc))
39437 return false;
39438
39439 switch (mode)
39440 {
39441 case DImode:
39442 /* DImode x86_64 constants must fit in 32 bits. */
39443 gcc_assert (x86_64_immediate_operand (*loc, mode));
39444
39445 mode = SImode;
39446 break;
39447
39448 case SImode:
39449 case HImode:
39450 case QImode:
39451 break;
39452
39453 default:
39454 gcc_unreachable ();
39455 }
39456
39457 /* Avoid overflows. */
39458 if (mode_signbit_p (mode, *loc))
39459 return false;
39460
39461 val = INTVAL (*loc);
39462
39463 /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
39464 Exceptions: -128 encodes smaller than 128, so swap sign and op. */
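/* For example, the constant in `addl $-4,%eax' is negated so the caller can
   emit `subl $4,%eax' instead; -128 is left alone because it fits in a
   sign-extended 8-bit immediate while +128 would not.  */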
39465 if ((val < 0 && val != -128)
39466 || val == 128)
39467 {
39468 *loc = GEN_INT (-val);
39469 return true;
39470 }
39471
39472 return false;
39473 }
39474
39475 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
39476 optabs would emit if we didn't have TFmode patterns. */
39477
39478 void
39479 x86_emit_floatuns (rtx operands[2])
39480 {
39481 rtx neglab, donelab, i0, i1, f0, in, out;
39482 enum machine_mode mode, inmode;
39483
39484 inmode = GET_MODE (operands[1]);
39485 gcc_assert (inmode == SImode || inmode == DImode);
39486
39487 out = operands[0];
39488 in = force_reg (inmode, operands[1]);
39489 mode = GET_MODE (out);
39490 neglab = gen_label_rtx ();
39491 donelab = gen_label_rtx ();
39492 f0 = gen_reg_rtx (mode);
39493
39494 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
39495
39496 expand_float (out, in, 0);
39497
39498 emit_jump_insn (gen_jump (donelab));
39499 emit_barrier ();
39500
39501 emit_label (neglab);
39502
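/* IN has its high bit set, so a signed conversion would come out negative.
   Halve the value, folding the discarded low bit back in so the final
   rounding is unaffected, convert, and then double the result below.  */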
39503 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
39504 1, OPTAB_DIRECT);
39505 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
39506 1, OPTAB_DIRECT);
39507 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
39508
39509 expand_float (f0, i0, 0);
39510
39511 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
39512
39513 emit_label (donelab);
39514 }
39515 \f
39516 /* AVX512F does support 64-byte integer vector operations,
39517 thus the longest vector we are faced with is V64QImode. */
39518 #define MAX_VECT_LEN 64
39519
39520 struct expand_vec_perm_d
39521 {
39522 rtx target, op0, op1;
39523 unsigned char perm[MAX_VECT_LEN];
39524 enum machine_mode vmode;
39525 unsigned char nelt;
39526 bool one_operand_p;
39527 bool testing_p;
39528 };
39529
39530 static bool canonicalize_perm (struct expand_vec_perm_d *d);
39531 static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
39532 static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);
39533
39534 /* Get a vector mode of the same size as the original but with elements
39535 twice as wide. This is only guaranteed to apply to integral vectors. */
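/* For example, V16QImode widens to V8HImode and V8HImode to V4SImode:
   the total size stays the same while the element count halves.  */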
39536
39537 static inline enum machine_mode
39538 get_mode_wider_vector (enum machine_mode o)
39539 {
39540 /* ??? Rely on the ordering that genmodes.c gives to vectors. */
39541 enum machine_mode n = GET_MODE_WIDER_MODE (o);
39542 gcc_assert (GET_MODE_NUNITS (o) == GET_MODE_NUNITS (n) * 2);
39543 gcc_assert (GET_MODE_SIZE (o) == GET_MODE_SIZE (n));
39544 return n;
39545 }
39546
39547 /* A subroutine of ix86_expand_vector_init_duplicate. Tries to
39548 fill target with val via vec_duplicate. */
39549
39550 static bool
39551 ix86_vector_duplicate_value (enum machine_mode mode, rtx target, rtx val)
39552 {
39553 bool ok;
39554 rtx insn, dup;
39555
39556 /* First attempt to recognize VAL as-is. */
39557 dup = gen_rtx_VEC_DUPLICATE (mode, val);
39558 insn = emit_insn (gen_rtx_SET (VOIDmode, target, dup));
39559 if (recog_memoized (insn) < 0)
39560 {
39561 rtx seq;
39562 /* If that fails, force VAL into a register. */
39563
39564 start_sequence ();
39565 XEXP (dup, 0) = force_reg (GET_MODE_INNER (mode), val);
39566 seq = get_insns ();
39567 end_sequence ();
39568 if (seq)
39569 emit_insn_before (seq, insn);
39570
39571 ok = recog_memoized (insn) >= 0;
39572 gcc_assert (ok);
39573 }
39574 return true;
39575 }
39576
39577 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
39578 with all elements equal to VAR. Return true if successful. */
39579
39580 static bool
39581 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
39582 rtx target, rtx val)
39583 {
39584 bool ok;
39585
39586 switch (mode)
39587 {
39588 case V2SImode:
39589 case V2SFmode:
39590 if (!mmx_ok)
39591 return false;
39592 /* FALLTHRU */
39593
39594 case V4DFmode:
39595 case V4DImode:
39596 case V8SFmode:
39597 case V8SImode:
39598 case V2DFmode:
39599 case V2DImode:
39600 case V4SFmode:
39601 case V4SImode:
39602 case V16SImode:
39603 case V8DImode:
39604 case V16SFmode:
39605 case V8DFmode:
39606 return ix86_vector_duplicate_value (mode, target, val);
39607
39608 case V4HImode:
39609 if (!mmx_ok)
39610 return false;
39611 if (TARGET_SSE || TARGET_3DNOW_A)
39612 {
39613 rtx x;
39614
39615 val = gen_lowpart (SImode, val);
39616 x = gen_rtx_TRUNCATE (HImode, val);
39617 x = gen_rtx_VEC_DUPLICATE (mode, x);
39618 emit_insn (gen_rtx_SET (VOIDmode, target, x));
39619 return true;
39620 }
39621 goto widen;
39622
39623 case V8QImode:
39624 if (!mmx_ok)
39625 return false;
39626 goto widen;
39627
39628 case V8HImode:
39629 if (TARGET_SSE2)
39630 {
39631 struct expand_vec_perm_d dperm;
39632 rtx tmp1, tmp2;
39633
39634 permute:
39635 memset (&dperm, 0, sizeof (dperm));
39636 dperm.target = target;
39637 dperm.vmode = mode;
39638 dperm.nelt = GET_MODE_NUNITS (mode);
39639 dperm.op0 = dperm.op1 = gen_reg_rtx (mode);
39640 dperm.one_operand_p = true;
39641
39642 /* Extend to SImode using a paradoxical SUBREG. */
39643 tmp1 = gen_reg_rtx (SImode);
39644 emit_move_insn (tmp1, gen_lowpart (SImode, val));
39645
39646 /* Insert the SImode value as low element of a V4SImode vector. */
39647 tmp2 = gen_reg_rtx (V4SImode);
39648 emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1));
39649 emit_move_insn (dperm.op0, gen_lowpart (mode, tmp2));
39650
39651 ok = (expand_vec_perm_1 (&dperm)
39652 || expand_vec_perm_broadcast_1 (&dperm));
39653 gcc_assert (ok);
39654 return ok;
39655 }
39656 goto widen;
39657
39658 case V16QImode:
39659 if (TARGET_SSE2)
39660 goto permute;
39661 goto widen;
39662
39663 widen:
39664 /* Replicate the value once into the next wider mode and recurse. */
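/* For example, a V8QImode splat of 0x12 first builds the HImode value
   0x1212 and then recurses as a V4HImode splat of that value.  */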
39665 {
39666 enum machine_mode smode, wsmode, wvmode;
39667 rtx x;
39668
39669 smode = GET_MODE_INNER (mode);
39670 wvmode = get_mode_wider_vector (mode);
39671 wsmode = GET_MODE_INNER (wvmode);
39672
39673 val = convert_modes (wsmode, smode, val, true);
39674 x = expand_simple_binop (wsmode, ASHIFT, val,
39675 GEN_INT (GET_MODE_BITSIZE (smode)),
39676 NULL_RTX, 1, OPTAB_LIB_WIDEN);
39677 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
39678
39679 x = gen_reg_rtx (wvmode);
39680 ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val);
39681 gcc_assert (ok);
39682 emit_move_insn (target, gen_lowpart (GET_MODE (target), x));
39683 return ok;
39684 }
39685
39686 case V16HImode:
39687 case V32QImode:
39688 {
39689 enum machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
39690 rtx x = gen_reg_rtx (hvmode);
39691
39692 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
39693 gcc_assert (ok);
39694
39695 x = gen_rtx_VEC_CONCAT (mode, x, x);
39696 emit_insn (gen_rtx_SET (VOIDmode, target, x));
39697 }
39698 return true;
39699
39700 default:
39701 return false;
39702 }
39703 }
39704
39705 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
39706 whose ONE_VAR element is VAR, and other elements are zero. Return true
39707 if successful. */
39708
39709 static bool
39710 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
39711 rtx target, rtx var, int one_var)
39712 {
39713 enum machine_mode vsimode;
39714 rtx new_target;
39715 rtx x, tmp;
39716 bool use_vector_set = false;
39717
39718 switch (mode)
39719 {
39720 case V2DImode:
39721 /* For SSE4.1, we normally use vector set. But if the second
39722 element is zero and inter-unit moves are OK, we use movq
39723 instead. */
39724 use_vector_set = (TARGET_64BIT && TARGET_SSE4_1
39725 && !(TARGET_INTER_UNIT_MOVES_TO_VEC
39726 && one_var == 0));
39727 break;
39728 case V16QImode:
39729 case V4SImode:
39730 case V4SFmode:
39731 use_vector_set = TARGET_SSE4_1;
39732 break;
39733 case V8HImode:
39734 use_vector_set = TARGET_SSE2;
39735 break;
39736 case V4HImode:
39737 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
39738 break;
39739 case V32QImode:
39740 case V16HImode:
39741 case V8SImode:
39742 case V8SFmode:
39743 case V4DFmode:
39744 use_vector_set = TARGET_AVX;
39745 break;
39746 case V4DImode:
39747 /* Use ix86_expand_vector_set in 64bit mode only. */
39748 use_vector_set = TARGET_AVX && TARGET_64BIT;
39749 break;
39750 default:
39751 break;
39752 }
39753
39754 if (use_vector_set)
39755 {
39756 emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
39757 var = force_reg (GET_MODE_INNER (mode), var);
39758 ix86_expand_vector_set (mmx_ok, target, var, one_var);
39759 return true;
39760 }
39761
39762 switch (mode)
39763 {
39764 case V2SFmode:
39765 case V2SImode:
39766 if (!mmx_ok)
39767 return false;
39768 /* FALLTHRU */
39769
39770 case V2DFmode:
39771 case V2DImode:
39772 if (one_var != 0)
39773 return false;
39774 var = force_reg (GET_MODE_INNER (mode), var);
39775 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
39776 emit_insn (gen_rtx_SET (VOIDmode, target, x));
39777 return true;
39778
39779 case V4SFmode:
39780 case V4SImode:
39781 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
39782 new_target = gen_reg_rtx (mode);
39783 else
39784 new_target = target;
39785 var = force_reg (GET_MODE_INNER (mode), var);
39786 x = gen_rtx_VEC_DUPLICATE (mode, var);
39787 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
39788 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
39789 if (one_var != 0)
39790 {
39791 /* We need to shuffle the value to the correct position, so
39792 create a new pseudo to store the intermediate result. */
39793
39794 /* With SSE2, we can use the integer shuffle insns. */
39795 if (mode != V4SFmode && TARGET_SSE2)
39796 {
39797 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
39798 const1_rtx,
39799 GEN_INT (one_var == 1 ? 0 : 1),
39800 GEN_INT (one_var == 2 ? 0 : 1),
39801 GEN_INT (one_var == 3 ? 0 : 1)));
39802 if (target != new_target)
39803 emit_move_insn (target, new_target);
39804 return true;
39805 }
39806
39807 /* Otherwise convert the intermediate result to V4SFmode and
39808 use the SSE1 shuffle instructions. */
39809 if (mode != V4SFmode)
39810 {
39811 tmp = gen_reg_rtx (V4SFmode);
39812 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
39813 }
39814 else
39815 tmp = new_target;
39816
39817 emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
39818 const1_rtx,
39819 GEN_INT (one_var == 1 ? 0 : 1),
39820 GEN_INT (one_var == 2 ? 0+4 : 1+4),
39821 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
39822
39823 if (mode != V4SFmode)
39824 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
39825 else if (tmp != target)
39826 emit_move_insn (target, tmp);
39827 }
39828 else if (target != new_target)
39829 emit_move_insn (target, new_target);
39830 return true;
39831
39832 case V8HImode:
39833 case V16QImode:
39834 vsimode = V4SImode;
39835 goto widen;
39836 case V4HImode:
39837 case V8QImode:
39838 if (!mmx_ok)
39839 return false;
39840 vsimode = V2SImode;
39841 goto widen;
39842 widen:
39843 if (one_var != 0)
39844 return false;
39845
39846 /* Zero extend the variable element to SImode and recurse. */
39847 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
39848
39849 x = gen_reg_rtx (vsimode);
39850 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
39851 var, one_var))
39852 gcc_unreachable ();
39853
39854 emit_move_insn (target, gen_lowpart (mode, x));
39855 return true;
39856
39857 default:
39858 return false;
39859 }
39860 }
39861
39862 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
39863 consisting of the values in VALS. It is known that all elements
39864 except ONE_VAR are constants. Return true if successful. */
39865
39866 static bool
39867 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
39868 rtx target, rtx vals, int one_var)
39869 {
39870 rtx var = XVECEXP (vals, 0, one_var);
39871 enum machine_mode wmode;
39872 rtx const_vec, x;
39873
39874 const_vec = copy_rtx (vals);
39875 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
39876 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
39877
39878 switch (mode)
39879 {
39880 case V2DFmode:
39881 case V2DImode:
39882 case V2SFmode:
39883 case V2SImode:
39884 /* For the two element vectors, it's just as easy to use
39885 the general case. */
39886 return false;
39887
39888 case V4DImode:
39889 /* Use ix86_expand_vector_set in 64bit mode only. */
39890 if (!TARGET_64BIT)
39891 return false;
39892 case V4DFmode:
39893 case V8SFmode:
39894 case V8SImode:
39895 case V16HImode:
39896 case V32QImode:
39897 case V4SFmode:
39898 case V4SImode:
39899 case V8HImode:
39900 case V4HImode:
39901 break;
39902
39903 case V16QImode:
39904 if (TARGET_SSE4_1)
39905 break;
39906 wmode = V8HImode;
39907 goto widen;
39908 case V8QImode:
39909 wmode = V4HImode;
39910 goto widen;
39911 widen:
39912 /* There's no way to set one QImode entry easily. Combine
39913 the variable value with its adjacent constant value, and
39914 promote to an HImode set. */
39915 x = XVECEXP (vals, 0, one_var ^ 1);
39916 if (one_var & 1)
39917 {
39918 var = convert_modes (HImode, QImode, var, true);
39919 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
39920 NULL_RTX, 1, OPTAB_LIB_WIDEN);
39921 x = GEN_INT (INTVAL (x) & 0xff);
39922 }
39923 else
39924 {
39925 var = convert_modes (HImode, QImode, var, true);
39926 x = gen_int_mode (INTVAL (x) << 8, HImode);
39927 }
39928 if (x != const0_rtx)
39929 var = expand_simple_binop (HImode, IOR, var, x, var,
39930 1, OPTAB_LIB_WIDEN);
39931
39932 x = gen_reg_rtx (wmode);
39933 emit_move_insn (x, gen_lowpart (wmode, const_vec));
39934 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
39935
39936 emit_move_insn (target, gen_lowpart (mode, x));
39937 return true;
39938
39939 default:
39940 return false;
39941 }
39942
39943 emit_move_insn (target, const_vec);
39944 ix86_expand_vector_set (mmx_ok, target, var, one_var);
39945 return true;
39946 }
39947
39948 /* A subroutine of ix86_expand_vector_init_general. Use vector
39949 concatenate to handle the most general case: all values variable,
39950 and none identical. */
39951
39952 static void
39953 ix86_expand_vector_init_concat (enum machine_mode mode,
39954 rtx target, rtx *ops, int n)
39955 {
39956 enum machine_mode cmode, hmode = VOIDmode, gmode = VOIDmode;
39957 rtx first[16], second[8], third[4];
39958 rtvec v;
39959 int i, j;
39960
39961 switch (n)
39962 {
39963 case 2:
39964 switch (mode)
39965 {
39966 case V16SImode:
39967 cmode = V8SImode;
39968 break;
39969 case V16SFmode:
39970 cmode = V8SFmode;
39971 break;
39972 case V8DImode:
39973 cmode = V4DImode;
39974 break;
39975 case V8DFmode:
39976 cmode = V4DFmode;
39977 break;
39978 case V8SImode:
39979 cmode = V4SImode;
39980 break;
39981 case V8SFmode:
39982 cmode = V4SFmode;
39983 break;
39984 case V4DImode:
39985 cmode = V2DImode;
39986 break;
39987 case V4DFmode:
39988 cmode = V2DFmode;
39989 break;
39990 case V4SImode:
39991 cmode = V2SImode;
39992 break;
39993 case V4SFmode:
39994 cmode = V2SFmode;
39995 break;
39996 case V2DImode:
39997 cmode = DImode;
39998 break;
39999 case V2SImode:
40000 cmode = SImode;
40001 break;
40002 case V2DFmode:
40003 cmode = DFmode;
40004 break;
40005 case V2SFmode:
40006 cmode = SFmode;
40007 break;
40008 default:
40009 gcc_unreachable ();
40010 }
40011
40012 if (!register_operand (ops[1], cmode))
40013 ops[1] = force_reg (cmode, ops[1]);
40014 if (!register_operand (ops[0], cmode))
40015 ops[0] = force_reg (cmode, ops[0]);
40016 emit_insn (gen_rtx_SET (VOIDmode, target,
40017 gen_rtx_VEC_CONCAT (mode, ops[0],
40018 ops[1])));
40019 break;
40020
40021 case 4:
40022 switch (mode)
40023 {
40024 case V4DImode:
40025 cmode = V2DImode;
40026 break;
40027 case V4DFmode:
40028 cmode = V2DFmode;
40029 break;
40030 case V4SImode:
40031 cmode = V2SImode;
40032 break;
40033 case V4SFmode:
40034 cmode = V2SFmode;
40035 break;
40036 default:
40037 gcc_unreachable ();
40038 }
40039 goto half;
40040
40041 case 8:
40042 switch (mode)
40043 {
40044 case V8DImode:
40045 cmode = V2DImode;
40046 hmode = V4DImode;
40047 break;
40048 case V8DFmode:
40049 cmode = V2DFmode;
40050 hmode = V4DFmode;
40051 break;
40052 case V8SImode:
40053 cmode = V2SImode;
40054 hmode = V4SImode;
40055 break;
40056 case V8SFmode:
40057 cmode = V2SFmode;
40058 hmode = V4SFmode;
40059 break;
40060 default:
40061 gcc_unreachable ();
40062 }
40063 goto half;
40064
40065 case 16:
40066 switch (mode)
40067 {
40068 case V16SImode:
40069 cmode = V2SImode;
40070 hmode = V4SImode;
40071 gmode = V8SImode;
40072 break;
40073 case V16SFmode:
40074 cmode = V2SFmode;
40075 hmode = V4SFmode;
40076 gmode = V8SFmode;
40077 break;
40078 default:
40079 gcc_unreachable ();
40080 }
40081 goto half;
40082
40083 half:
40084 /* FIXME: We process inputs backward to help RA. PR 36222. */
40085 i = n - 1;
40086 j = (n >> 1) - 1;
40087 for (; i > 0; i -= 2, j--)
40088 {
40089 first[j] = gen_reg_rtx (cmode);
40090 v = gen_rtvec (2, ops[i - 1], ops[i]);
40091 ix86_expand_vector_init (false, first[j],
40092 gen_rtx_PARALLEL (cmode, v));
40093 }
40094
40095 n >>= 1;
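      /* After the first pass FIRST[] holds N/2 two-element CMODE vectors.
	 With 16 inputs two more pairwise levels (HMODE, then GMODE) are
	 needed before the final concatenation; with 8 inputs one more level;
	 with 4 or 2 inputs we can concatenate directly.  */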
40096 if (n > 4)
40097 {
40098 gcc_assert (hmode != VOIDmode);
40099 gcc_assert (gmode != VOIDmode);
40100 for (i = j = 0; i < n; i += 2, j++)
40101 {
40102 second[j] = gen_reg_rtx (hmode);
40103 ix86_expand_vector_init_concat (hmode, second [j],
40104 &first [i], 2);
40105 }
40106 n >>= 1;
40107 for (i = j = 0; i < n; i += 2, j++)
40108 {
40109 third[j] = gen_reg_rtx (gmode);
40110 ix86_expand_vector_init_concat (gmode, third[j],
40111 &second[i], 2);
40112 }
40113 n >>= 1;
40114 ix86_expand_vector_init_concat (mode, target, third, n);
40115 }
40116 else if (n > 2)
40117 {
40118 gcc_assert (hmode != VOIDmode);
40119 for (i = j = 0; i < n; i += 2, j++)
40120 {
40121 second[j] = gen_reg_rtx (hmode);
40122 ix86_expand_vector_init_concat (hmode, second [j],
40123 &first [i], 2);
40124 }
40125 n >>= 1;
40126 ix86_expand_vector_init_concat (mode, target, second, n);
40127 }
40128 else
40129 ix86_expand_vector_init_concat (mode, target, first, n);
40130 break;
40131
40132 default:
40133 gcc_unreachable ();
40134 }
40135 }
40136
40137 /* A subroutine of ix86_expand_vector_init_general. Use vector
40138 interleave to handle the most general case: all values variable,
40139 and none identical. */
40140
40141 static void
40142 ix86_expand_vector_init_interleave (enum machine_mode mode,
40143 rtx target, rtx *ops, int n)
40144 {
40145 enum machine_mode first_imode, second_imode, third_imode, inner_mode;
40146 int i, j;
40147 rtx op0, op1;
40148 rtx (*gen_load_even) (rtx, rtx, rtx);
40149 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
40150 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
40151
40152 switch (mode)
40153 {
40154 case V8HImode:
40155 gen_load_even = gen_vec_setv8hi;
40156 gen_interleave_first_low = gen_vec_interleave_lowv4si;
40157 gen_interleave_second_low = gen_vec_interleave_lowv2di;
40158 inner_mode = HImode;
40159 first_imode = V4SImode;
40160 second_imode = V2DImode;
40161 third_imode = VOIDmode;
40162 break;
40163 case V16QImode:
40164 gen_load_even = gen_vec_setv16qi;
40165 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
40166 gen_interleave_second_low = gen_vec_interleave_lowv4si;
40167 inner_mode = QImode;
40168 first_imode = V8HImode;
40169 second_imode = V4SImode;
40170 third_imode = V2DImode;
40171 break;
40172 default:
40173 gcc_unreachable ();
40174 }
40175
40176 for (i = 0; i < n; i++)
40177 {
 40178 	      /* Extend the odd element to SImode using a paradoxical SUBREG.  */
40179 op0 = gen_reg_rtx (SImode);
40180 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
40181
40182 /* Insert the SImode value as low element of V4SImode vector. */
40183 op1 = gen_reg_rtx (V4SImode);
40184 op0 = gen_rtx_VEC_MERGE (V4SImode,
40185 gen_rtx_VEC_DUPLICATE (V4SImode,
40186 op0),
40187 CONST0_RTX (V4SImode),
40188 const1_rtx);
40189 emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
40190
 40191 	      /* Cast the V4SImode vector back to a vector in original mode.  */
40192 op0 = gen_reg_rtx (mode);
40193 emit_move_insn (op0, gen_lowpart (mode, op1));
40194
40195 /* Load even elements into the second position. */
40196 emit_insn (gen_load_even (op0,
40197 force_reg (inner_mode,
40198 ops [i + i + 1]),
40199 const1_rtx));
40200
40201 /* Cast vector to FIRST_IMODE vector. */
40202 ops[i] = gen_reg_rtx (first_imode);
40203 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
40204 }
40205
40206 /* Interleave low FIRST_IMODE vectors. */
40207 for (i = j = 0; i < n; i += 2, j++)
40208 {
40209 op0 = gen_reg_rtx (first_imode);
40210 emit_insn (gen_interleave_first_low (op0, ops[i], ops[i + 1]));
40211
40212 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
40213 ops[j] = gen_reg_rtx (second_imode);
40214 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
40215 }
40216
40217 /* Interleave low SECOND_IMODE vectors. */
40218 switch (second_imode)
40219 {
40220 case V4SImode:
40221 for (i = j = 0; i < n / 2; i += 2, j++)
40222 {
40223 op0 = gen_reg_rtx (second_imode);
40224 emit_insn (gen_interleave_second_low (op0, ops[i],
40225 ops[i + 1]));
40226
40227 /* Cast the SECOND_IMODE vector to the THIRD_IMODE
40228 vector. */
40229 ops[j] = gen_reg_rtx (third_imode);
40230 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
40231 }
40232 second_imode = V2DImode;
40233 gen_interleave_second_low = gen_vec_interleave_lowv2di;
40234 /* FALLTHRU */
40235
40236 case V2DImode:
40237 op0 = gen_reg_rtx (second_imode);
40238 emit_insn (gen_interleave_second_low (op0, ops[0],
40239 ops[1]));
40240
 40241       /* Cast the SECOND_IMODE vector back to a vector in the original
40242 mode. */
40243 emit_insn (gen_rtx_SET (VOIDmode, target,
40244 gen_lowpart (mode, op0)));
40245 break;
40246
40247 default:
40248 gcc_unreachable ();
40249 }
40250 }
40251
40252 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
40253 all values variable, and none identical. */
40254
40255 static void
40256 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
40257 rtx target, rtx vals)
40258 {
40259 rtx ops[64], op0, op1;
40260 enum machine_mode half_mode = VOIDmode;
40261 int n, i;
40262
40263 switch (mode)
40264 {
40265 case V2SFmode:
40266 case V2SImode:
40267 if (!mmx_ok && !TARGET_SSE)
40268 break;
40269 /* FALLTHRU */
40270
40271 case V16SImode:
40272 case V16SFmode:
40273 case V8DFmode:
40274 case V8DImode:
40275 case V8SFmode:
40276 case V8SImode:
40277 case V4DFmode:
40278 case V4DImode:
40279 case V4SFmode:
40280 case V4SImode:
40281 case V2DFmode:
40282 case V2DImode:
40283 n = GET_MODE_NUNITS (mode);
40284 for (i = 0; i < n; i++)
40285 ops[i] = XVECEXP (vals, 0, i);
40286 ix86_expand_vector_init_concat (mode, target, ops, n);
40287 return;
40288
40289 case V32QImode:
40290 half_mode = V16QImode;
40291 goto half;
40292
40293 case V16HImode:
40294 half_mode = V8HImode;
40295 goto half;
40296
40297 half:
40298 n = GET_MODE_NUNITS (mode);
40299 for (i = 0; i < n; i++)
40300 ops[i] = XVECEXP (vals, 0, i);
40301 op0 = gen_reg_rtx (half_mode);
40302 op1 = gen_reg_rtx (half_mode);
40303 ix86_expand_vector_init_interleave (half_mode, op0, ops,
40304 n >> 2);
40305 ix86_expand_vector_init_interleave (half_mode, op1,
40306 &ops [n >> 1], n >> 2);
40307 emit_insn (gen_rtx_SET (VOIDmode, target,
40308 gen_rtx_VEC_CONCAT (mode, op0, op1)));
40309 return;
40310
40311 case V16QImode:
40312 if (!TARGET_SSE4_1)
40313 break;
40314 /* FALLTHRU */
40315
40316 case V8HImode:
40317 if (!TARGET_SSE2)
40318 break;
40319
40320 /* Don't use ix86_expand_vector_init_interleave if we can't
40321 move from GPR to SSE register directly. */
40322 if (!TARGET_INTER_UNIT_MOVES_TO_VEC)
40323 break;
40324
40325 n = GET_MODE_NUNITS (mode);
40326 for (i = 0; i < n; i++)
40327 ops[i] = XVECEXP (vals, 0, i);
40328 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
40329 return;
40330
40331 case V4HImode:
40332 case V8QImode:
40333 break;
40334
40335 default:
40336 gcc_unreachable ();
40337 }
40338
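  /* Fallback: pack the vector elements into word_mode integers one word
     at a time, then assemble the vector from those words.  */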
40339 {
40340 int i, j, n_elts, n_words, n_elt_per_word;
40341 enum machine_mode inner_mode;
40342 rtx words[4], shift;
40343
40344 inner_mode = GET_MODE_INNER (mode);
40345 n_elts = GET_MODE_NUNITS (mode);
40346 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
40347 n_elt_per_word = n_elts / n_words;
40348 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
40349
40350 for (i = 0; i < n_words; ++i)
40351 {
40352 rtx word = NULL_RTX;
40353
40354 for (j = 0; j < n_elt_per_word; ++j)
40355 {
40356 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
40357 elt = convert_modes (word_mode, inner_mode, elt, true);
40358
40359 if (j == 0)
40360 word = elt;
40361 else
40362 {
40363 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
40364 word, 1, OPTAB_LIB_WIDEN);
40365 word = expand_simple_binop (word_mode, IOR, word, elt,
40366 word, 1, OPTAB_LIB_WIDEN);
40367 }
40368 }
40369
40370 words[i] = word;
40371 }
40372
40373 if (n_words == 1)
40374 emit_move_insn (target, gen_lowpart (mode, words[0]));
40375 else if (n_words == 2)
40376 {
40377 rtx tmp = gen_reg_rtx (mode);
40378 emit_clobber (tmp);
40379 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
40380 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
40381 emit_move_insn (target, tmp);
40382 }
40383 else if (n_words == 4)
40384 {
40385 rtx tmp = gen_reg_rtx (V4SImode);
40386 gcc_assert (word_mode == SImode);
40387 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
40388 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
40389 emit_move_insn (target, gen_lowpart (mode, tmp));
40390 }
40391 else
40392 gcc_unreachable ();
40393 }
40394 }
40395
40396 /* Initialize vector TARGET via VALS. Suppress the use of MMX
40397 instructions unless MMX_OK is true. */
40398
40399 void
40400 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
40401 {
40402 enum machine_mode mode = GET_MODE (target);
40403 enum machine_mode inner_mode = GET_MODE_INNER (mode);
40404 int n_elts = GET_MODE_NUNITS (mode);
40405 int n_var = 0, one_var = -1;
40406 bool all_same = true, all_const_zero = true;
40407 int i;
40408 rtx x;
40409
40410 for (i = 0; i < n_elts; ++i)
40411 {
40412 x = XVECEXP (vals, 0, i);
40413 if (!(CONST_INT_P (x)
40414 || GET_CODE (x) == CONST_DOUBLE
40415 || GET_CODE (x) == CONST_FIXED))
40416 n_var++, one_var = i;
40417 else if (x != CONST0_RTX (inner_mode))
40418 all_const_zero = false;
40419 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
40420 all_same = false;
40421 }
40422
40423 /* Constants are best loaded from the constant pool. */
40424 if (n_var == 0)
40425 {
40426 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
40427 return;
40428 }
40429
40430 /* If all values are identical, broadcast the value. */
40431 if (all_same
40432 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
40433 XVECEXP (vals, 0, 0)))
40434 return;
40435
40436 /* Values where only one field is non-constant are best loaded from
40437 the pool and overwritten via move later. */
40438 if (n_var == 1)
40439 {
40440 if (all_const_zero
40441 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
40442 XVECEXP (vals, 0, one_var),
40443 one_var))
40444 return;
40445
40446 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
40447 return;
40448 }
40449
40450 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
40451 }
40452
40453 void
40454 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
40455 {
40456 enum machine_mode mode = GET_MODE (target);
40457 enum machine_mode inner_mode = GET_MODE_INNER (mode);
40458 enum machine_mode half_mode;
40459 bool use_vec_merge = false;
40460 rtx tmp;
40461 static rtx (*gen_extract[6][2]) (rtx, rtx)
40462 = {
40463 { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
40464 { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
40465 { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
40466 { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
40467 { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
40468 { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
40469 };
40470 static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
40471 = {
40472 { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
40473 { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
40474 { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
40475 { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
40476 { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
40477 { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
40478 };
40479 int i, j, n;
40480
40481 switch (mode)
40482 {
40483 case V2SFmode:
40484 case V2SImode:
40485 if (mmx_ok)
40486 {
40487 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
40488 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
40489 if (elt == 0)
40490 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
40491 else
40492 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
40493 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
40494 return;
40495 }
40496 break;
40497
40498 case V2DImode:
40499 use_vec_merge = TARGET_SSE4_1 && TARGET_64BIT;
40500 if (use_vec_merge)
40501 break;
40502
40503 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
40504 ix86_expand_vector_extract (false, tmp, target, 1 - elt);
40505 if (elt == 0)
40506 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
40507 else
40508 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
40509 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
40510 return;
40511
40512 case V2DFmode:
40513 {
40514 rtx op0, op1;
40515
40516 /* For the two element vectors, we implement a VEC_CONCAT with
40517 the extraction of the other element. */
40518
40519 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
40520 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
40521
40522 if (elt == 0)
40523 op0 = val, op1 = tmp;
40524 else
40525 op0 = tmp, op1 = val;
40526
40527 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
40528 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
40529 }
40530 return;
40531
40532 case V4SFmode:
40533 use_vec_merge = TARGET_SSE4_1;
40534 if (use_vec_merge)
40535 break;
40536
40537 switch (elt)
40538 {
40539 case 0:
40540 use_vec_merge = true;
40541 break;
40542
40543 case 1:
40544 /* tmp = target = A B C D */
40545 tmp = copy_to_reg (target);
40546 /* target = A A B B */
40547 emit_insn (gen_vec_interleave_lowv4sf (target, target, target));
40548 /* target = X A B B */
40549 ix86_expand_vector_set (false, target, val, 0);
40550 /* target = A X C D */
40551 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
40552 const1_rtx, const0_rtx,
40553 GEN_INT (2+4), GEN_INT (3+4)));
40554 return;
40555
40556 case 2:
40557 /* tmp = target = A B C D */
40558 tmp = copy_to_reg (target);
40559 /* tmp = X B C D */
40560 ix86_expand_vector_set (false, tmp, val, 0);
40561 /* target = A B X D */
40562 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
40563 const0_rtx, const1_rtx,
40564 GEN_INT (0+4), GEN_INT (3+4)));
40565 return;
40566
40567 case 3:
40568 /* tmp = target = A B C D */
40569 tmp = copy_to_reg (target);
40570 /* tmp = X B C D */
40571 ix86_expand_vector_set (false, tmp, val, 0);
 40572 	  /* target = A B C X */
40573 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
40574 const0_rtx, const1_rtx,
40575 GEN_INT (2+4), GEN_INT (0+4)));
40576 return;
40577
40578 default:
40579 gcc_unreachable ();
40580 }
40581 break;
40582
40583 case V4SImode:
40584 use_vec_merge = TARGET_SSE4_1;
40585 if (use_vec_merge)
40586 break;
40587
40588 /* Element 0 handled by vec_merge below. */
40589 if (elt == 0)
40590 {
40591 use_vec_merge = true;
40592 break;
40593 }
40594
40595 if (TARGET_SSE2)
40596 {
40597 /* With SSE2, use integer shuffles to swap element 0 and ELT,
40598 store into element 0, then shuffle them back. */
40599
40600 rtx order[4];
40601
40602 order[0] = GEN_INT (elt);
40603 order[1] = const1_rtx;
40604 order[2] = const2_rtx;
40605 order[3] = GEN_INT (3);
40606 order[elt] = const0_rtx;
40607
40608 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
40609 order[1], order[2], order[3]));
40610
40611 ix86_expand_vector_set (false, target, val, 0);
40612
40613 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
40614 order[1], order[2], order[3]));
40615 }
40616 else
40617 {
40618 /* For SSE1, we have to reuse the V4SF code. */
40619 rtx t = gen_reg_rtx (V4SFmode);
40620 ix86_expand_vector_set (false, t, gen_lowpart (SFmode, val), elt);
40621 emit_move_insn (target, gen_lowpart (mode, t));
40622 }
40623 return;
40624
40625 case V8HImode:
40626 use_vec_merge = TARGET_SSE2;
40627 break;
40628 case V4HImode:
40629 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
40630 break;
40631
40632 case V16QImode:
40633 use_vec_merge = TARGET_SSE4_1;
40634 break;
40635
40636 case V8QImode:
40637 break;
40638
40639 case V32QImode:
40640 half_mode = V16QImode;
40641 j = 0;
40642 n = 16;
40643 goto half;
40644
40645 case V16HImode:
40646 half_mode = V8HImode;
40647 j = 1;
40648 n = 8;
40649 goto half;
40650
40651 case V8SImode:
40652 half_mode = V4SImode;
40653 j = 2;
40654 n = 4;
40655 goto half;
40656
40657 case V4DImode:
40658 half_mode = V2DImode;
40659 j = 3;
40660 n = 2;
40661 goto half;
40662
40663 case V8SFmode:
40664 half_mode = V4SFmode;
40665 j = 4;
40666 n = 4;
40667 goto half;
40668
40669 case V4DFmode:
40670 half_mode = V2DFmode;
40671 j = 5;
40672 n = 2;
40673 goto half;
40674
40675 half:
40676 /* Compute offset. */
40677 i = elt / n;
40678 elt %= n;
40679
40680 gcc_assert (i <= 1);
40681
40682 /* Extract the half. */
40683 tmp = gen_reg_rtx (half_mode);
40684 emit_insn (gen_extract[j][i] (tmp, target));
40685
40686 /* Put val in tmp at elt. */
40687 ix86_expand_vector_set (false, tmp, val, elt);
40688
40689 /* Put it back. */
40690 emit_insn (gen_insert[j][i] (target, target, tmp));
40691 return;
40692
40693 default:
40694 break;
40695 }
40696
40697 if (use_vec_merge)
40698 {
40699 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
40700 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
40701 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
40702 }
40703 else
40704 {
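      /* No direct insertion pattern is available: spill the vector to a
	 stack temporary, overwrite the selected element in memory, and
	 reload the whole vector.  */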
40705 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
40706
40707 emit_move_insn (mem, target);
40708
40709 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
40710 emit_move_insn (tmp, val);
40711
40712 emit_move_insn (target, mem);
40713 }
40714 }
40715
40716 void
40717 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
40718 {
40719 enum machine_mode mode = GET_MODE (vec);
40720 enum machine_mode inner_mode = GET_MODE_INNER (mode);
40721 bool use_vec_extr = false;
40722 rtx tmp;
40723
40724 switch (mode)
40725 {
40726 case V2SImode:
40727 case V2SFmode:
40728 if (!mmx_ok)
40729 break;
40730 /* FALLTHRU */
40731
40732 case V2DFmode:
40733 case V2DImode:
40734 use_vec_extr = true;
40735 break;
40736
40737 case V4SFmode:
40738 use_vec_extr = TARGET_SSE4_1;
40739 if (use_vec_extr)
40740 break;
40741
40742 switch (elt)
40743 {
40744 case 0:
40745 tmp = vec;
40746 break;
40747
40748 case 1:
40749 case 3:
40750 tmp = gen_reg_rtx (mode);
40751 emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
40752 GEN_INT (elt), GEN_INT (elt),
40753 GEN_INT (elt+4), GEN_INT (elt+4)));
40754 break;
40755
40756 case 2:
40757 tmp = gen_reg_rtx (mode);
40758 emit_insn (gen_vec_interleave_highv4sf (tmp, vec, vec));
40759 break;
40760
40761 default:
40762 gcc_unreachable ();
40763 }
40764 vec = tmp;
40765 use_vec_extr = true;
40766 elt = 0;
40767 break;
40768
40769 case V4SImode:
40770 use_vec_extr = TARGET_SSE4_1;
40771 if (use_vec_extr)
40772 break;
40773
40774 if (TARGET_SSE2)
40775 {
40776 switch (elt)
40777 {
40778 case 0:
40779 tmp = vec;
40780 break;
40781
40782 case 1:
40783 case 3:
40784 tmp = gen_reg_rtx (mode);
40785 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
40786 GEN_INT (elt), GEN_INT (elt),
40787 GEN_INT (elt), GEN_INT (elt)));
40788 break;
40789
40790 case 2:
40791 tmp = gen_reg_rtx (mode);
40792 emit_insn (gen_vec_interleave_highv4si (tmp, vec, vec));
40793 break;
40794
40795 default:
40796 gcc_unreachable ();
40797 }
40798 vec = tmp;
40799 use_vec_extr = true;
40800 elt = 0;
40801 }
40802 else
40803 {
40804 /* For SSE1, we have to reuse the V4SF code. */
40805 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
40806 gen_lowpart (V4SFmode, vec), elt);
40807 return;
40808 }
40809 break;
40810
40811 case V8HImode:
40812 use_vec_extr = TARGET_SSE2;
40813 break;
40814 case V4HImode:
40815 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
40816 break;
40817
40818 case V16QImode:
40819 use_vec_extr = TARGET_SSE4_1;
40820 break;
40821
40822 case V8SFmode:
40823 if (TARGET_AVX)
40824 {
40825 tmp = gen_reg_rtx (V4SFmode);
40826 if (elt < 4)
40827 emit_insn (gen_vec_extract_lo_v8sf (tmp, vec));
40828 else
40829 emit_insn (gen_vec_extract_hi_v8sf (tmp, vec));
40830 ix86_expand_vector_extract (false, target, tmp, elt & 3);
40831 return;
40832 }
40833 break;
40834
40835 case V4DFmode:
40836 if (TARGET_AVX)
40837 {
40838 tmp = gen_reg_rtx (V2DFmode);
40839 if (elt < 2)
40840 emit_insn (gen_vec_extract_lo_v4df (tmp, vec));
40841 else
40842 emit_insn (gen_vec_extract_hi_v4df (tmp, vec));
40843 ix86_expand_vector_extract (false, target, tmp, elt & 1);
40844 return;
40845 }
40846 break;
40847
40848 case V32QImode:
40849 if (TARGET_AVX)
40850 {
40851 tmp = gen_reg_rtx (V16QImode);
40852 if (elt < 16)
40853 emit_insn (gen_vec_extract_lo_v32qi (tmp, vec));
40854 else
40855 emit_insn (gen_vec_extract_hi_v32qi (tmp, vec));
40856 ix86_expand_vector_extract (false, target, tmp, elt & 15);
40857 return;
40858 }
40859 break;
40860
40861 case V16HImode:
40862 if (TARGET_AVX)
40863 {
40864 tmp = gen_reg_rtx (V8HImode);
40865 if (elt < 8)
40866 emit_insn (gen_vec_extract_lo_v16hi (tmp, vec));
40867 else
40868 emit_insn (gen_vec_extract_hi_v16hi (tmp, vec));
40869 ix86_expand_vector_extract (false, target, tmp, elt & 7);
40870 return;
40871 }
40872 break;
40873
40874 case V8SImode:
40875 if (TARGET_AVX)
40876 {
40877 tmp = gen_reg_rtx (V4SImode);
40878 if (elt < 4)
40879 emit_insn (gen_vec_extract_lo_v8si (tmp, vec));
40880 else
40881 emit_insn (gen_vec_extract_hi_v8si (tmp, vec));
40882 ix86_expand_vector_extract (false, target, tmp, elt & 3);
40883 return;
40884 }
40885 break;
40886
40887 case V4DImode:
40888 if (TARGET_AVX)
40889 {
40890 tmp = gen_reg_rtx (V2DImode);
40891 if (elt < 2)
40892 emit_insn (gen_vec_extract_lo_v4di (tmp, vec));
40893 else
40894 emit_insn (gen_vec_extract_hi_v4di (tmp, vec));
40895 ix86_expand_vector_extract (false, target, tmp, elt & 1);
40896 return;
40897 }
40898 break;
40899
40900 case V16SFmode:
40901 tmp = gen_reg_rtx (V8SFmode);
40902 if (elt < 8)
40903 emit_insn (gen_vec_extract_lo_v16sf (tmp, vec));
40904 else
40905 emit_insn (gen_vec_extract_hi_v16sf (tmp, vec));
40906 ix86_expand_vector_extract (false, target, tmp, elt & 7);
40907 return;
40908
40909 case V8DFmode:
40910 tmp = gen_reg_rtx (V4DFmode);
40911 if (elt < 4)
40912 emit_insn (gen_vec_extract_lo_v8df (tmp, vec));
40913 else
40914 emit_insn (gen_vec_extract_hi_v8df (tmp, vec));
40915 ix86_expand_vector_extract (false, target, tmp, elt & 3);
40916 return;
40917
40918 case V16SImode:
40919 tmp = gen_reg_rtx (V8SImode);
40920 if (elt < 8)
40921 emit_insn (gen_vec_extract_lo_v16si (tmp, vec));
40922 else
40923 emit_insn (gen_vec_extract_hi_v16si (tmp, vec));
40924 ix86_expand_vector_extract (false, target, tmp, elt & 7);
40925 return;
40926
40927 case V8DImode:
40928 tmp = gen_reg_rtx (V4DImode);
40929 if (elt < 4)
40930 emit_insn (gen_vec_extract_lo_v8di (tmp, vec));
40931 else
40932 emit_insn (gen_vec_extract_hi_v8di (tmp, vec));
40933 ix86_expand_vector_extract (false, target, tmp, elt & 3);
40934 return;
40935
40936 case V8QImode:
40937 /* ??? Could extract the appropriate HImode element and shift. */
40938 default:
40939 break;
40940 }
40941
40942 if (use_vec_extr)
40943 {
40944 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
40945 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
40946
40947 /* Let the rtl optimizers know about the zero extension performed. */
40948 if (inner_mode == QImode || inner_mode == HImode)
40949 {
40950 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
40951 target = gen_lowpart (SImode, target);
40952 }
40953
40954 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
40955 }
40956 else
40957 {
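      /* No direct extraction pattern is available: spill the vector to a
	 stack temporary and load the selected element back from memory.  */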
40958 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
40959
40960 emit_move_insn (mem, vec);
40961
40962 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
40963 emit_move_insn (target, tmp);
40964 }
40965 }
40966
40967 /* Generate code to copy vector bits i / 2 ... i - 1 from vector SRC
40968 to bits 0 ... i / 2 - 1 of vector DEST, which has the same mode.
40969 The upper bits of DEST are undefined, though they shouldn't cause
40970 exceptions (some bits from src or all zeros are ok). */
40971
40972 static void
40973 emit_reduc_half (rtx dest, rtx src, int i)
40974 {
40975 rtx tem, d = dest;
40976 switch (GET_MODE (src))
40977 {
40978 case V4SFmode:
40979 if (i == 128)
40980 tem = gen_sse_movhlps (dest, src, src);
40981 else
40982 tem = gen_sse_shufps_v4sf (dest, src, src, const1_rtx, const1_rtx,
40983 GEN_INT (1 + 4), GEN_INT (1 + 4));
40984 break;
40985 case V2DFmode:
40986 tem = gen_vec_interleave_highv2df (dest, src, src);
40987 break;
40988 case V16QImode:
40989 case V8HImode:
40990 case V4SImode:
40991 case V2DImode:
40992 d = gen_reg_rtx (V1TImode);
40993 tem = gen_sse2_lshrv1ti3 (d, gen_lowpart (V1TImode, src),
40994 GEN_INT (i / 2));
40995 break;
40996 case V8SFmode:
40997 if (i == 256)
40998 tem = gen_avx_vperm2f128v8sf3 (dest, src, src, const1_rtx);
40999 else
41000 tem = gen_avx_shufps256 (dest, src, src,
41001 GEN_INT (i == 128 ? 2 + (3 << 2) : 1));
41002 break;
41003 case V4DFmode:
41004 if (i == 256)
41005 tem = gen_avx_vperm2f128v4df3 (dest, src, src, const1_rtx);
41006 else
41007 tem = gen_avx_shufpd256 (dest, src, src, const1_rtx);
41008 break;
41009 case V32QImode:
41010 case V16HImode:
41011 case V8SImode:
41012 case V4DImode:
41013 if (i == 256)
41014 {
41015 if (GET_MODE (dest) != V4DImode)
41016 d = gen_reg_rtx (V4DImode);
41017 tem = gen_avx2_permv2ti (d, gen_lowpart (V4DImode, src),
41018 gen_lowpart (V4DImode, src),
41019 const1_rtx);
41020 }
41021 else
41022 {
41023 d = gen_reg_rtx (V2TImode);
41024 tem = gen_avx2_lshrv2ti3 (d, gen_lowpart (V2TImode, src),
41025 GEN_INT (i / 2));
41026 }
41027 break;
41028 case V16SImode:
41029 case V16SFmode:
41030 case V8DImode:
41031 case V8DFmode:
41032 if (i > 128)
41033 tem = gen_avx512f_shuf_i32x4_1 (gen_lowpart (V16SImode, dest),
41034 gen_lowpart (V16SImode, src),
41035 gen_lowpart (V16SImode, src),
41036 GEN_INT (0x4 + (i == 512 ? 4 : 0)),
41037 GEN_INT (0x5 + (i == 512 ? 4 : 0)),
41038 GEN_INT (0x6 + (i == 512 ? 4 : 0)),
41039 GEN_INT (0x7 + (i == 512 ? 4 : 0)),
41040 GEN_INT (0xC), GEN_INT (0xD),
41041 GEN_INT (0xE), GEN_INT (0xF),
41042 GEN_INT (0x10), GEN_INT (0x11),
41043 GEN_INT (0x12), GEN_INT (0x13),
41044 GEN_INT (0x14), GEN_INT (0x15),
41045 GEN_INT (0x16), GEN_INT (0x17));
41046 else
41047 tem = gen_avx512f_pshufd_1 (gen_lowpart (V16SImode, dest),
41048 gen_lowpart (V16SImode, src),
41049 GEN_INT (i == 128 ? 0x2 : 0x1),
41050 GEN_INT (0x3),
41051 GEN_INT (0x3),
41052 GEN_INT (0x3),
41053 GEN_INT (i == 128 ? 0x6 : 0x5),
41054 GEN_INT (0x7),
41055 GEN_INT (0x7),
41056 GEN_INT (0x7),
41057 GEN_INT (i == 128 ? 0xA : 0x9),
41058 GEN_INT (0xB),
41059 GEN_INT (0xB),
41060 GEN_INT (0xB),
41061 GEN_INT (i == 128 ? 0xE : 0xD),
41062 GEN_INT (0xF),
41063 GEN_INT (0xF),
41064 GEN_INT (0xF));
41065 break;
41066 default:
41067 gcc_unreachable ();
41068 }
41069 emit_insn (tem);
41070 if (d != dest)
41071 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
41072 }
41073
41074 /* Expand a vector reduction. FN is the binary pattern to reduce;
41075 DEST is the destination; IN is the input vector. */
41076
41077 void
41078 ix86_expand_reduc (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
41079 {
41080 rtx half, dst, vec = in;
41081 enum machine_mode mode = GET_MODE (in);
41082 int i;
41083
 41084   /* SSE4.1 has a special instruction, phminposuw, for V8HImode UMIN
 	     reduction.  */
41085 if (TARGET_SSE4_1
41086 && mode == V8HImode
41087 && fn == gen_uminv8hi3)
41088 {
41089 emit_insn (gen_sse4_1_phminposuw (dest, in));
41090 return;
41091 }
41092
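  /* Log2 reduction: I is the width in bits of the chunk still to be
     reduced.  Each iteration folds the upper half of VEC onto the lower
     half with FN and halves I, until a single element remains.  */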
41093 for (i = GET_MODE_BITSIZE (mode);
41094 i > GET_MODE_BITSIZE (GET_MODE_INNER (mode));
41095 i >>= 1)
41096 {
41097 half = gen_reg_rtx (mode);
41098 emit_reduc_half (half, vec, i);
41099 if (i == GET_MODE_BITSIZE (GET_MODE_INNER (mode)) * 2)
41100 dst = dest;
41101 else
41102 dst = gen_reg_rtx (mode);
41103 emit_insn (fn (dst, half, vec));
41104 vec = dst;
41105 }
41106 }
41107 \f
41108 /* Target hook for scalar_mode_supported_p. */
41109 static bool
41110 ix86_scalar_mode_supported_p (enum machine_mode mode)
41111 {
41112 if (DECIMAL_FLOAT_MODE_P (mode))
41113 return default_decimal_float_supported_p ();
41114 else if (mode == TFmode)
41115 return true;
41116 else
41117 return default_scalar_mode_supported_p (mode);
41118 }
41119
41120 /* Implements target hook vector_mode_supported_p. */
41121 static bool
41122 ix86_vector_mode_supported_p (enum machine_mode mode)
41123 {
41124 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
41125 return true;
41126 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
41127 return true;
41128 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
41129 return true;
41130 if (TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
41131 return true;
41132 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
41133 return true;
41134 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
41135 return true;
41136 return false;
41137 }
41138
41139 /* Target hook for c_mode_for_suffix. */
41140 static enum machine_mode
41141 ix86_c_mode_for_suffix (char suffix)
41142 {
41143 if (suffix == 'q')
41144 return TFmode;
41145 if (suffix == 'w')
41146 return XFmode;
41147
41148 return VOIDmode;
41149 }
41150
41151 /* Worker function for TARGET_MD_ASM_CLOBBERS.
41152
41153 We do this in the new i386 backend to maintain source compatibility
41154 with the old cc0-based compiler. */
41155
41156 static tree
41157 ix86_md_asm_clobbers (tree, tree, tree clobbers)
41158 {
41159 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
41160 clobbers);
41161 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
41162 clobbers);
41163 return clobbers;
41164 }
41165
41166 /* Implements target vector targetm.asm.encode_section_info. */
41167
41168 static void ATTRIBUTE_UNUSED
41169 ix86_encode_section_info (tree decl, rtx rtl, int first)
41170 {
41171 default_encode_section_info (decl, rtl, first);
41172
41173 if (TREE_CODE (decl) == VAR_DECL
41174 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
41175 && ix86_in_large_data_p (decl))
41176 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
41177 }
41178
41179 /* Worker function for REVERSE_CONDITION. */
41180
41181 enum rtx_code
41182 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
41183 {
41184 return (mode != CCFPmode && mode != CCFPUmode
41185 ? reverse_condition (code)
41186 : reverse_condition_maybe_unordered (code));
41187 }
41188
41189 /* Output code to perform an x87 FP register move, from OPERANDS[1]
41190 to OPERANDS[0]. */
41191
41192 const char *
41193 output_387_reg_move (rtx insn, rtx *operands)
41194 {
41195 if (REG_P (operands[0]))
41196 {
41197 if (REG_P (operands[1])
41198 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
41199 {
41200 if (REGNO (operands[0]) == FIRST_STACK_REG)
41201 return output_387_ffreep (operands, 0);
41202 return "fstp\t%y0";
41203 }
41204 if (STACK_TOP_P (operands[0]))
41205 return "fld%Z1\t%y1";
41206 return "fst\t%y0";
41207 }
41208 else if (MEM_P (operands[0]))
41209 {
41210 gcc_assert (REG_P (operands[1]));
41211 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
41212 return "fstp%Z0\t%y0";
41213 else
41214 {
41215 /* There is no non-popping store to memory for XFmode.
41216 So if we need one, follow the store with a load. */
41217 if (GET_MODE (operands[0]) == XFmode)
41218 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
41219 else
41220 return "fst%Z0\t%y0";
41221 }
41222 }
41223 else
41224 gcc_unreachable();
41225 }
41226
41227 /* Output code to perform a conditional jump to LABEL, if C2 flag in
41228 FP status register is set. */
41229
41230 void
41231 ix86_emit_fp_unordered_jump (rtx label)
41232 {
41233 rtx reg = gen_reg_rtx (HImode);
41234 rtx temp;
41235
41236 emit_insn (gen_x86_fnstsw_1 (reg));
41237
41238 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
41239 {
41240 emit_insn (gen_x86_sahf_1 (reg));
41241
41242 temp = gen_rtx_REG (CCmode, FLAGS_REG);
41243 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
41244 }
41245 else
41246 {
41247 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
41248
41249 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
41250 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
41251 }
41252
41253 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
41254 gen_rtx_LABEL_REF (VOIDmode, label),
41255 pc_rtx);
41256 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
41257
41258 emit_jump_insn (temp);
41259 predict_jump (REG_BR_PROB_BASE * 10 / 100);
41260 }
41261
41262 /* Output code to perform a log1p XFmode calculation. */
41263
41264 void ix86_emit_i387_log1p (rtx op0, rtx op1)
41265 {
41266 rtx label1 = gen_label_rtx ();
41267 rtx label2 = gen_label_rtx ();
41268
41269 rtx tmp = gen_reg_rtx (XFmode);
41270 rtx tmp2 = gen_reg_rtx (XFmode);
41271 rtx test;
41272
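  /* fyl2xp1 is only specified for |op1| <= 1 - sqrt(2)/2 (about 0.2928932);
     for larger magnitudes branch to label1 and compute
     ln2 * log2 (1 + op1) with fyl2x instead.  */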
41273 emit_insn (gen_absxf2 (tmp, op1));
41274 test = gen_rtx_GE (VOIDmode, tmp,
41275 CONST_DOUBLE_FROM_REAL_VALUE (
41276 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
41277 XFmode));
41278 emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1));
41279
41280 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
41281 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
41282 emit_jump (label2);
41283
41284 emit_label (label1);
41285 emit_move_insn (tmp, CONST1_RTX (XFmode));
41286 emit_insn (gen_addxf3 (tmp, op1, tmp));
41287 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
41288 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
41289
41290 emit_label (label2);
41291 }
41292
 41293 /* Output code to perform an x87 round() calculation (round to nearest,
 	  halfway cases away from zero) from OP1 storing into OP0.  */
41294 void ix86_emit_i387_round (rtx op0, rtx op1)
41295 {
41296 enum machine_mode inmode = GET_MODE (op1);
41297 enum machine_mode outmode = GET_MODE (op0);
41298 rtx e1, e2, res, tmp, tmp1, half;
41299 rtx scratch = gen_reg_rtx (HImode);
41300 rtx flags = gen_rtx_REG (CCNOmode, FLAGS_REG);
41301 rtx jump_label = gen_label_rtx ();
41302 rtx insn;
41303 rtx (*gen_abs) (rtx, rtx);
41304 rtx (*gen_neg) (rtx, rtx);
41305
41306 switch (inmode)
41307 {
41308 case SFmode:
41309 gen_abs = gen_abssf2;
41310 break;
41311 case DFmode:
41312 gen_abs = gen_absdf2;
41313 break;
41314 case XFmode:
41315 gen_abs = gen_absxf2;
41316 break;
41317 default:
41318 gcc_unreachable ();
41319 }
41320
41321 switch (outmode)
41322 {
41323 case SFmode:
41324 gen_neg = gen_negsf2;
41325 break;
41326 case DFmode:
41327 gen_neg = gen_negdf2;
41328 break;
41329 case XFmode:
41330 gen_neg = gen_negxf2;
41331 break;
41332 case HImode:
41333 gen_neg = gen_neghi2;
41334 break;
41335 case SImode:
41336 gen_neg = gen_negsi2;
41337 break;
41338 case DImode:
41339 gen_neg = gen_negdi2;
41340 break;
41341 default:
41342 gcc_unreachable ();
41343 }
41344
41345 e1 = gen_reg_rtx (inmode);
41346 e2 = gen_reg_rtx (inmode);
41347 res = gen_reg_rtx (outmode);
41348
41349 half = CONST_DOUBLE_FROM_REAL_VALUE (dconsthalf, inmode);
41350
41351 /* round(a) = sgn(a) * floor(fabs(a) + 0.5) */
41352
41353 /* scratch = fxam(op1) */
41354 emit_insn (gen_rtx_SET (VOIDmode, scratch,
41355 gen_rtx_UNSPEC (HImode, gen_rtvec (1, op1),
41356 UNSPEC_FXAM)));
41357 /* e1 = fabs(op1) */
41358 emit_insn (gen_abs (e1, op1));
41359
41360 /* e2 = e1 + 0.5 */
41361 half = force_reg (inmode, half);
41362 emit_insn (gen_rtx_SET (VOIDmode, e2,
41363 gen_rtx_PLUS (inmode, e1, half)));
41364
41365 /* res = floor(e2) */
41366 if (inmode != XFmode)
41367 {
41368 tmp1 = gen_reg_rtx (XFmode);
41369
41370 emit_insn (gen_rtx_SET (VOIDmode, tmp1,
41371 gen_rtx_FLOAT_EXTEND (XFmode, e2)));
41372 }
41373 else
41374 tmp1 = e2;
41375
41376 switch (outmode)
41377 {
41378 case SFmode:
41379 case DFmode:
41380 {
41381 rtx tmp0 = gen_reg_rtx (XFmode);
41382
41383 emit_insn (gen_frndintxf2_floor (tmp0, tmp1));
41384
41385 emit_insn (gen_rtx_SET (VOIDmode, res,
41386 gen_rtx_UNSPEC (outmode, gen_rtvec (1, tmp0),
41387 UNSPEC_TRUNC_NOOP)));
41388 }
41389 break;
41390 case XFmode:
41391 emit_insn (gen_frndintxf2_floor (res, tmp1));
41392 break;
41393 case HImode:
41394 emit_insn (gen_lfloorxfhi2 (res, tmp1));
41395 break;
41396 case SImode:
41397 emit_insn (gen_lfloorxfsi2 (res, tmp1));
41398 break;
41399 case DImode:
41400 emit_insn (gen_lfloorxfdi2 (res, tmp1));
41401 break;
41402 default:
41403 gcc_unreachable ();
41404 }
41405
41406 /* flags = signbit(a) */
41407 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x02)));
41408
41409 /* if (flags) then res = -res */
41410 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode,
41411 gen_rtx_EQ (VOIDmode, flags, const0_rtx),
41412 gen_rtx_LABEL_REF (VOIDmode, jump_label),
41413 pc_rtx);
41414 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
41415 predict_jump (REG_BR_PROB_BASE * 50 / 100);
41416 JUMP_LABEL (insn) = jump_label;
41417
41418 emit_insn (gen_neg (res, res));
41419
41420 emit_label (jump_label);
41421 LABEL_NUSES (jump_label) = 1;
41422
41423 emit_move_insn (op0, res);
41424 }
41425
 41426 /* Output code to perform a Newton-Raphson approximation of a single precision
41427 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
41428
41429 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
41430 {
41431 rtx x0, x1, e0, e1;
41432
41433 x0 = gen_reg_rtx (mode);
41434 e0 = gen_reg_rtx (mode);
41435 e1 = gen_reg_rtx (mode);
41436 x1 = gen_reg_rtx (mode);
41437
41438 /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */
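  /* The parenthesized factor is one Newton-Raphson refinement of the
     reciprocal estimate x0 ~= 1/b:  x1 = x0 * (2 - b * x0), rewritten as
     (x0 + x0) - (b * x0 * x0) so that each product is formed once.  */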
41439
41440 b = force_reg (mode, b);
41441
41442 /* x0 = rcp(b) estimate */
41443 if (mode == V16SFmode || mode == V8DFmode)
41444 emit_insn (gen_rtx_SET (VOIDmode, x0,
41445 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
41446 UNSPEC_RCP14)));
41447 else
41448 emit_insn (gen_rtx_SET (VOIDmode, x0,
41449 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
41450 UNSPEC_RCP)));
41451
41452 /* e0 = x0 * b */
41453 emit_insn (gen_rtx_SET (VOIDmode, e0,
41454 gen_rtx_MULT (mode, x0, b)));
41455
41456 /* e0 = x0 * e0 */
41457 emit_insn (gen_rtx_SET (VOIDmode, e0,
41458 gen_rtx_MULT (mode, x0, e0)));
41459
41460 /* e1 = x0 + x0 */
41461 emit_insn (gen_rtx_SET (VOIDmode, e1,
41462 gen_rtx_PLUS (mode, x0, x0)));
41463
41464 /* x1 = e1 - e0 */
41465 emit_insn (gen_rtx_SET (VOIDmode, x1,
41466 gen_rtx_MINUS (mode, e1, e0)));
41467
41468 /* res = a * x1 */
41469 emit_insn (gen_rtx_SET (VOIDmode, res,
41470 gen_rtx_MULT (mode, a, x1)));
41471 }
41472
 41473 /* Output code to perform a Newton-Raphson approximation of a
41474 single precision floating point [reciprocal] square root. */
41475
41476 void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
41477 bool recip)
41478 {
41479 rtx x0, e0, e1, e2, e3, mthree, mhalf;
41480 REAL_VALUE_TYPE r;
41481 int unspec;
41482
41483 x0 = gen_reg_rtx (mode);
41484 e0 = gen_reg_rtx (mode);
41485 e1 = gen_reg_rtx (mode);
41486 e2 = gen_reg_rtx (mode);
41487 e3 = gen_reg_rtx (mode);
41488
41489 real_from_integer (&r, VOIDmode, -3, SIGNED);
41490 mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
41491
41492 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
41493 mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
41494 unspec = UNSPEC_RSQRT;
41495
41496 if (VECTOR_MODE_P (mode))
41497 {
41498 mthree = ix86_build_const_vector (mode, true, mthree);
41499 mhalf = ix86_build_const_vector (mode, true, mhalf);
41500 /* There is no 512-bit rsqrt. There is however rsqrt14. */
41501 if (GET_MODE_SIZE (mode) == 64)
41502 unspec = UNSPEC_RSQRT14;
41503 }
41504
41505 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
41506 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
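  /* Each formula is one Newton-Raphson step for 1/sqrt(a):
     x1 = 0.5 * x0 * (3 - a * x0 * x0), written above with both factors
     negated; the sqrt variant additionally multiplies by A (via e0 = a * x0)
     to turn the refined rsqrt into sqrt(a).  */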
41507
41508 a = force_reg (mode, a);
41509
41510 /* x0 = rsqrt(a) estimate */
41511 emit_insn (gen_rtx_SET (VOIDmode, x0,
41512 gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
41513 unspec)));
41514
 41515   /* If a == 0.0, filter out the infinite rsqrt estimate so that
 	    sqrt(0.0) does not produce a NaN.  */
41516 if (!recip)
41517 {
41518 rtx zero, mask;
41519
41520 zero = gen_reg_rtx (mode);
41521 mask = gen_reg_rtx (mode);
41522
41523 zero = force_reg (mode, CONST0_RTX(mode));
41524
41525 /* Handle masked compare. */
41526 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
41527 {
41528 mask = gen_reg_rtx (HImode);
41529 /* Imm value 0x4 corresponds to not-equal comparison. */
41530 emit_insn (gen_avx512f_cmpv16sf3 (mask, zero, a, GEN_INT (0x4)));
41531 emit_insn (gen_avx512f_blendmv16sf (x0, zero, x0, mask));
41532 }
41533 else
41534 {
41535 emit_insn (gen_rtx_SET (VOIDmode, mask,
41536 gen_rtx_NE (mode, zero, a)));
41537
41538 emit_insn (gen_rtx_SET (VOIDmode, x0,
41539 gen_rtx_AND (mode, x0, mask)));
41540 }
41541 }
41542
41543 /* e0 = x0 * a */
41544 emit_insn (gen_rtx_SET (VOIDmode, e0,
41545 gen_rtx_MULT (mode, x0, a)));
41546 /* e1 = e0 * x0 */
41547 emit_insn (gen_rtx_SET (VOIDmode, e1,
41548 gen_rtx_MULT (mode, e0, x0)));
41549
41550 /* e2 = e1 - 3. */
41551 mthree = force_reg (mode, mthree);
41552 emit_insn (gen_rtx_SET (VOIDmode, e2,
41553 gen_rtx_PLUS (mode, e1, mthree)));
41554
41555 mhalf = force_reg (mode, mhalf);
41556 if (recip)
41557 /* e3 = -.5 * x0 */
41558 emit_insn (gen_rtx_SET (VOIDmode, e3,
41559 gen_rtx_MULT (mode, x0, mhalf)));
41560 else
41561 /* e3 = -.5 * e0 */
41562 emit_insn (gen_rtx_SET (VOIDmode, e3,
41563 gen_rtx_MULT (mode, e0, mhalf)));
41564 /* ret = e2 * e3 */
41565 emit_insn (gen_rtx_SET (VOIDmode, res,
41566 gen_rtx_MULT (mode, e2, e3)));
41567 }
41568
41569 #ifdef TARGET_SOLARIS
41570 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
41571
41572 static void
41573 i386_solaris_elf_named_section (const char *name, unsigned int flags,
41574 tree decl)
41575 {
41576 /* With Binutils 2.15, the "@unwind" marker must be specified on
41577 every occurrence of the ".eh_frame" section, not just the first
41578 one. */
41579 if (TARGET_64BIT
41580 && strcmp (name, ".eh_frame") == 0)
41581 {
41582 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
41583 flags & SECTION_WRITE ? "aw" : "a");
41584 return;
41585 }
41586
41587 #ifndef USE_GAS
41588 if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
41589 {
41590 solaris_elf_asm_comdat_section (name, flags, decl);
41591 return;
41592 }
41593 #endif
41594
41595 default_elf_asm_named_section (name, flags, decl);
41596 }
41597 #endif /* TARGET_SOLARIS */
41598
41599 /* Return the mangling of TYPE if it is an extended fundamental type. */
41600
41601 static const char *
41602 ix86_mangle_type (const_tree type)
41603 {
41604 type = TYPE_MAIN_VARIANT (type);
41605
41606 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
41607 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
41608 return NULL;
41609
41610 switch (TYPE_MODE (type))
41611 {
41612 case TFmode:
41613 /* __float128 is "g". */
41614 return "g";
41615 case XFmode:
41616 /* "long double" or __float80 is "e". */
41617 return "e";
41618 default:
41619 return NULL;
41620 }
41621 }
41622
41623 /* For 32-bit code we can save PIC register setup by using
41624 __stack_chk_fail_local hidden function instead of calling
41625 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
41626 register, so it is better to call __stack_chk_fail directly. */
41627
41628 static tree ATTRIBUTE_UNUSED
41629 ix86_stack_protect_fail (void)
41630 {
41631 return TARGET_64BIT
41632 ? default_external_stack_protect_fail ()
41633 : default_hidden_stack_protect_fail ();
41634 }
41635
41636 /* Select a format to encode pointers in exception handling data. CODE
41637 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
41638 true if the symbol may be affected by dynamic relocations.
41639
41640 ??? All x86 object file formats are capable of representing this.
41641 After all, the relocation needed is the same as for the call insn.
41642 Whether or not a particular assembler allows us to enter such, I
41643 guess we'll have to see. */
41644 int
41645 asm_preferred_eh_data_format (int code, int global)
41646 {
41647 if (flag_pic)
41648 {
41649 int type = DW_EH_PE_sdata8;
41650 if (!TARGET_64BIT
41651 || ix86_cmodel == CM_SMALL_PIC
41652 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
41653 type = DW_EH_PE_sdata4;
41654 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
41655 }
41656 if (ix86_cmodel == CM_SMALL
41657 || (ix86_cmodel == CM_MEDIUM && code))
41658 return DW_EH_PE_udata4;
41659 return DW_EH_PE_absptr;
41660 }
41661 \f
41662 /* Expand copysign from SIGN to the positive value ABS_VALUE
41663 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
41664 the sign-bit. */
41665 static void
41666 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
41667 {
41668 enum machine_mode mode = GET_MODE (sign);
41669 rtx sgn = gen_reg_rtx (mode);
41670 if (mask == NULL_RTX)
41671 {
41672 enum machine_mode vmode;
41673
41674 if (mode == SFmode)
41675 vmode = V4SFmode;
41676 else if (mode == DFmode)
41677 vmode = V2DFmode;
41678 else
41679 vmode = mode;
41680
41681 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), false);
41682 if (!VECTOR_MODE_P (mode))
41683 {
41684 /* We need to generate a scalar mode mask in this case. */
41685 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
41686 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
41687 mask = gen_reg_rtx (mode);
41688 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
41689 }
41690 }
41691 else
41692 mask = gen_rtx_NOT (mode, mask);
41693 emit_insn (gen_rtx_SET (VOIDmode, sgn,
41694 gen_rtx_AND (mode, mask, sign)));
41695 emit_insn (gen_rtx_SET (VOIDmode, result,
41696 gen_rtx_IOR (mode, abs_value, sgn)));
41697 }
41698
41699 /* Expand fabs (OP0) and return a new rtx that holds the result. The
41700 mask for masking out the sign-bit is stored in *SMASK, if that is
41701 non-null. */
41702 static rtx
41703 ix86_expand_sse_fabs (rtx op0, rtx *smask)
41704 {
41705 enum machine_mode vmode, mode = GET_MODE (op0);
41706 rtx xa, mask;
41707
41708 xa = gen_reg_rtx (mode);
41709 if (mode == SFmode)
41710 vmode = V4SFmode;
41711 else if (mode == DFmode)
41712 vmode = V2DFmode;
41713 else
41714 vmode = mode;
41715 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), true);
41716 if (!VECTOR_MODE_P (mode))
41717 {
41718 /* We need to generate a scalar mode mask in this case. */
41719 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
41720 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
41721 mask = gen_reg_rtx (mode);
41722 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
41723 }
41724 emit_insn (gen_rtx_SET (VOIDmode, xa,
41725 gen_rtx_AND (mode, op0, mask)));
41726
41727 if (smask)
41728 *smask = mask;
41729
41730 return xa;
41731 }
41732
41733 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
41734 swapping the operands if SWAP_OPERANDS is true. The expanded
41735 code is a forward jump to a newly created label in case the
41736 comparison is true. The generated label rtx is returned. */
41737 static rtx
41738 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
41739 bool swap_operands)
41740 {
41741 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
41742 rtx label, tmp;
41743
41744 if (swap_operands)
41745 {
41746 tmp = op0;
41747 op0 = op1;
41748 op1 = tmp;
41749 }
41750
41751 label = gen_label_rtx ();
41752 tmp = gen_rtx_REG (fpcmp_mode, FLAGS_REG);
41753 emit_insn (gen_rtx_SET (VOIDmode, tmp,
41754 gen_rtx_COMPARE (fpcmp_mode, op0, op1)));
41755 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
41756 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
41757 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
41758 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
41759 JUMP_LABEL (tmp) = label;
41760
41761 return label;
41762 }
41763
41764 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
41765 using comparison code CODE. Operands are swapped for the comparison if
41766 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
41767 static rtx
41768 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
41769 bool swap_operands)
41770 {
41771 rtx (*insn)(rtx, rtx, rtx, rtx);
41772 enum machine_mode mode = GET_MODE (op0);
41773 rtx mask = gen_reg_rtx (mode);
41774
41775 if (swap_operands)
41776 {
41777 rtx tmp = op0;
41778 op0 = op1;
41779 op1 = tmp;
41780 }
41781
41782 insn = mode == DFmode ? gen_setcc_df_sse : gen_setcc_sf_sse;
41783
41784 emit_insn (insn (mask, op0, op1,
41785 gen_rtx_fmt_ee (code, mode, op0, op1)));
41786 return mask;
41787 }
41788
41789 /* Generate and return a rtx of mode MODE for 2**n where n is the number
41790 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
41791 static rtx
41792 ix86_gen_TWO52 (enum machine_mode mode)
41793 {
41794 REAL_VALUE_TYPE TWO52r;
41795 rtx TWO52;
41796
41797 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
41798 TWO52 = const_double_from_real_value (TWO52r, mode);
41799 TWO52 = force_reg (mode, TWO52);
41800
41801 return TWO52;
41802 }
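/* The 2**52 (resp. 2**23 for SFmode) constant is used below for the
classic add-and-subtract rounding trick: for a nonnegative double
x < 2**52, computing (x + 2**52) - 2**52 rounds x to an integer in the
current rounding mode, because at magnitude 2**52 the spacing between
adjacent doubles is 1.0 and no fraction bits remain.  For example,
3.7 + 2**52 rounds to 4 + 2**52, and subtracting 2**52 again
yields 4.0. */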
41803
41804 /* Expand SSE sequence for computing lround from OP1 storing
41805 into OP0. */
41806 void
41807 ix86_expand_lround (rtx op0, rtx op1)
41808 {
41809 /* C code for the stuff we're doing below:
41810 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
41811 return (long)tmp;
41812 */
41813 enum machine_mode mode = GET_MODE (op1);
41814 const struct real_format *fmt;
41815 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
41816 rtx adj;
41817
41818 /* load nextafter (0.5, 0.0) */
41819 fmt = REAL_MODE_FORMAT (mode);
41820 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
41821 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
41822
41823 /* adj = copysign (0.5, op1) */
41824 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
41825 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
41826
41827 /* adj = op1 + adj */
41828 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
41829
41830 /* op0 = (imode)adj */
41831 expand_fix (op0, adj, 0);
41832 }
41833
41834 /* Expand SSE2 sequence for computing lfloor or lceil (depending on
41835 DO_FLOOR) from OP1 storing into OP0. */
41836 void
41837 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
41838 {
41839 /* C code for the stuff we're doing below (for do_floor):
41840 xi = (long)op1;
41841 xi -= (double)xi > op1 ? 1 : 0;
41842 return xi;
41843 */
41844 enum machine_mode fmode = GET_MODE (op1);
41845 enum machine_mode imode = GET_MODE (op0);
41846 rtx ireg, freg, label, tmp;
41847
41848 /* reg = (long)op1 */
41849 ireg = gen_reg_rtx (imode);
41850 expand_fix (ireg, op1, 0);
41851
41852 /* freg = (double)reg */
41853 freg = gen_reg_rtx (fmode);
41854 expand_float (freg, ireg, 0);
41855
41856 /* ireg = (freg > op1) ? ireg - 1 : ireg */
41857 label = ix86_expand_sse_compare_and_jump (UNLE,
41858 freg, op1, !do_floor);
41859 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
41860 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
41861 emit_move_insn (ireg, tmp);
41862
41863 emit_label (label);
41864 LABEL_NUSES (label) = 1;
41865
41866 emit_move_insn (op0, ireg);
41867 }
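/* For the do_floor == false (lceil) case the sketch above becomes, with
the comparison direction and the adjustment sign flipped:
   xi = (long)op1;
   xi += (double)xi < op1 ? 1 : 0;
   return xi;
This is merely an illustrative restatement of what the code emits. */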
41868
41869 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
41870 result in OPERAND0. */
41871 void
41872 ix86_expand_rint (rtx operand0, rtx operand1)
41873 {
41874 /* C code for the stuff we're doing below:
41875 xa = fabs (operand1);
41876 if (!isless (xa, 2**52))
41877 return operand1;
41878 xa = xa + 2**52 - 2**52;
41879 return copysign (xa, operand1);
41880 */
41881 enum machine_mode mode = GET_MODE (operand0);
41882 rtx res, xa, label, TWO52, mask;
41883
41884 res = gen_reg_rtx (mode);
41885 emit_move_insn (res, operand1);
41886
41887 /* xa = abs (operand1) */
41888 xa = ix86_expand_sse_fabs (res, &mask);
41889
41890 /* if (!isless (xa, TWO52)) goto label; */
41891 TWO52 = ix86_gen_TWO52 (mode);
41892 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
41893
41894 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
41895 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
41896
41897 ix86_sse_copysign_to_positive (res, xa, res, mask);
41898
41899 emit_label (label);
41900 LABEL_NUSES (label) = 1;
41901
41902 emit_move_insn (operand0, res);
41903 }
41904
41905 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
41906 into OPERAND0. */
41907 void
41908 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
41909 {
41910 /* C code for the stuff we expand below.
41911 double xa = fabs (x), x2;
41912 if (!isless (xa, TWO52))
41913 return x;
41914 xa = xa + TWO52 - TWO52;
41915 x2 = copysign (xa, x);
41916 Compensate. Floor:
41917 if (x2 > x)
41918 x2 -= 1;
41919 Compensate. Ceil:
41920 if (x2 < x)
41921 x2 -= -1;
41922 return x2;
41923 */
41924 enum machine_mode mode = GET_MODE (operand0);
41925 rtx xa, TWO52, tmp, label, one, res, mask;
41926
41927 TWO52 = ix86_gen_TWO52 (mode);
41928
41929 /* Temporary for holding the result, initialized to the input
41930 operand to ease control flow. */
41931 res = gen_reg_rtx (mode);
41932 emit_move_insn (res, operand1);
41933
41934 /* xa = abs (operand1) */
41935 xa = ix86_expand_sse_fabs (res, &mask);
41936
41937 /* if (!isless (xa, TWO52)) goto label; */
41938 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
41939
41940 /* xa = xa + TWO52 - TWO52; */
41941 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
41942 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
41943
41944 /* xa = copysign (xa, operand1) */
41945 ix86_sse_copysign_to_positive (xa, xa, res, mask);
41946
41947 /* generate 1.0 or -1.0 */
41948 one = force_reg (mode,
41949 const_double_from_real_value (do_floor
41950 ? dconst1 : dconstm1, mode));
41951
41952 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
41953 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
41954 emit_insn (gen_rtx_SET (VOIDmode, tmp,
41955 gen_rtx_AND (mode, one, tmp)));
41956 /* We always need to subtract here to preserve signed zero. */
41957 tmp = expand_simple_binop (mode, MINUS,
41958 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
41959 emit_move_insn (res, tmp);
41960
41961 emit_label (label);
41962 LABEL_NUSES (label) = 1;
41963
41964 emit_move_insn (operand0, res);
41965 }
41966
41967 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
41968 into OPERAND0. */
41969 void
41970 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
41971 {
41972 /* C code for the stuff we expand below.
41973 double xa = fabs (x), x2;
41974 if (!isless (xa, TWO52))
41975 return x;
41976 x2 = (double)(long)x;
41977 Compensate. Floor:
41978 if (x2 > x)
41979 x2 -= 1;
41980 Compensate. Ceil:
41981 if (x2 < x)
41982 x2 += 1;
41983 if (HONOR_SIGNED_ZEROS (mode))
41984 return copysign (x2, x);
41985 return x2;
41986 */
41987 enum machine_mode mode = GET_MODE (operand0);
41988 rtx xa, xi, TWO52, tmp, label, one, res, mask;
41989
41990 TWO52 = ix86_gen_TWO52 (mode);
41991
41992 /* Temporary for holding the result, initialized to the input
41993 operand to ease control flow. */
41994 res = gen_reg_rtx (mode);
41995 emit_move_insn (res, operand1);
41996
41997 /* xa = abs (operand1) */
41998 xa = ix86_expand_sse_fabs (res, &mask);
41999
42000 /* if (!isless (xa, TWO52)) goto label; */
42001 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
42002
42003 /* xa = (double)(long)x */
42004 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
42005 expand_fix (xi, res, 0);
42006 expand_float (xa, xi, 0);
42007
42008 /* generate 1.0 */
42009 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
42010
42011 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
42012 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
42013 emit_insn (gen_rtx_SET (VOIDmode, tmp,
42014 gen_rtx_AND (mode, one, tmp)));
42015 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
42016 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
42017 emit_move_insn (res, tmp);
42018
42019 if (HONOR_SIGNED_ZEROS (mode))
42020 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
42021
42022 emit_label (label);
42023 LABEL_NUSES (label) = 1;
42024
42025 emit_move_insn (operand0, res);
42026 }
42027
42028 /* Expand SSE sequence for computing round from OPERAND1 storing
42029 into OPERAND0. The sequence works without relying on DImode truncation
42030 via cvttsd2siq, which is only available on 64-bit targets. */
42031 void
42032 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
42033 {
42034 /* C code for the stuff we expand below.
42035 double xa = fabs (x), xa2, x2;
42036 if (!isless (xa, TWO52))
42037 return x;
42038 Working on the absolute value and copying the sign back afterwards
42039 makes -0.0 -> -0.0 come out correctly.
42040 xa2 = xa + TWO52 - TWO52;
42041 Compensate.
42042 dxa = xa2 - xa;
42043 if (dxa <= -0.5)
42044 xa2 += 1;
42045 else if (dxa > 0.5)
42046 xa2 -= 1;
42047 x2 = copysign (xa2, x);
42048 return x2;
42049 */
42050 enum machine_mode mode = GET_MODE (operand0);
42051 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
42052
42053 TWO52 = ix86_gen_TWO52 (mode);
42054
42055 /* Temporary for holding the result, initialized to the input
42056 operand to ease control flow. */
42057 res = gen_reg_rtx (mode);
42058 emit_move_insn (res, operand1);
42059
42060 /* xa = abs (operand1) */
42061 xa = ix86_expand_sse_fabs (res, &mask);
42062
42063 /* if (!isless (xa, TWO52)) goto label; */
42064 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
42065
42066 /* xa2 = xa + TWO52 - TWO52; */
42067 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
42068 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
42069
42070 /* dxa = xa2 - xa; */
42071 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
42072
42073 /* generate 0.5, 1.0 and -0.5 */
42074 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
42075 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
42076 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
42077 0, OPTAB_DIRECT);
42078
42079 /* Compensate. */
42080 tmp = gen_reg_rtx (mode);
42081 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
42082 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
42083 emit_insn (gen_rtx_SET (VOIDmode, tmp,
42084 gen_rtx_AND (mode, one, tmp)));
42085 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
42086 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
42087 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
42088 emit_insn (gen_rtx_SET (VOIDmode, tmp,
42089 gen_rtx_AND (mode, one, tmp)));
42090 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
42091
42092 /* res = copysign (xa2, operand1) */
42093 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
42094
42095 emit_label (label);
42096 LABEL_NUSES (label) = 1;
42097
42098 emit_move_insn (operand0, res);
42099 }
42100
42101 /* Expand SSE sequence for computing trunc from OPERAND1 storing
42102 into OPERAND0. */
42103 void
42104 ix86_expand_trunc (rtx operand0, rtx operand1)
42105 {
42106 /* C code for SSE variant we expand below.
42107 double xa = fabs (x), x2;
42108 if (!isless (xa, TWO52))
42109 return x;
42110 x2 = (double)(long)x;
42111 if (HONOR_SIGNED_ZEROS (mode))
42112 return copysign (x2, x);
42113 return x2;
42114 */
42115 enum machine_mode mode = GET_MODE (operand0);
42116 rtx xa, xi, TWO52, label, res, mask;
42117
42118 TWO52 = ix86_gen_TWO52 (mode);
42119
42120 /* Temporary for holding the result, initialized to the input
42121 operand to ease control flow. */
42122 res = gen_reg_rtx (mode);
42123 emit_move_insn (res, operand1);
42124
42125 /* xa = abs (operand1) */
42126 xa = ix86_expand_sse_fabs (res, &mask);
42127
42128 /* if (!isless (xa, TWO52)) goto label; */
42129 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
42130
42131 /* x = (double)(long)x */
42132 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
42133 expand_fix (xi, res, 0);
42134 expand_float (res, xi, 0);
42135
42136 if (HONOR_SIGNED_ZEROS (mode))
42137 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
42138
42139 emit_label (label);
42140 LABEL_NUSES (label) = 1;
42141
42142 emit_move_insn (operand0, res);
42143 }
42144
42145 /* Expand SSE sequence for computing trunc from OPERAND1 storing into
42146 OPERAND0 without relying on DImode truncation (cf. ix86_expand_trunc). */
42147 void
42148 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
42149 {
42150 enum machine_mode mode = GET_MODE (operand0);
42151 rtx xa, mask, TWO52, label, one, res, smask, tmp;
42152
42153 /* C code for SSE variant we expand below.
42154 double xa = fabs (x), xa2, x2;
42155 if (!isless (xa, TWO52))
42156 return x;
42157 xa2 = xa + TWO52 - TWO52;
42158 Compensate:
42159 if (xa2 > xa)
42160 xa2 -= 1.0;
42161 x2 = copysign (xa2, x);
42162 return x2;
42163 */
42164
42165 TWO52 = ix86_gen_TWO52 (mode);
42166
42167 /* Temporary for holding the result, initialized to the input
42168 operand to ease control flow. */
42169 res = gen_reg_rtx (mode);
42170 emit_move_insn (res, operand1);
42171
42172 /* xa = abs (operand1) */
42173 xa = ix86_expand_sse_fabs (res, &smask);
42174
42175 /* if (!isless (xa, TWO52)) goto label; */
42176 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
42177
42178 /* res = xa + TWO52 - TWO52; */
42179 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
42180 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
42181 emit_move_insn (res, tmp);
42182
42183 /* generate 1.0 */
42184 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
42185
42186 /* Compensate: res = res - (res > xa ? 1 : 0) */
42187 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
42188 emit_insn (gen_rtx_SET (VOIDmode, mask,
42189 gen_rtx_AND (mode, mask, one)));
42190 tmp = expand_simple_binop (mode, MINUS,
42191 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
42192 emit_move_insn (res, tmp);
42193
42194 /* res = copysign (res, operand1) */
42195 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
42196
42197 emit_label (label);
42198 LABEL_NUSES (label) = 1;
42199
42200 emit_move_insn (operand0, res);
42201 }
42202
42203 /* Expand SSE sequence for computing round from OPERAND1 storing
42204 into OPERAND0. */
42205 void
42206 ix86_expand_round (rtx operand0, rtx operand1)
42207 {
42208 /* C code for the stuff we're doing below:
42209 double xa = fabs (x);
42210 if (!isless (xa, TWO52))
42211 return x;
42212 xa = (double)(long)(xa + nextafter (0.5, 0.0));
42213 return copysign (xa, x);
42214 */
42215 enum machine_mode mode = GET_MODE (operand0);
42216 rtx res, TWO52, xa, label, xi, half, mask;
42217 const struct real_format *fmt;
42218 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
42219
42220 /* Temporary for holding the result, initialized to the input
42221 operand to ease control flow. */
42222 res = gen_reg_rtx (mode);
42223 emit_move_insn (res, operand1);
42224
42225 TWO52 = ix86_gen_TWO52 (mode);
42226 xa = ix86_expand_sse_fabs (res, &mask);
42227 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
42228
42229 /* load nextafter (0.5, 0.0) */
42230 fmt = REAL_MODE_FORMAT (mode);
42231 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
42232 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
42233
42234 /* xa = xa + 0.5 */
42235 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
42236 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
42237
42238 /* xa = (double)(int64_t)xa */
42239 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
42240 expand_fix (xi, xa, 0);
42241 expand_float (xa, xi, 0);
42242
42243 /* res = copysign (xa, operand1) */
42244 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
42245
42246 emit_label (label);
42247 LABEL_NUSES (label) = 1;
42248
42249 emit_move_insn (operand0, res);
42250 }
42251
42252 /* Expand SSE sequence for computing round
42253 from OP1 storing into OP0 using sse4 round insn. */
42254 void
42255 ix86_expand_round_sse4 (rtx op0, rtx op1)
42256 {
42257 enum machine_mode mode = GET_MODE (op0);
42258 rtx e1, e2, res, half;
42259 const struct real_format *fmt;
42260 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
42261 rtx (*gen_copysign) (rtx, rtx, rtx);
42262 rtx (*gen_round) (rtx, rtx, rtx);
42263
42264 switch (mode)
42265 {
42266 case SFmode:
42267 gen_copysign = gen_copysignsf3;
42268 gen_round = gen_sse4_1_roundsf2;
42269 break;
42270 case DFmode:
42271 gen_copysign = gen_copysigndf3;
42272 gen_round = gen_sse4_1_rounddf2;
42273 break;
42274 default:
42275 gcc_unreachable ();
42276 }
42277
42278 /* round (a) = trunc (a + copysign (0.5, a)) */
42279
42280 /* load nextafter (0.5, 0.0) */
42281 fmt = REAL_MODE_FORMAT (mode);
42282 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
42283 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
42284 half = const_double_from_real_value (pred_half, mode);
42285
42286 /* e1 = copysign (0.5, op1) */
42287 e1 = gen_reg_rtx (mode);
42288 emit_insn (gen_copysign (e1, half, op1));
42289
42290 /* e2 = op1 + e1 */
42291 e2 = expand_simple_binop (mode, PLUS, op1, e1, NULL_RTX, 0, OPTAB_DIRECT);
42292
42293 /* res = trunc (e2) */
42294 res = gen_reg_rtx (mode);
42295 emit_insn (gen_round (res, e2, GEN_INT (ROUND_TRUNC)));
42296
42297 emit_move_insn (op0, res);
42298 }
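/* A rough C-level equivalent of the SSE4.1 sequence above is
   d = copysign (nextafter (0.5, 0.0), op1);
   return trunc (op1 + d);
nextafter (0.5, 0.0) is used instead of 0.5 so that inputs just below
0.5, such as 0.49999999999999994, are not pushed up to 1.0 by the
rounding of the addition. */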
42299 \f
42300
42301 /* Table of valid machine attributes. */
42302 static const struct attribute_spec ix86_attribute_table[] =
42303 {
42304 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
42305 affects_type_identity } */
42306 /* Stdcall attribute says callee is responsible for popping arguments
42307 if they are not variable. */
42308 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
42309 true },
42310 /* Fastcall attribute says callee is responsible for popping arguments
42311 if they are not variable. */
42312 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
42313 true },
42314 /* Thiscall attribute says callee is responsible for popping arguments
42315 if they are not variable. */
42316 { "thiscall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
42317 true },
42318 /* Cdecl attribute says the callee is a normal C declaration */
42319 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute,
42320 true },
42321 /* Regparm attribute specifies how many integer arguments are to be
42322 passed in registers. */
42323 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute,
42324 true },
42325 /* Sseregparm attribute says we are using x86_64 calling conventions
42326 for FP arguments. */
42327 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute,
42328 true },
42329 /* The transactional memory builtins are implicitly regparm or fastcall
42330 depending on the ABI. Override the generic do-nothing attribute that
42331 these builtins were declared with. */
42332 { "*tm regparm", 0, 0, false, true, true, ix86_handle_tm_regparm_attribute,
42333 true },
42334 /* force_align_arg_pointer says this function realigns the stack at entry. */
42335 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
42336 false, true, true, ix86_handle_cconv_attribute, false },
42337 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
42338 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
42339 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
42340 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute,
42341 false },
42342 #endif
42343 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
42344 false },
42345 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
42346 false },
42347 #ifdef SUBTARGET_ATTRIBUTE_TABLE
42348 SUBTARGET_ATTRIBUTE_TABLE,
42349 #endif
42350 /* ms_abi and sysv_abi calling convention function attributes. */
42351 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
42352 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
42353 { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute,
42354 false },
42355 { "callee_pop_aggregate_return", 1, 1, false, true, true,
42356 ix86_handle_callee_pop_aggregate_return, true },
42357 /* End element. */
42358 { NULL, 0, 0, false, false, false, NULL, false }
42359 };
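/* For reference, these attributes appear in user code along the
following lines (illustrative declarations only; see the "Function
Attributes" and "Type Attributes" sections of the GCC manual):

   int  __attribute__ ((fastcall))    f (int a, int b);
   int  __attribute__ ((regparm (3))) g (int a, int b, int c);
   void __attribute__ ((ms_abi))      h (void);
   struct __attribute__ ((ms_struct)) s { char c; int i; };  */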
42360
42361 /* Implement targetm.vectorize.builtin_vectorization_cost. */
42362 static int
42363 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
42364 tree vectype, int)
42365 {
42366 unsigned elements;
42367
42368 switch (type_of_cost)
42369 {
42370 case scalar_stmt:
42371 return ix86_cost->scalar_stmt_cost;
42372
42373 case scalar_load:
42374 return ix86_cost->scalar_load_cost;
42375
42376 case scalar_store:
42377 return ix86_cost->scalar_store_cost;
42378
42379 case vector_stmt:
42380 return ix86_cost->vec_stmt_cost;
42381
42382 case vector_load:
42383 return ix86_cost->vec_align_load_cost;
42384
42385 case vector_store:
42386 return ix86_cost->vec_store_cost;
42387
42388 case vec_to_scalar:
42389 return ix86_cost->vec_to_scalar_cost;
42390
42391 case scalar_to_vec:
42392 return ix86_cost->scalar_to_vec_cost;
42393
42394 case unaligned_load:
42395 case unaligned_store:
42396 return ix86_cost->vec_unalign_load_cost;
42397
42398 case cond_branch_taken:
42399 return ix86_cost->cond_taken_branch_cost;
42400
42401 case cond_branch_not_taken:
42402 return ix86_cost->cond_not_taken_branch_cost;
42403
42404 case vec_perm:
42405 case vec_promote_demote:
42406 return ix86_cost->vec_stmt_cost;
42407
42408 case vec_construct:
42409 elements = TYPE_VECTOR_SUBPARTS (vectype);
42410 return elements / 2 + 1;
42411
42412 default:
42413 gcc_unreachable ();
42414 }
42415 }
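/* For example, a vec_construct of a V8HImode vector (8 elements) is
costed as 8 / 2 + 1 = 5 above; all other entries simply return the
corresponding per-statement cost from the active cost table
(ix86_cost). */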
42416
42417 /* A cached (set (nil) (vselect (vconcat (nil) (nil)) (parallel [])))
42418 insn, so that expand_vselect{,_vconcat} doesn't have to create a fresh
42419 insn every time. */
42420
42421 static GTY(()) rtx vselect_insn;
42422
42423 /* Initialize vselect_insn. */
42424
42425 static void
42426 init_vselect_insn (void)
42427 {
42428 unsigned i;
42429 rtx x;
42430
42431 x = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (MAX_VECT_LEN));
42432 for (i = 0; i < MAX_VECT_LEN; ++i)
42433 XVECEXP (x, 0, i) = const0_rtx;
42434 x = gen_rtx_VEC_SELECT (V2DFmode, gen_rtx_VEC_CONCAT (V4DFmode, const0_rtx,
42435 const0_rtx), x);
42436 x = gen_rtx_SET (VOIDmode, const0_rtx, x);
42437 start_sequence ();
42438 vselect_insn = emit_insn (x);
42439 end_sequence ();
42440 }
42441
42442 /* Construct (set target (vec_select op0 (parallel perm))) and
42443 return true if that's a valid instruction in the active ISA. */
42444
42445 static bool
42446 expand_vselect (rtx target, rtx op0, const unsigned char *perm,
42447 unsigned nelt, bool testing_p)
42448 {
42449 unsigned int i;
42450 rtx x, save_vconcat;
42451 int icode;
42452
42453 if (vselect_insn == NULL_RTX)
42454 init_vselect_insn ();
42455
42456 x = XEXP (SET_SRC (PATTERN (vselect_insn)), 1);
42457 PUT_NUM_ELEM (XVEC (x, 0), nelt);
42458 for (i = 0; i < nelt; ++i)
42459 XVECEXP (x, 0, i) = GEN_INT (perm[i]);
42460 save_vconcat = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
42461 XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = op0;
42462 PUT_MODE (SET_SRC (PATTERN (vselect_insn)), GET_MODE (target));
42463 SET_DEST (PATTERN (vselect_insn)) = target;
42464 icode = recog_memoized (vselect_insn);
42465
42466 if (icode >= 0 && !testing_p)
42467 emit_insn (copy_rtx (PATTERN (vselect_insn)));
42468
42469 SET_DEST (PATTERN (vselect_insn)) = const0_rtx;
42470 XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = save_vconcat;
42471 INSN_CODE (vselect_insn) = -1;
42472
42473 return icode >= 0;
42474 }
42475
42476 /* Similar, but generate a vec_concat from op0 and op1 as well. */
42477
42478 static bool
42479 expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
42480 const unsigned char *perm, unsigned nelt,
42481 bool testing_p)
42482 {
42483 enum machine_mode v2mode;
42484 rtx x;
42485 bool ok;
42486
42487 if (vselect_insn == NULL_RTX)
42488 init_vselect_insn ();
42489
42490 v2mode = GET_MODE_2XWIDER_MODE (GET_MODE (op0));
42491 x = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
42492 PUT_MODE (x, v2mode);
42493 XEXP (x, 0) = op0;
42494 XEXP (x, 1) = op1;
42495 ok = expand_vselect (target, x, perm, nelt, testing_p);
42496 XEXP (x, 0) = const0_rtx;
42497 XEXP (x, 1) = const0_rtx;
42498 return ok;
42499 }
42500
42501 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
42502 in terms of blendp[sd] / pblendw / pblendvb / vpblendd. */
42503
42504 static bool
42505 expand_vec_perm_blend (struct expand_vec_perm_d *d)
42506 {
42507 enum machine_mode vmode = d->vmode;
42508 unsigned i, mask, nelt = d->nelt;
42509 rtx target, op0, op1, x;
42510 rtx rperm[32], vperm;
42511
42512 if (d->one_operand_p)
42513 return false;
42514 if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
42515 ;
42516 else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
42517 ;
42518 else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
42519 ;
42520 else
42521 return false;
42522
42523 /* This is a blend, not a permute. Elements must stay in their
42524 respective lanes. */
42525 for (i = 0; i < nelt; ++i)
42526 {
42527 unsigned e = d->perm[i];
42528 if (!(e == i || e == i + nelt))
42529 return false;
42530 }
42531
42532 if (d->testing_p)
42533 return true;
42534
42535 /* ??? Without SSE4.1, we could implement this with and/andn/or. This
42536 decision should be extracted elsewhere, so that we only try that
42537 sequence once all budget==3 options have been tried. */
42538 target = d->target;
42539 op0 = d->op0;
42540 op1 = d->op1;
42541 mask = 0;
42542
42543 switch (vmode)
42544 {
42545 case V4DFmode:
42546 case V8SFmode:
42547 case V2DFmode:
42548 case V4SFmode:
42549 case V8HImode:
42550 case V8SImode:
42551 for (i = 0; i < nelt; ++i)
42552 mask |= (d->perm[i] >= nelt) << i;
42553 break;
42554
42555 case V2DImode:
42556 for (i = 0; i < 2; ++i)
42557 mask |= (d->perm[i] >= 2 ? 15 : 0) << (i * 4);
42558 vmode = V8HImode;
42559 goto do_subreg;
42560
42561 case V4SImode:
42562 for (i = 0; i < 4; ++i)
42563 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
42564 vmode = V8HImode;
42565 goto do_subreg;
42566
42567 case V16QImode:
42568 /* See if bytes move in pairs so we can use pblendw with
42569 an immediate argument, rather than pblendvb with a vector
42570 argument. */
42571 for (i = 0; i < 16; i += 2)
42572 if (d->perm[i] + 1 != d->perm[i + 1])
42573 {
42574 use_pblendvb:
42575 for (i = 0; i < nelt; ++i)
42576 rperm[i] = (d->perm[i] < nelt ? const0_rtx : constm1_rtx);
42577
42578 finish_pblendvb:
42579 vperm = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
42580 vperm = force_reg (vmode, vperm);
42581
42582 if (GET_MODE_SIZE (vmode) == 16)
42583 emit_insn (gen_sse4_1_pblendvb (target, op0, op1, vperm));
42584 else
42585 emit_insn (gen_avx2_pblendvb (target, op0, op1, vperm));
42586 if (target != d->target)
42587 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
42588 return true;
42589 }
42590
42591 for (i = 0; i < 8; ++i)
42592 mask |= (d->perm[i * 2] >= 16) << i;
42593 vmode = V8HImode;
42594 /* FALLTHRU */
42595
42596 do_subreg:
42597 target = gen_reg_rtx (vmode);
42598 op0 = gen_lowpart (vmode, op0);
42599 op1 = gen_lowpart (vmode, op1);
42600 break;
42601
42602 case V32QImode:
42603 /* See if bytes move in pairs. If not, vpblendvb must be used. */
42604 for (i = 0; i < 32; i += 2)
42605 if (d->perm[i] + 1 != d->perm[i + 1])
42606 goto use_pblendvb;
42607 /* See if bytes move in quadruplets. If yes, vpblendd
42608 with immediate can be used. */
42609 for (i = 0; i < 32; i += 4)
42610 if (d->perm[i] + 2 != d->perm[i + 2])
42611 break;
42612 if (i < 32)
42613 {
42614 /* See if bytes move the same in both lanes. If yes,
42615 vpblendw with immediate can be used. */
42616 for (i = 0; i < 16; i += 2)
42617 if (d->perm[i] + 16 != d->perm[i + 16])
42618 goto use_pblendvb;
42619
42620 /* Use vpblendw. */
42621 for (i = 0; i < 16; ++i)
42622 mask |= (d->perm[i * 2] >= 32) << i;
42623 vmode = V16HImode;
42624 goto do_subreg;
42625 }
42626
42627 /* Use vpblendd. */
42628 for (i = 0; i < 8; ++i)
42629 mask |= (d->perm[i * 4] >= 32) << i;
42630 vmode = V8SImode;
42631 goto do_subreg;
42632
42633 case V16HImode:
42634 /* See if words move in pairs. If yes, vpblendd can be used. */
42635 for (i = 0; i < 16; i += 2)
42636 if (d->perm[i] + 1 != d->perm[i + 1])
42637 break;
42638 if (i < 16)
42639 {
42640 /* See if words move the same in both lanes. If not,
42641 vpblendvb must be used. */
42642 for (i = 0; i < 8; i++)
42643 if (d->perm[i] + 8 != d->perm[i + 8])
42644 {
42645 /* Use vpblendvb. */
42646 for (i = 0; i < 32; ++i)
42647 rperm[i] = (d->perm[i / 2] < 16 ? const0_rtx : constm1_rtx);
42648
42649 vmode = V32QImode;
42650 nelt = 32;
42651 target = gen_reg_rtx (vmode);
42652 op0 = gen_lowpart (vmode, op0);
42653 op1 = gen_lowpart (vmode, op1);
42654 goto finish_pblendvb;
42655 }
42656
42657 /* Use vpblendw. */
42658 for (i = 0; i < 16; ++i)
42659 mask |= (d->perm[i] >= 16) << i;
42660 break;
42661 }
42662
42663 /* Use vpblendd. */
42664 for (i = 0; i < 8; ++i)
42665 mask |= (d->perm[i * 2] >= 16) << i;
42666 vmode = V8SImode;
42667 goto do_subreg;
42668
42669 case V4DImode:
42670 /* Use vpblendd. */
42671 for (i = 0; i < 4; ++i)
42672 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
42673 vmode = V8SImode;
42674 goto do_subreg;
42675
42676 default:
42677 gcc_unreachable ();
42678 }
42679
42680 /* This matches five different patterns, depending on the mode. */
42681 x = gen_rtx_VEC_MERGE (vmode, op1, op0, GEN_INT (mask));
42682 x = gen_rtx_SET (VOIDmode, target, x);
42683 emit_insn (x);
42684 if (target != d->target)
42685 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
42686
42687 return true;
42688 }
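/* A small worked example (illustrative only): for V8HImode with
d->perm = { 0, 9, 2, 11, 4, 13, 6, 15 } every element stays in its lane
and the odd elements come from op1, so the loop above builds
mask = 0xaa and a single pblendw with that immediate implements the
permutation. */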
42689
42690 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
42691 in terms of the variable form of vpermilps.
42692
42693 Note that we will have already failed the immediate input vpermilps,
42694 which requires that the high and low part shuffle be identical; the
42695 variable form doesn't require that. */
42696
42697 static bool
42698 expand_vec_perm_vpermil (struct expand_vec_perm_d *d)
42699 {
42700 rtx rperm[8], vperm;
42701 unsigned i;
42702
42703 if (!TARGET_AVX || d->vmode != V8SFmode || !d->one_operand_p)
42704 return false;
42705
42706 /* We can only permute within the 128-bit lane. */
42707 for (i = 0; i < 8; ++i)
42708 {
42709 unsigned e = d->perm[i];
42710 if (i < 4 ? e >= 4 : e < 4)
42711 return false;
42712 }
42713
42714 if (d->testing_p)
42715 return true;
42716
42717 for (i = 0; i < 8; ++i)
42718 {
42719 unsigned e = d->perm[i];
42720
42721 /* Within each 128-bit lane, the elements of op0 are numbered
42722 from 0 and the elements of op1 are numbered from 4. */
42723 if (e >= 8 + 4)
42724 e -= 8;
42725 else if (e >= 4)
42726 e -= 4;
42727
42728 rperm[i] = GEN_INT (e);
42729 }
42730
42731 vperm = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, rperm));
42732 vperm = force_reg (V8SImode, vperm);
42733 emit_insn (gen_avx_vpermilvarv8sf3 (d->target, d->op0, vperm));
42734
42735 return true;
42736 }
42737
42738 /* Return true if permutation D can be performed as VMODE permutation
42739 instead. */
42740
42741 static bool
42742 valid_perm_using_mode_p (enum machine_mode vmode, struct expand_vec_perm_d *d)
42743 {
42744 unsigned int i, j, chunk;
42745
42746 if (GET_MODE_CLASS (vmode) != MODE_VECTOR_INT
42747 || GET_MODE_CLASS (d->vmode) != MODE_VECTOR_INT
42748 || GET_MODE_SIZE (vmode) != GET_MODE_SIZE (d->vmode))
42749 return false;
42750
42751 if (GET_MODE_NUNITS (vmode) >= d->nelt)
42752 return true;
42753
42754 chunk = d->nelt / GET_MODE_NUNITS (vmode);
42755 for (i = 0; i < d->nelt; i += chunk)
42756 if (d->perm[i] & (chunk - 1))
42757 return false;
42758 else
42759 for (j = 1; j < chunk; ++j)
42760 if (d->perm[i] + j != d->perm[i + j])
42761 return false;
42762
42763 return true;
42764 }
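/* For instance (illustrative), the V16QImode permutation
{ 4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15, 8, 9, 10, 11 } moves whole
4-byte chunks, so it is also valid as the V4SImode permutation
{ 1, 0, 3, 2 } and the wider mode can be used instead. */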
42765
42766 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
42767 in terms of pshufb, vpperm, vpermq, vpermd, vpermps or vperm2i128. */
42768
42769 static bool
42770 expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
42771 {
42772 unsigned i, nelt, eltsz, mask;
42773 unsigned char perm[32];
42774 enum machine_mode vmode = V16QImode;
42775 rtx rperm[32], vperm, target, op0, op1;
42776
42777 nelt = d->nelt;
42778
42779 if (!d->one_operand_p)
42780 {
42781 if (!TARGET_XOP || GET_MODE_SIZE (d->vmode) != 16)
42782 {
42783 if (TARGET_AVX2
42784 && valid_perm_using_mode_p (V2TImode, d))
42785 {
42786 if (d->testing_p)
42787 return true;
42788
42789 /* Use vperm2i128 insn. The pattern uses
42790 V4DImode instead of V2TImode. */
42791 target = d->target;
42792 if (d->vmode != V4DImode)
42793 target = gen_reg_rtx (V4DImode);
42794 op0 = gen_lowpart (V4DImode, d->op0);
42795 op1 = gen_lowpart (V4DImode, d->op1);
42796 rperm[0]
42797 = GEN_INT ((d->perm[0] / (nelt / 2))
42798 | ((d->perm[nelt / 2] / (nelt / 2)) * 16));
42799 emit_insn (gen_avx2_permv2ti (target, op0, op1, rperm[0]));
42800 if (target != d->target)
42801 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
42802 return true;
42803 }
42804 return false;
42805 }
42806 }
42807 else
42808 {
42809 if (GET_MODE_SIZE (d->vmode) == 16)
42810 {
42811 if (!TARGET_SSSE3)
42812 return false;
42813 }
42814 else if (GET_MODE_SIZE (d->vmode) == 32)
42815 {
42816 if (!TARGET_AVX2)
42817 return false;
42818
42819 /* V4DImode should have already been handled through
42820 expand_vselect by the vpermq instruction. */
42821 gcc_assert (d->vmode != V4DImode);
42822
42823 vmode = V32QImode;
42824 if (d->vmode == V8SImode
42825 || d->vmode == V16HImode
42826 || d->vmode == V32QImode)
42827 {
42828 /* First see if vpermq can be used for
42829 V8SImode/V16HImode/V32QImode. */
42830 if (valid_perm_using_mode_p (V4DImode, d))
42831 {
42832 for (i = 0; i < 4; i++)
42833 perm[i] = (d->perm[i * nelt / 4] * 4 / nelt) & 3;
42834 if (d->testing_p)
42835 return true;
42836 target = gen_reg_rtx (V4DImode);
42837 if (expand_vselect (target, gen_lowpart (V4DImode, d->op0),
42838 perm, 4, false))
42839 {
42840 emit_move_insn (d->target,
42841 gen_lowpart (d->vmode, target));
42842 return true;
42843 }
42844 return false;
42845 }
42846
42847 /* Next see if vpermd can be used. */
42848 if (valid_perm_using_mode_p (V8SImode, d))
42849 vmode = V8SImode;
42850 }
42851 /* Or if vpermps can be used. */
42852 else if (d->vmode == V8SFmode)
42853 vmode = V8SImode;
42854
42855 if (vmode == V32QImode)
42856 {
42857 /* vpshufb only works within 128-bit lanes; it is not
42858 possible to shuffle bytes between the lanes. */
42859 for (i = 0; i < nelt; ++i)
42860 if ((d->perm[i] ^ i) & (nelt / 2))
42861 return false;
42862 }
42863 }
42864 else
42865 return false;
42866 }
42867
42868 if (d->testing_p)
42869 return true;
42870
42871 if (vmode == V8SImode)
42872 for (i = 0; i < 8; ++i)
42873 rperm[i] = GEN_INT ((d->perm[i * nelt / 8] * 8 / nelt) & 7);
42874 else
42875 {
42876 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
42877 if (!d->one_operand_p)
42878 mask = 2 * nelt - 1;
42879 else if (vmode == V16QImode)
42880 mask = nelt - 1;
42881 else
42882 mask = nelt / 2 - 1;
42883
42884 for (i = 0; i < nelt; ++i)
42885 {
42886 unsigned j, e = d->perm[i] & mask;
42887 for (j = 0; j < eltsz; ++j)
42888 rperm[i * eltsz + j] = GEN_INT (e * eltsz + j);
42889 }
42890 }
42891
42892 vperm = gen_rtx_CONST_VECTOR (vmode,
42893 gen_rtvec_v (GET_MODE_NUNITS (vmode), rperm));
42894 vperm = force_reg (vmode, vperm);
42895
42896 target = d->target;
42897 if (d->vmode != vmode)
42898 target = gen_reg_rtx (vmode);
42899 op0 = gen_lowpart (vmode, d->op0);
42900 if (d->one_operand_p)
42901 {
42902 if (vmode == V16QImode)
42903 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
42904 else if (vmode == V32QImode)
42905 emit_insn (gen_avx2_pshufbv32qi3 (target, op0, vperm));
42906 else if (vmode == V8SFmode)
42907 emit_insn (gen_avx2_permvarv8sf (target, op0, vperm));
42908 else
42909 emit_insn (gen_avx2_permvarv8si (target, op0, vperm));
42910 }
42911 else
42912 {
42913 op1 = gen_lowpart (vmode, d->op1);
42914 emit_insn (gen_xop_pperm (target, op0, op1, vperm));
42915 }
42916 if (target != d->target)
42917 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
42918
42919 return true;
42920 }
42921
42922 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to instantiate D
42923 in a single instruction. */
42924
42925 static bool
42926 expand_vec_perm_1 (struct expand_vec_perm_d *d)
42927 {
42928 unsigned i, nelt = d->nelt;
42929 unsigned char perm2[MAX_VECT_LEN];
42930
42931 /* Check plain VEC_SELECT first, because AVX has instructions that could
42932 match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
42933 input where SEL+CONCAT may not. */
42934 if (d->one_operand_p)
42935 {
42936 int mask = nelt - 1;
42937 bool identity_perm = true;
42938 bool broadcast_perm = true;
42939
42940 for (i = 0; i < nelt; i++)
42941 {
42942 perm2[i] = d->perm[i] & mask;
42943 if (perm2[i] != i)
42944 identity_perm = false;
42945 if (perm2[i])
42946 broadcast_perm = false;
42947 }
42948
42949 if (identity_perm)
42950 {
42951 if (!d->testing_p)
42952 emit_move_insn (d->target, d->op0);
42953 return true;
42954 }
42955 else if (broadcast_perm && TARGET_AVX2)
42956 {
42957 /* Use vpbroadcast{b,w,d}. */
42958 rtx (*gen) (rtx, rtx) = NULL;
42959 switch (d->vmode)
42960 {
42961 case V32QImode:
42962 gen = gen_avx2_pbroadcastv32qi_1;
42963 break;
42964 case V16HImode:
42965 gen = gen_avx2_pbroadcastv16hi_1;
42966 break;
42967 case V8SImode:
42968 gen = gen_avx2_pbroadcastv8si_1;
42969 break;
42970 case V16QImode:
42971 gen = gen_avx2_pbroadcastv16qi;
42972 break;
42973 case V8HImode:
42974 gen = gen_avx2_pbroadcastv8hi;
42975 break;
42976 case V8SFmode:
42977 gen = gen_avx2_vec_dupv8sf_1;
42978 break;
42979 /* For other modes prefer other shuffles this function creates. */
42980 default: break;
42981 }
42982 if (gen != NULL)
42983 {
42984 if (!d->testing_p)
42985 emit_insn (gen (d->target, d->op0));
42986 return true;
42987 }
42988 }
42989
42990 if (expand_vselect (d->target, d->op0, perm2, nelt, d->testing_p))
42991 return true;
42992
42993 /* There are plenty of patterns in sse.md that are written for
42994 SEL+CONCAT and are not replicated for a single op. Perhaps
42995 that should be changed, to avoid the nastiness here. */
42996
42997 /* Recognize interleave style patterns, which means incrementing
42998 every other permutation operand. */
42999 for (i = 0; i < nelt; i += 2)
43000 {
43001 perm2[i] = d->perm[i] & mask;
43002 perm2[i + 1] = (d->perm[i + 1] & mask) + nelt;
43003 }
43004 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt,
43005 d->testing_p))
43006 return true;
43007
43008 /* Recognize shufps, which means adding {0, 0, nelt, nelt}. */
43009 if (nelt >= 4)
43010 {
43011 for (i = 0; i < nelt; i += 4)
43012 {
43013 perm2[i + 0] = d->perm[i + 0] & mask;
43014 perm2[i + 1] = d->perm[i + 1] & mask;
43015 perm2[i + 2] = (d->perm[i + 2] & mask) + nelt;
43016 perm2[i + 3] = (d->perm[i + 3] & mask) + nelt;
43017 }
43018
43019 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt,
43020 d->testing_p))
43021 return true;
43022 }
43023 }
43024
43025 /* Finally, try the fully general two operand permute. */
43026 if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt,
43027 d->testing_p))
43028 return true;
43029
43030 /* Recognize interleave style patterns with reversed operands. */
43031 if (!d->one_operand_p)
43032 {
43033 for (i = 0; i < nelt; ++i)
43034 {
43035 unsigned e = d->perm[i];
43036 if (e >= nelt)
43037 e -= nelt;
43038 else
43039 e += nelt;
43040 perm2[i] = e;
43041 }
43042
43043 if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt,
43044 d->testing_p))
43045 return true;
43046 }
43047
43048 /* Try the SSE4.1 blend variable merge instructions. */
43049 if (expand_vec_perm_blend (d))
43050 return true;
43051
43052 /* Try one of the AVX vpermil variable permutations. */
43053 if (expand_vec_perm_vpermil (d))
43054 return true;
43055
43056 /* Try the SSSE3 pshufb or XOP vpperm or AVX2 vperm2i128,
43057 vpshufb, vpermd, vpermps or vpermq variable permutation. */
43058 if (expand_vec_perm_pshufb (d))
43059 return true;
43060
43061 /* Try the AVX512F vpermi2 instructions. */
43062 rtx vec[64];
43063 enum machine_mode mode = d->vmode;
43064 if (mode == V8DFmode)
43065 mode = V8DImode;
43066 else if (mode == V16SFmode)
43067 mode = V16SImode;
43068 for (i = 0; i < nelt; ++i)
43069 vec[i] = GEN_INT (d->perm[i]);
43070 rtx mask = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt, vec));
43071 if (ix86_expand_vec_perm_vpermi2 (d->target, d->op0, mask, d->op1))
43072 return true;
43073
43074 return false;
43075 }
43076
43077 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
43078 in terms of a pair of pshuflw + pshufhw instructions. */
43079
43080 static bool
43081 expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d)
43082 {
43083 unsigned char perm2[MAX_VECT_LEN];
43084 unsigned i;
43085 bool ok;
43086
43087 if (d->vmode != V8HImode || !d->one_operand_p)
43088 return false;
43089
43090 /* The two permutations only operate in 64-bit lanes. */
43091 for (i = 0; i < 4; ++i)
43092 if (d->perm[i] >= 4)
43093 return false;
43094 for (i = 4; i < 8; ++i)
43095 if (d->perm[i] < 4)
43096 return false;
43097
43098 if (d->testing_p)
43099 return true;
43100
43101 /* Emit the pshuflw. */
43102 memcpy (perm2, d->perm, 4);
43103 for (i = 4; i < 8; ++i)
43104 perm2[i] = i;
43105 ok = expand_vselect (d->target, d->op0, perm2, 8, d->testing_p);
43106 gcc_assert (ok);
43107
43108 /* Emit the pshufhw. */
43109 memcpy (perm2 + 4, d->perm + 4, 4);
43110 for (i = 0; i < 4; ++i)
43111 perm2[i] = i;
43112 ok = expand_vselect (d->target, d->target, perm2, 8, d->testing_p);
43113 gcc_assert (ok);
43114
43115 return true;
43116 }
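/* Example (illustrative): the V8HImode permutation
{ 3, 2, 1, 0, 7, 6, 5, 4 } keeps the low and high four words in their
own 64-bit halves, so the code above emits
   pshuflw with { 3, 2, 1, 0, 4, 5, 6, 7 }
followed by
   pshufhw with { 0, 1, 2, 3, 7, 6, 5, 4 }. */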
43117
43118 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
43119 the permutation using the SSSE3 palignr instruction. This succeeds
43120 when all of the elements in PERM fit within one vector and we merely
43121 need to shift them down so that a single vector permutation has a
43122 chance to succeed. */
43123
43124 static bool
43125 expand_vec_perm_palignr (struct expand_vec_perm_d *d)
43126 {
43127 unsigned i, nelt = d->nelt;
43128 unsigned min, max;
43129 bool in_order, ok;
43130 rtx shift, target;
43131 struct expand_vec_perm_d dcopy;
43132
43133 /* Even with AVX, palignr only operates on 128-bit vectors. */
43134 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
43135 return false;
43136
43137 min = nelt, max = 0;
43138 for (i = 0; i < nelt; ++i)
43139 {
43140 unsigned e = d->perm[i];
43141 if (e < min)
43142 min = e;
43143 if (e > max)
43144 max = e;
43145 }
43146 if (min == 0 || max - min >= nelt)
43147 return false;
43148
43149 /* Given that we have SSSE3, we know we'll be able to implement the
43150 single operand permutation after the palignr with pshufb. */
43151 if (d->testing_p)
43152 return true;
43153
43154 dcopy = *d;
43155 shift = GEN_INT (min * GET_MODE_BITSIZE (GET_MODE_INNER (d->vmode)));
43156 target = gen_reg_rtx (TImode);
43157 emit_insn (gen_ssse3_palignrti (target, gen_lowpart (TImode, d->op1),
43158 gen_lowpart (TImode, d->op0), shift));
43159
43160 dcopy.op0 = dcopy.op1 = gen_lowpart (d->vmode, target);
43161 dcopy.one_operand_p = true;
43162
43163 in_order = true;
43164 for (i = 0; i < nelt; ++i)
43165 {
43166 unsigned e = dcopy.perm[i] - min;
43167 if (e != i)
43168 in_order = false;
43169 dcopy.perm[i] = e;
43170 }
43171
43172 /* Test for the degenerate case where the alignment by itself
43173 produces the desired permutation. */
43174 if (in_order)
43175 {
43176 emit_move_insn (d->target, dcopy.op0);
43177 return true;
43178 }
43179
43180 ok = expand_vec_perm_1 (&dcopy);
43181 gcc_assert (ok);
43182
43183 return ok;
43184 }
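/* Example (illustrative): for V4SImode with d->perm = { 3, 4, 5, 6 }
we have min == 3, so the code above emits a palignr that shifts the
op1:op0 pair down by 3 * 32 bits (12 bytes); the remaining
single-operand permutation is then the identity, and only the final
move to the target is needed. */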
43185
43186 /* A subroutine of ix86_expand_vec_perm_const_1. Try to simplify
43187 the permutation using the SSE4_1 pblendv instruction. Potentially
43188 reduces a permutation from 2 pshufb and an or to 1 pshufb and 1 pblendv. */
43189
43190 static bool
43191 expand_vec_perm_pblendv (struct expand_vec_perm_d *d)
43192 {
43193 unsigned i, which, nelt = d->nelt;
43194 struct expand_vec_perm_d dcopy, dcopy1;
43195 enum machine_mode vmode = d->vmode;
43196 bool ok;
43197
43198 /* Use the same checks as in expand_vec_perm_blend, but skipping
43199 AVX and AVX2 as they require more than 2 instructions. */
43200 if (d->one_operand_p)
43201 return false;
43202 if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
43203 ;
43204 else
43205 return false;
43206
43207 /* Figure out which permutation elements do not stay in their
43208 respective lanes. */
43209 for (i = 0, which = 0; i < nelt; ++i)
43210 {
43211 unsigned e = d->perm[i];
43212 if (e != i)
43213 which |= (e < nelt ? 1 : 2);
43214 }
43215 /* We can pblend the part where elements do not stay in their
43216 respective lanes only when these elements all come from the same
43217 half of the permutation (i.e. all from op0 or all from op1).
43218 {0 1 8 3 4 5 9 7} is ok: 8 and 9 are not in their respective
43219 lanes, but both are >= 8.
43220 {0 1 8 3 4 5 2 7} is not ok: 2 and 8 are not in their
43221 respective lanes, and 8 >= 8 but 2 is not. */
43222 if (which != 1 && which != 2)
43223 return false;
43224 if (d->testing_p)
43225 return true;
43226
43227 /* First we apply a one-operand permutation to the part whose
43228 elements do not stay in their respective lanes. */
43229 dcopy = *d;
43230 if (which == 2)
43231 dcopy.op0 = dcopy.op1 = d->op1;
43232 else
43233 dcopy.op0 = dcopy.op1 = d->op0;
43234 dcopy.one_operand_p = true;
43235
43236 for (i = 0; i < nelt; ++i)
43237 dcopy.perm[i] = d->perm[i] & (nelt - 1);
43238
43239 ok = expand_vec_perm_1 (&dcopy);
43240 gcc_assert (ok);
43241
43242 /* Next we put permuted elements into their positions. */
43243 dcopy1 = *d;
43244 if (which == 2)
43245 dcopy1.op1 = dcopy.target;
43246 else
43247 dcopy1.op0 = dcopy.target;
43248
43249 for (i = 0; i < nelt; ++i)
43250 dcopy1.perm[i] = ((d->perm[i] >= nelt) ? (nelt + i) : i);
43251
43252 ok = expand_vec_perm_blend (&dcopy1);
43253 gcc_assert (ok);
43254
43255 return true;
43256 }
43257
43258 static bool expand_vec_perm_interleave3 (struct expand_vec_perm_d *d);
43259
43260 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
43261 a two vector permutation into a single vector permutation by using
43262 an interleave operation to merge the vectors. */
43263
43264 static bool
43265 expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
43266 {
43267 struct expand_vec_perm_d dremap, dfinal;
43268 unsigned i, nelt = d->nelt, nelt2 = nelt / 2;
43269 unsigned HOST_WIDE_INT contents;
43270 unsigned char remap[2 * MAX_VECT_LEN];
43271 rtx seq;
43272 bool ok, same_halves = false;
43273
43274 if (GET_MODE_SIZE (d->vmode) == 16)
43275 {
43276 if (d->one_operand_p)
43277 return false;
43278 }
43279 else if (GET_MODE_SIZE (d->vmode) == 32)
43280 {
43281 if (!TARGET_AVX)
43282 return false;
43283 /* For 32-byte modes allow even d->one_operand_p.
43284 The lack of cross-lane shuffling in some instructions
43285 might prevent a single insn shuffle. */
43286 dfinal = *d;
43287 dfinal.testing_p = true;
43288 /* If expand_vec_perm_interleave3 can expand this into
43289 a 3 insn sequence, give up and let it be expanded as
43290 3 insn sequence. While that is one insn longer,
43291 it doesn't need a memory operand, and in the common
43292 case where the interleave low and interleave high
43293 permutations with the same operands are adjacent, the
43294 pair needs only 4 insns in total after CSE. */
43295 if (expand_vec_perm_interleave3 (&dfinal))
43296 return false;
43297 }
43298 else
43299 return false;
43300
43301 /* Examine from whence the elements come. */
43302 contents = 0;
43303 for (i = 0; i < nelt; ++i)
43304 contents |= ((unsigned HOST_WIDE_INT) 1) << d->perm[i];
43305
43306 memset (remap, 0xff, sizeof (remap));
43307 dremap = *d;
43308
43309 if (GET_MODE_SIZE (d->vmode) == 16)
43310 {
43311 unsigned HOST_WIDE_INT h1, h2, h3, h4;
43312
43313 /* Split the two input vectors into 4 halves. */
43314 h1 = (((unsigned HOST_WIDE_INT) 1) << nelt2) - 1;
43315 h2 = h1 << nelt2;
43316 h3 = h2 << nelt2;
43317 h4 = h3 << nelt2;
43318
43319 /* If the elements all come from the low halves, use interleave low;
43320 similarly for interleave high. If the elements are from mismatched
43321 halves, we can use shufps for V4SF/V4SI or do a DImode shuffle. */
43322 if ((contents & (h1 | h3)) == contents)
43323 {
43324 /* punpckl* */
43325 for (i = 0; i < nelt2; ++i)
43326 {
43327 remap[i] = i * 2;
43328 remap[i + nelt] = i * 2 + 1;
43329 dremap.perm[i * 2] = i;
43330 dremap.perm[i * 2 + 1] = i + nelt;
43331 }
43332 if (!TARGET_SSE2 && d->vmode == V4SImode)
43333 dremap.vmode = V4SFmode;
43334 }
43335 else if ((contents & (h2 | h4)) == contents)
43336 {
43337 /* punpckh* */
43338 for (i = 0; i < nelt2; ++i)
43339 {
43340 remap[i + nelt2] = i * 2;
43341 remap[i + nelt + nelt2] = i * 2 + 1;
43342 dremap.perm[i * 2] = i + nelt2;
43343 dremap.perm[i * 2 + 1] = i + nelt + nelt2;
43344 }
43345 if (!TARGET_SSE2 && d->vmode == V4SImode)
43346 dremap.vmode = V4SFmode;
43347 }
43348 else if ((contents & (h1 | h4)) == contents)
43349 {
43350 /* shufps */
43351 for (i = 0; i < nelt2; ++i)
43352 {
43353 remap[i] = i;
43354 remap[i + nelt + nelt2] = i + nelt2;
43355 dremap.perm[i] = i;
43356 dremap.perm[i + nelt2] = i + nelt + nelt2;
43357 }
43358 if (nelt != 4)
43359 {
43360 /* shufpd */
43361 dremap.vmode = V2DImode;
43362 dremap.nelt = 2;
43363 dremap.perm[0] = 0;
43364 dremap.perm[1] = 3;
43365 }
43366 }
43367 else if ((contents & (h2 | h3)) == contents)
43368 {
43369 /* shufps */
43370 for (i = 0; i < nelt2; ++i)
43371 {
43372 remap[i + nelt2] = i;
43373 remap[i + nelt] = i + nelt2;
43374 dremap.perm[i] = i + nelt2;
43375 dremap.perm[i + nelt2] = i + nelt;
43376 }
43377 if (nelt != 4)
43378 {
43379 /* shufpd */
43380 dremap.vmode = V2DImode;
43381 dremap.nelt = 2;
43382 dremap.perm[0] = 1;
43383 dremap.perm[1] = 2;
43384 }
43385 }
43386 else
43387 return false;
43388 }
43389 else
43390 {
43391 unsigned int nelt4 = nelt / 4, nzcnt = 0;
43392 unsigned HOST_WIDE_INT q[8];
43393 unsigned int nonzero_halves[4];
43394
43395 /* Split the two input vectors into 8 quarters. */
43396 q[0] = (((unsigned HOST_WIDE_INT) 1) << nelt4) - 1;
43397 for (i = 1; i < 8; ++i)
43398 q[i] = q[0] << (nelt4 * i);
43399 for (i = 0; i < 4; ++i)
43400 if (((q[2 * i] | q[2 * i + 1]) & contents) != 0)
43401 {
43402 nonzero_halves[nzcnt] = i;
43403 ++nzcnt;
43404 }
43405
43406 if (nzcnt == 1)
43407 {
43408 gcc_assert (d->one_operand_p);
43409 nonzero_halves[1] = nonzero_halves[0];
43410 same_halves = true;
43411 }
43412 else if (d->one_operand_p)
43413 {
43414 gcc_assert (nonzero_halves[0] == 0);
43415 gcc_assert (nonzero_halves[1] == 1);
43416 }
43417
43418 if (nzcnt <= 2)
43419 {
43420 if (d->perm[0] / nelt2 == nonzero_halves[1])
43421 {
43422 /* Attempt to increase the likelihood that dfinal
43423 shuffle will be intra-lane. */
43424 char tmph = nonzero_halves[0];
43425 nonzero_halves[0] = nonzero_halves[1];
43426 nonzero_halves[1] = tmph;
43427 }
43428
43429 /* vperm2f128 or vperm2i128. */
43430 for (i = 0; i < nelt2; ++i)
43431 {
43432 remap[i + nonzero_halves[1] * nelt2] = i + nelt2;
43433 remap[i + nonzero_halves[0] * nelt2] = i;
43434 dremap.perm[i + nelt2] = i + nonzero_halves[1] * nelt2;
43435 dremap.perm[i] = i + nonzero_halves[0] * nelt2;
43436 }
43437
43438 if (d->vmode != V8SFmode
43439 && d->vmode != V4DFmode
43440 && d->vmode != V8SImode)
43441 {
43442 dremap.vmode = V8SImode;
43443 dremap.nelt = 8;
43444 for (i = 0; i < 4; ++i)
43445 {
43446 dremap.perm[i] = i + nonzero_halves[0] * 4;
43447 dremap.perm[i + 4] = i + nonzero_halves[1] * 4;
43448 }
43449 }
43450 }
43451 else if (d->one_operand_p)
43452 return false;
43453 else if (TARGET_AVX2
43454 && (contents & (q[0] | q[2] | q[4] | q[6])) == contents)
43455 {
43456 /* vpunpckl* */
43457 for (i = 0; i < nelt4; ++i)
43458 {
43459 remap[i] = i * 2;
43460 remap[i + nelt] = i * 2 + 1;
43461 remap[i + nelt2] = i * 2 + nelt2;
43462 remap[i + nelt + nelt2] = i * 2 + nelt2 + 1;
43463 dremap.perm[i * 2] = i;
43464 dremap.perm[i * 2 + 1] = i + nelt;
43465 dremap.perm[i * 2 + nelt2] = i + nelt2;
43466 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2;
43467 }
43468 }
43469 else if (TARGET_AVX2
43470 && (contents & (q[1] | q[3] | q[5] | q[7])) == contents)
43471 {
43472 /* vpunpckh* */
43473 for (i = 0; i < nelt4; ++i)
43474 {
43475 remap[i + nelt4] = i * 2;
43476 remap[i + nelt + nelt4] = i * 2 + 1;
43477 remap[i + nelt2 + nelt4] = i * 2 + nelt2;
43478 remap[i + nelt + nelt2 + nelt4] = i * 2 + nelt2 + 1;
43479 dremap.perm[i * 2] = i + nelt4;
43480 dremap.perm[i * 2 + 1] = i + nelt + nelt4;
43481 dremap.perm[i * 2 + nelt2] = i + nelt2 + nelt4;
43482 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2 + nelt4;
43483 }
43484 }
43485 else
43486 return false;
43487 }
43488
43489 /* Use the remapping array set up above to move the elements from their
43490 swizzled locations into their final destinations. */
43491 dfinal = *d;
43492 for (i = 0; i < nelt; ++i)
43493 {
43494 unsigned e = remap[d->perm[i]];
43495 gcc_assert (e < nelt);
43496 /* If same_halves is true, both halves of the remapped vector are the
43497 same. Avoid cross-lane accesses if possible. */
43498 if (same_halves && i >= nelt2)
43499 {
43500 gcc_assert (e < nelt2);
43501 dfinal.perm[i] = e + nelt2;
43502 }
43503 else
43504 dfinal.perm[i] = e;
43505 }
43506 if (!d->testing_p)
43507 {
43508 dremap.target = gen_reg_rtx (dremap.vmode);
43509 dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
43510 }
43511 dfinal.op1 = dfinal.op0;
43512 dfinal.one_operand_p = true;
43513
43514 /* Test if the final remap can be done with a single insn. For V4SFmode or
43515 V4SImode this *will* succeed. For V8HImode or V16QImode it may not. */
43516 start_sequence ();
43517 ok = expand_vec_perm_1 (&dfinal);
43518 seq = get_insns ();
43519 end_sequence ();
43520
43521 if (!ok)
43522 return false;
43523
43524 if (d->testing_p)
43525 return true;
43526
43527 if (dremap.vmode != dfinal.vmode)
43528 {
43529 dremap.op0 = gen_lowpart (dremap.vmode, dremap.op0);
43530 dremap.op1 = gen_lowpart (dremap.vmode, dremap.op1);
43531 }
43532
43533 ok = expand_vec_perm_1 (&dremap);
43534 gcc_assert (ok);
43535
43536 emit_insn (seq);
43537 return true;
43538 }
43539
43540 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
43541 a single vector cross-lane permutation into vpermq followed
43542 by any of the single insn permutations. */
43543
43544 static bool
43545 expand_vec_perm_vpermq_perm_1 (struct expand_vec_perm_d *d)
43546 {
43547 struct expand_vec_perm_d dremap, dfinal;
43548 unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, nelt4 = nelt / 4;
43549 unsigned contents[2];
43550 bool ok;
43551
43552 if (!(TARGET_AVX2
43553 && (d->vmode == V32QImode || d->vmode == V16HImode)
43554 && d->one_operand_p))
43555 return false;
43556
43557 contents[0] = 0;
43558 contents[1] = 0;
43559 for (i = 0; i < nelt2; ++i)
43560 {
43561 contents[0] |= 1u << (d->perm[i] / nelt4);
43562 contents[1] |= 1u << (d->perm[i + nelt2] / nelt4);
43563 }
43564
43565 for (i = 0; i < 2; ++i)
43566 {
43567 unsigned int cnt = 0;
43568 for (j = 0; j < 4; ++j)
43569 if ((contents[i] & (1u << j)) != 0 && ++cnt > 2)
43570 return false;
43571 }
43572
43573 if (d->testing_p)
43574 return true;
43575
43576 dremap = *d;
43577 dremap.vmode = V4DImode;
43578 dremap.nelt = 4;
43579 dremap.target = gen_reg_rtx (V4DImode);
43580 dremap.op0 = gen_lowpart (V4DImode, d->op0);
43581 dremap.op1 = dremap.op0;
43582 dremap.one_operand_p = true;
43583 for (i = 0; i < 2; ++i)
43584 {
43585 unsigned int cnt = 0;
43586 for (j = 0; j < 4; ++j)
43587 if ((contents[i] & (1u << j)) != 0)
43588 dremap.perm[2 * i + cnt++] = j;
43589 for (; cnt < 2; ++cnt)
43590 dremap.perm[2 * i + cnt] = 0;
43591 }
43592
43593 dfinal = *d;
43594 dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
43595 dfinal.op1 = dfinal.op0;
43596 dfinal.one_operand_p = true;
43597 for (i = 0, j = 0; i < nelt; ++i)
43598 {
43599 if (i == nelt2)
43600 j = 2;
43601 dfinal.perm[i] = (d->perm[i] & (nelt4 - 1)) | (j ? nelt2 : 0);
43602 if ((d->perm[i] / nelt4) == dremap.perm[j])
43603 ;
43604 else if ((d->perm[i] / nelt4) == dremap.perm[j + 1])
43605 dfinal.perm[i] |= nelt4;
43606 else
43607 gcc_unreachable ();
43608 }
43609
43610 ok = expand_vec_perm_1 (&dremap);
43611 gcc_assert (ok);
43612
43613 ok = expand_vec_perm_1 (&dfinal);
43614 gcc_assert (ok);
43615
43616 return true;
43617 }
43618
43619 /* A subroutine of ix86_expand_vec_perm_const_1. Try to expand
43620 a vector permutation using two instructions, vperm2f128 resp.
43621 vperm2i128 followed by any single in-lane permutation. */
43622
43623 static bool
43624 expand_vec_perm_vperm2f128 (struct expand_vec_perm_d *d)
43625 {
43626 struct expand_vec_perm_d dfirst, dsecond;
43627 unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, perm;
43628 bool ok;
43629
43630 if (!TARGET_AVX
43631 || GET_MODE_SIZE (d->vmode) != 32
43632 || (d->vmode != V8SFmode && d->vmode != V4DFmode && !TARGET_AVX2))
43633 return false;
43634
43635 dsecond = *d;
43636 dsecond.one_operand_p = false;
43637 dsecond.testing_p = true;
43638
43639 /* ((perm << 2)|perm) & 0x33 is the vperm2[fi]128
43640 immediate. For perm < 16 the second permutation uses
43641 d->op0 as first operand, for perm >= 16 it uses d->op1
43642 as first operand. The second operand is the result of
43643 vperm2[fi]128. */
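  /* For example, perm == 8 gives the immediate ((8 << 2) | 8) & 0x33 == 0x20:
     the low lane of the vperm2[fi]128 result comes from the low lane of
     d->op0 and the high lane from the low lane of d->op1, i.e. { 0 1 4 5 }
     for V4DFmode.  */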
43644 for (perm = 0; perm < 32; perm++)
43645 {
43646 /* Ignore permutations which do not move anything cross-lane. */
43647 if (perm < 16)
43648 {
43649 /* The second shuffle for e.g. V4DFmode has
43650 0123 and ABCD operands.
43651 Ignore AB23, as 23 is already in the second lane
43652 of the first operand. */
43653 if ((perm & 0xc) == (1 << 2)) continue;
43654 /* And 01CD, as 01 is in the first lane of the first
43655 operand. */
43656 if ((perm & 3) == 0) continue;
43657 /* And 4567, as then the vperm2[fi]128 doesn't change
43658 anything on the original 4567 second operand. */
43659 if ((perm & 0xf) == ((3 << 2) | 2)) continue;
43660 }
43661 else
43662 {
43663 /* The second shuffle for e.g. V4DFmode has
43664 4567 and ABCD operands.
43665 Ignore AB67, as 67 is already in the second lane
43666 of the first operand. */
43667 if ((perm & 0xc) == (3 << 2)) continue;
43668 /* And 45CD, as 45 is in the first lane of the first
43669 operand. */
43670 if ((perm & 3) == 2) continue;
43671 /* And 0123, as then the vperm2[fi]128 doesn't change
43672 anything on the original 0123 first operand. */
43673 if ((perm & 0xf) == (1 << 2)) continue;
43674 }
43675
43676 for (i = 0; i < nelt; i++)
43677 {
43678 j = d->perm[i] / nelt2;
43679 if (j == ((perm >> (2 * (i >= nelt2))) & 3))
43680 dsecond.perm[i] = nelt + (i & nelt2) + (d->perm[i] & (nelt2 - 1));
43681 else if (j == (unsigned) (i >= nelt2) + 2 * (perm >= 16))
43682 dsecond.perm[i] = d->perm[i] & (nelt - 1);
43683 else
43684 break;
43685 }
43686
43687 if (i == nelt)
43688 {
43689 start_sequence ();
43690 ok = expand_vec_perm_1 (&dsecond);
43691 end_sequence ();
43692 }
43693 else
43694 ok = false;
43695
43696 if (ok)
43697 {
43698 if (d->testing_p)
43699 return true;
43700
43701 /* Found a usable second shuffle. dfirst will be
43702 vperm2f128 on d->op0 and d->op1. */
43703 dsecond.testing_p = false;
43704 dfirst = *d;
43705 dfirst.target = gen_reg_rtx (d->vmode);
43706 for (i = 0; i < nelt; i++)
43707 dfirst.perm[i] = (i & (nelt2 - 1))
43708 + ((perm >> (2 * (i >= nelt2))) & 3) * nelt2;
43709
43710 ok = expand_vec_perm_1 (&dfirst);
43711 gcc_assert (ok);
43712
43713 /* And dsecond is some single insn shuffle, taking
43714 d->op0 and result of vperm2f128 (if perm < 16) or
43715 d->op1 and result of vperm2f128 (otherwise). */
43716 dsecond.op1 = dfirst.target;
43717 if (perm >= 16)
43718 dsecond.op0 = dfirst.op1;
43719
43720 ok = expand_vec_perm_1 (&dsecond);
43721 gcc_assert (ok);
43722
43723 return true;
43724 }
43725
43726 /* For one operand, the only useful vperm2f128 permutation is 0x10. */
43727 if (d->one_operand_p)
43728 return false;
43729 }
43730
43731 return false;
43732 }
43733
43734 /* A subroutine of ix86_expand_vec_perm_const_1. Try to simplify
43735 a two-vector permutation using 2 intra-lane interleave insns
43736 and cross-lane shuffle for 32-byte vectors. */
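/* For example, for V8SImode the two permutations matched here are
   { 0 8 1 9 2 10 3 11 } (interleave low) and
   { 4 12 5 13 6 14 7 15 } (interleave high).  */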
43737
43738 static bool
43739 expand_vec_perm_interleave3 (struct expand_vec_perm_d *d)
43740 {
43741 unsigned i, nelt;
43742 rtx (*gen) (rtx, rtx, rtx);
43743
43744 if (d->one_operand_p)
43745 return false;
43746 if (TARGET_AVX2 && GET_MODE_SIZE (d->vmode) == 32)
43747 ;
43748 else if (TARGET_AVX && (d->vmode == V8SFmode || d->vmode == V4DFmode))
43749 ;
43750 else
43751 return false;
43752
43753 nelt = d->nelt;
43754 if (d->perm[0] != 0 && d->perm[0] != nelt / 2)
43755 return false;
43756 for (i = 0; i < nelt; i += 2)
43757 if (d->perm[i] != d->perm[0] + i / 2
43758 || d->perm[i + 1] != d->perm[0] + i / 2 + nelt)
43759 return false;
43760
43761 if (d->testing_p)
43762 return true;
43763
43764 switch (d->vmode)
43765 {
43766 case V32QImode:
43767 if (d->perm[0])
43768 gen = gen_vec_interleave_highv32qi;
43769 else
43770 gen = gen_vec_interleave_lowv32qi;
43771 break;
43772 case V16HImode:
43773 if (d->perm[0])
43774 gen = gen_vec_interleave_highv16hi;
43775 else
43776 gen = gen_vec_interleave_lowv16hi;
43777 break;
43778 case V8SImode:
43779 if (d->perm[0])
43780 gen = gen_vec_interleave_highv8si;
43781 else
43782 gen = gen_vec_interleave_lowv8si;
43783 break;
43784 case V4DImode:
43785 if (d->perm[0])
43786 gen = gen_vec_interleave_highv4di;
43787 else
43788 gen = gen_vec_interleave_lowv4di;
43789 break;
43790 case V8SFmode:
43791 if (d->perm[0])
43792 gen = gen_vec_interleave_highv8sf;
43793 else
43794 gen = gen_vec_interleave_lowv8sf;
43795 break;
43796 case V4DFmode:
43797 if (d->perm[0])
43798 gen = gen_vec_interleave_highv4df;
43799 else
43800 gen = gen_vec_interleave_lowv4df;
43801 break;
43802 default:
43803 gcc_unreachable ();
43804 }
43805
43806 emit_insn (gen (d->target, d->op0, d->op1));
43807 return true;
43808 }
43809
43810 /* A subroutine of ix86_expand_vec_perm_const_1. Try to implement
43811 a single vector permutation using a single intra-lane vector
43812 permutation, vperm2f128 swapping the lanes and vblend* insn blending
43813 the non-swapped and swapped vectors together. */
43814
43815 static bool
43816 expand_vec_perm_vperm2f128_vblend (struct expand_vec_perm_d *d)
43817 {
43818 struct expand_vec_perm_d dfirst, dsecond;
43819 unsigned i, j, msk, nelt = d->nelt, nelt2 = nelt / 2;
43820 rtx seq;
43821 bool ok;
43822 rtx (*blend) (rtx, rtx, rtx, rtx) = NULL;
43823
43824 if (!TARGET_AVX
43825 || TARGET_AVX2
43826 || (d->vmode != V8SFmode && d->vmode != V4DFmode)
43827 || !d->one_operand_p)
43828 return false;
43829
43830 dfirst = *d;
43831 for (i = 0; i < nelt; i++)
43832 dfirst.perm[i] = 0xff;
43833 for (i = 0, msk = 0; i < nelt; i++)
43834 {
43835 j = (d->perm[i] & nelt2) ? i | nelt2 : i & ~nelt2;
43836 if (dfirst.perm[j] != 0xff && dfirst.perm[j] != d->perm[i])
43837 return false;
43838 dfirst.perm[j] = d->perm[i];
43839 if (j != i)
43840 msk |= (1 << i);
43841 }
43842 for (i = 0; i < nelt; i++)
43843 if (dfirst.perm[i] == 0xff)
43844 dfirst.perm[i] = i;
43845
43846 if (!d->testing_p)
43847 dfirst.target = gen_reg_rtx (dfirst.vmode);
43848
43849 start_sequence ();
43850 ok = expand_vec_perm_1 (&dfirst);
43851 seq = get_insns ();
43852 end_sequence ();
43853
43854 if (!ok)
43855 return false;
43856
43857 if (d->testing_p)
43858 return true;
43859
43860 emit_insn (seq);
43861
43862 dsecond = *d;
43863 dsecond.op0 = dfirst.target;
43864 dsecond.op1 = dfirst.target;
43865 dsecond.one_operand_p = true;
43866 dsecond.target = gen_reg_rtx (dsecond.vmode);
43867 for (i = 0; i < nelt; i++)
43868 dsecond.perm[i] = i ^ nelt2;
43869
43870 ok = expand_vec_perm_1 (&dsecond);
43871 gcc_assert (ok);
43872
43873 blend = d->vmode == V8SFmode ? gen_avx_blendps256 : gen_avx_blendpd256;
43874 emit_insn (blend (d->target, dfirst.target, dsecond.target, GEN_INT (msk)));
43875 return true;
43876 }
43877
43878 /* A subroutine of ix86_expand_vec_perm_const_1. Implement a V4DF
43879 permutation using two vperm2f128, followed by a vshufpd insn blending
43880 the two vectors together. */
43881
43882 static bool
43883 expand_vec_perm_2vperm2f128_vshuf (struct expand_vec_perm_d *d)
43884 {
43885 struct expand_vec_perm_d dfirst, dsecond, dthird;
43886 bool ok;
43887
43888 if (!TARGET_AVX || (d->vmode != V4DFmode))
43889 return false;
43890
43891 if (d->testing_p)
43892 return true;
43893
43894 dfirst = *d;
43895 dsecond = *d;
43896 dthird = *d;
43897
43898 dfirst.perm[0] = (d->perm[0] & ~1);
43899 dfirst.perm[1] = (d->perm[0] & ~1) + 1;
43900 dfirst.perm[2] = (d->perm[2] & ~1);
43901 dfirst.perm[3] = (d->perm[2] & ~1) + 1;
43902 dsecond.perm[0] = (d->perm[1] & ~1);
43903 dsecond.perm[1] = (d->perm[1] & ~1) + 1;
43904 dsecond.perm[2] = (d->perm[3] & ~1);
43905 dsecond.perm[3] = (d->perm[3] & ~1) + 1;
43906 dthird.perm[0] = (d->perm[0] % 2);
43907 dthird.perm[1] = (d->perm[1] % 2) + 4;
43908 dthird.perm[2] = (d->perm[2] % 2) + 2;
43909 dthird.perm[3] = (d->perm[3] % 2) + 6;
43910
43911 dfirst.target = gen_reg_rtx (dfirst.vmode);
43912 dsecond.target = gen_reg_rtx (dsecond.vmode);
43913 dthird.op0 = dfirst.target;
43914 dthird.op1 = dsecond.target;
43915 dthird.one_operand_p = false;
43916
43917 canonicalize_perm (&dfirst);
43918 canonicalize_perm (&dsecond);
43919
43920 ok = expand_vec_perm_1 (&dfirst)
43921 && expand_vec_perm_1 (&dsecond)
43922 && expand_vec_perm_1 (&dthird);
43923
43924 gcc_assert (ok);
43925
43926 return true;
43927 }
43928
43929 /* A subroutine of expand_vec_perm_even_odd_1. Implement the double-word
43930 permutation with two pshufb insns and an ior. We should have already
43931 failed all two instruction sequences. */
43932
43933 static bool
43934 expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
43935 {
43936 rtx rperm[2][16], vperm, l, h, op, m128;
43937 unsigned int i, nelt, eltsz;
43938
43939 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
43940 return false;
43941 gcc_assert (!d->one_operand_p);
43942
43943 if (d->testing_p)
43944 return true;
43945
43946 nelt = d->nelt;
43947 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
43948
43949 /* Generate two permutation masks. If the required element is within
43950 the given vector it is shuffled into the proper lane. If the required
43951 element is in the other vector, force a zero into the lane by setting
43952 bit 7 in the permutation mask. */
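  /* pshufb writes a zero byte wherever bit 7 of the selector byte is set, so
     the two partial results can simply be IORed together at the end.  */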
43953 m128 = GEN_INT (-128);
43954 for (i = 0; i < nelt; ++i)
43955 {
43956 unsigned j, e = d->perm[i];
43957 unsigned which = (e >= nelt);
43958 if (e >= nelt)
43959 e -= nelt;
43960
43961 for (j = 0; j < eltsz; ++j)
43962 {
43963 rperm[which][i*eltsz + j] = GEN_INT (e*eltsz + j);
43964 rperm[1-which][i*eltsz + j] = m128;
43965 }
43966 }
43967
43968 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[0]));
43969 vperm = force_reg (V16QImode, vperm);
43970
43971 l = gen_reg_rtx (V16QImode);
43972 op = gen_lowpart (V16QImode, d->op0);
43973 emit_insn (gen_ssse3_pshufbv16qi3 (l, op, vperm));
43974
43975 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[1]));
43976 vperm = force_reg (V16QImode, vperm);
43977
43978 h = gen_reg_rtx (V16QImode);
43979 op = gen_lowpart (V16QImode, d->op1);
43980 emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm));
43981
43982 op = d->target;
43983 if (d->vmode != V16QImode)
43984 op = gen_reg_rtx (V16QImode);
43985 emit_insn (gen_iorv16qi3 (op, l, h));
43986 if (op != d->target)
43987 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
43988
43989 return true;
43990 }
43991
43992 /* Implement arbitrary permutation of one V32QImode or V16HImode operand
43993 with two vpshufb insns, vpermq and vpor. We should have already failed
43994 all two or three instruction sequences. */
43995
43996 static bool
43997 expand_vec_perm_vpshufb2_vpermq (struct expand_vec_perm_d *d)
43998 {
43999 rtx rperm[2][32], vperm, l, h, hp, op, m128;
44000 unsigned int i, nelt, eltsz;
44001
44002 if (!TARGET_AVX2
44003 || !d->one_operand_p
44004 || (d->vmode != V32QImode && d->vmode != V16HImode))
44005 return false;
44006
44007 if (d->testing_p)
44008 return true;
44009
44010 nelt = d->nelt;
44011 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
44012
44013 /* Generate two permutation masks. If the required element is within
44014 the same lane, it is shuffled in. If the required element is in the
44015 other lane, force a zero by setting bit 7 in the permutation mask.
44016 In the other mask an element is non-negative if it is requested
44017 from the other lane, but it is also moved to the other lane,
44018 so that the result of vpshufb can have the two V2TImode halves
44019 swapped. */
44020 m128 = GEN_INT (-128);
44021 for (i = 0; i < nelt; ++i)
44022 {
44023 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
44024 unsigned which = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
44025
44026 for (j = 0; j < eltsz; ++j)
44027 {
44028 rperm[!!which][(i * eltsz + j) ^ which] = GEN_INT (e * eltsz + j);
44029 rperm[!which][(i * eltsz + j) ^ (which ^ 16)] = m128;
44030 }
44031 }
44032
44033 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
44034 vperm = force_reg (V32QImode, vperm);
44035
44036 h = gen_reg_rtx (V32QImode);
44037 op = gen_lowpart (V32QImode, d->op0);
44038 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
44039
44040 /* Swap the 128-bit lanes of h into hp. */
44041 hp = gen_reg_rtx (V4DImode);
44042 op = gen_lowpart (V4DImode, h);
44043 emit_insn (gen_avx2_permv4di_1 (hp, op, const2_rtx, GEN_INT (3), const0_rtx,
44044 const1_rtx));
44045
44046 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
44047 vperm = force_reg (V32QImode, vperm);
44048
44049 l = gen_reg_rtx (V32QImode);
44050 op = gen_lowpart (V32QImode, d->op0);
44051 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
44052
44053 op = d->target;
44054 if (d->vmode != V32QImode)
44055 op = gen_reg_rtx (V32QImode);
44056 emit_insn (gen_iorv32qi3 (op, l, gen_lowpart (V32QImode, hp)));
44057 if (op != d->target)
44058 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
44059
44060 return true;
44061 }
44062
44063 /* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even
44064 and extract-odd permutations of two V32QImode or V16HImode operands
44065 with two vpshufb insns, vpor and vpermq. We should have already
44066 failed all two or three instruction sequences. */
44067
44068 static bool
44069 expand_vec_perm_vpshufb2_vpermq_even_odd (struct expand_vec_perm_d *d)
44070 {
44071 rtx rperm[2][32], vperm, l, h, ior, op, m128;
44072 unsigned int i, nelt, eltsz;
44073
44074 if (!TARGET_AVX2
44075 || d->one_operand_p
44076 || (d->vmode != V32QImode && d->vmode != V16HImode))
44077 return false;
44078
44079 for (i = 0; i < d->nelt; ++i)
44080 if ((d->perm[i] ^ (i * 2)) & (3 * d->nelt / 2))
44081 return false;
44082
44083 if (d->testing_p)
44084 return true;
44085
44086 nelt = d->nelt;
44087 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
44088
44089 /* Generate two permutation masks. In the first permutation mask
44090 the first quarter will contain indexes for the first half
44091 of the op0, the second quarter will contain bit 7 set, third quarter
44092 will contain indexes for the second half of the op0 and the
44093 last quarter bit 7 set. In the second permutation mask
44094 the first quarter will contain bit 7 set, the second quarter
44095 indexes for the first half of the op1, the third quarter bit 7 set
44096 and last quarter indexes for the second half of the op1.
44097 I.e. the first mask e.g. for V32QImode extract even will be:
44098 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128
44099 (all values masked with 0xf except for -128) and second mask
44100 for extract even will be
44101 -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe. */
44102 m128 = GEN_INT (-128);
44103 for (i = 0; i < nelt; ++i)
44104 {
44105 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
44106 unsigned which = d->perm[i] >= nelt;
44107 unsigned xorv = (i >= nelt / 4 && i < 3 * nelt / 4) ? 24 : 0;
44108
44109 for (j = 0; j < eltsz; ++j)
44110 {
44111 rperm[which][(i * eltsz + j) ^ xorv] = GEN_INT (e * eltsz + j);
44112 rperm[1 - which][(i * eltsz + j) ^ xorv] = m128;
44113 }
44114 }
44115
44116 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
44117 vperm = force_reg (V32QImode, vperm);
44118
44119 l = gen_reg_rtx (V32QImode);
44120 op = gen_lowpart (V32QImode, d->op0);
44121 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
44122
44123 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
44124 vperm = force_reg (V32QImode, vperm);
44125
44126 h = gen_reg_rtx (V32QImode);
44127 op = gen_lowpart (V32QImode, d->op1);
44128 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
44129
44130 ior = gen_reg_rtx (V32QImode);
44131 emit_insn (gen_iorv32qi3 (ior, l, h));
44132
44133 /* Permute the V4DImode quarters using { 0, 2, 1, 3 } permutation. */
44134 op = gen_reg_rtx (V4DImode);
44135 ior = gen_lowpart (V4DImode, ior);
44136 emit_insn (gen_avx2_permv4di_1 (op, ior, const0_rtx, const2_rtx,
44137 const1_rtx, GEN_INT (3)));
44138 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
44139
44140 return true;
44141 }
44142
44143 /* A subroutine of ix86_expand_vec_perm_const_1. Implement extract-even
44144 and extract-odd permutations. */
44145
44146 static bool
44147 expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
44148 {
44149 rtx t1, t2, t3, t4, t5;
44150
44151 switch (d->vmode)
44152 {
44153 case V4DFmode:
44154 if (d->testing_p)
44155 break;
44156 t1 = gen_reg_rtx (V4DFmode);
44157 t2 = gen_reg_rtx (V4DFmode);
44158
44159 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
44160 emit_insn (gen_avx_vperm2f128v4df3 (t1, d->op0, d->op1, GEN_INT (0x20)));
44161 emit_insn (gen_avx_vperm2f128v4df3 (t2, d->op0, d->op1, GEN_INT (0x31)));
44162
44163 /* Now an unpck[lh]pd will produce the result required. */
44164 if (odd)
44165 t3 = gen_avx_unpckhpd256 (d->target, t1, t2);
44166 else
44167 t3 = gen_avx_unpcklpd256 (d->target, t1, t2);
44168 emit_insn (t3);
44169 break;
44170
44171 case V8SFmode:
44172 {
44173 int mask = odd ? 0xdd : 0x88;
44174
44175 if (d->testing_p)
44176 break;
44177 t1 = gen_reg_rtx (V8SFmode);
44178 t2 = gen_reg_rtx (V8SFmode);
44179 t3 = gen_reg_rtx (V8SFmode);
44180
44181 /* Shuffle within the 128-bit lanes to produce:
44182 { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }. */
44183 emit_insn (gen_avx_shufps256 (t1, d->op0, d->op1,
44184 GEN_INT (mask)));
44185
44186 /* Shuffle the lanes around to produce:
44187 { 4 6 c e 0 2 8 a } and { 5 7 d f 1 3 9 b }. */
44188 emit_insn (gen_avx_vperm2f128v8sf3 (t2, t1, t1,
44189 GEN_INT (0x3)));
44190
44191 /* Shuffle within the 128-bit lanes to produce:
44192 { 0 2 4 6 4 6 0 2 } | { 1 3 5 7 5 7 1 3 }. */
44193 emit_insn (gen_avx_shufps256 (t3, t1, t2, GEN_INT (0x44)));
44194
44195 /* Shuffle within the 128-bit lanes to produce:
44196 { 8 a c e c e 8 a } | { 9 b d f d f 9 b }. */
44197 emit_insn (gen_avx_shufps256 (t2, t1, t2, GEN_INT (0xee)));
44198
44199 /* Shuffle the lanes around to produce:
44200 { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }. */
44201 emit_insn (gen_avx_vperm2f128v8sf3 (d->target, t3, t2,
44202 GEN_INT (0x20)));
44203 }
44204 break;
44205
44206 case V2DFmode:
44207 case V4SFmode:
44208 case V2DImode:
44209 case V4SImode:
44210 /* These are always directly implementable by expand_vec_perm_1. */
44211 gcc_unreachable ();
44212
44213 case V8HImode:
44214 if (TARGET_SSSE3 && !TARGET_SLOW_PSHUFB)
44215 return expand_vec_perm_pshufb2 (d);
44216 else
44217 {
44218 if (d->testing_p)
44219 break;
44220 /* We need 2*log2(N)-1 operations to achieve odd/even
44221 with interleave. */
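/* For V8HImode that is 2 * 3 - 1 == 5 interleave insns, matching the
five gen_vec_interleave_* calls below.  */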
44222 t1 = gen_reg_rtx (V8HImode);
44223 t2 = gen_reg_rtx (V8HImode);
44224 emit_insn (gen_vec_interleave_highv8hi (t1, d->op0, d->op1));
44225 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->op0, d->op1));
44226 emit_insn (gen_vec_interleave_highv8hi (t2, d->target, t1));
44227 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->target, t1));
44228 if (odd)
44229 t3 = gen_vec_interleave_highv8hi (d->target, d->target, t2);
44230 else
44231 t3 = gen_vec_interleave_lowv8hi (d->target, d->target, t2);
44232 emit_insn (t3);
44233 }
44234 break;
44235
44236 case V16QImode:
44237 if (TARGET_SSSE3 && !TARGET_SLOW_PSHUFB)
44238 return expand_vec_perm_pshufb2 (d);
44239 else
44240 {
44241 if (d->testing_p)
44242 break;
44243 t1 = gen_reg_rtx (V16QImode);
44244 t2 = gen_reg_rtx (V16QImode);
44245 t3 = gen_reg_rtx (V16QImode);
44246 emit_insn (gen_vec_interleave_highv16qi (t1, d->op0, d->op1));
44247 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->op0, d->op1));
44248 emit_insn (gen_vec_interleave_highv16qi (t2, d->target, t1));
44249 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t1));
44250 emit_insn (gen_vec_interleave_highv16qi (t3, d->target, t2));
44251 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t2));
44252 if (odd)
44253 t3 = gen_vec_interleave_highv16qi (d->target, d->target, t3);
44254 else
44255 t3 = gen_vec_interleave_lowv16qi (d->target, d->target, t3);
44256 emit_insn (t3);
44257 }
44258 break;
44259
44260 case V16HImode:
44261 case V32QImode:
44262 return expand_vec_perm_vpshufb2_vpermq_even_odd (d);
44263
44264 case V4DImode:
44265 if (!TARGET_AVX2)
44266 {
44267 struct expand_vec_perm_d d_copy = *d;
44268 d_copy.vmode = V4DFmode;
44269 if (d->testing_p)
44270 d_copy.target = gen_lowpart (V4DFmode, d->target);
44271 else
44272 d_copy.target = gen_reg_rtx (V4DFmode);
44273 d_copy.op0 = gen_lowpart (V4DFmode, d->op0);
44274 d_copy.op1 = gen_lowpart (V4DFmode, d->op1);
44275 if (expand_vec_perm_even_odd_1 (&d_copy, odd))
44276 {
44277 if (!d->testing_p)
44278 emit_move_insn (d->target,
44279 gen_lowpart (V4DImode, d_copy.target));
44280 return true;
44281 }
44282 return false;
44283 }
44284
44285 if (d->testing_p)
44286 break;
44287
44288 t1 = gen_reg_rtx (V4DImode);
44289 t2 = gen_reg_rtx (V4DImode);
44290
44291 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
44292 emit_insn (gen_avx2_permv2ti (t1, d->op0, d->op1, GEN_INT (0x20)));
44293 emit_insn (gen_avx2_permv2ti (t2, d->op0, d->op1, GEN_INT (0x31)));
44294
44295 /* Now an vpunpck[lh]qdq will produce the result required. */
44296 if (odd)
44297 t3 = gen_avx2_interleave_highv4di (d->target, t1, t2);
44298 else
44299 t3 = gen_avx2_interleave_lowv4di (d->target, t1, t2);
44300 emit_insn (t3);
44301 break;
44302
44303 case V8SImode:
44304 if (!TARGET_AVX2)
44305 {
44306 struct expand_vec_perm_d d_copy = *d;
44307 d_copy.vmode = V8SFmode;
44308 if (d->testing_p)
44309 d_copy.target = gen_lowpart (V8SFmode, d->target);
44310 else
44311 d_copy.target = gen_reg_rtx (V8SFmode);
44312 d_copy.op0 = gen_lowpart (V8SFmode, d->op0);
44313 d_copy.op1 = gen_lowpart (V8SFmode, d->op1);
44314 if (expand_vec_perm_even_odd_1 (&d_copy, odd))
44315 {
44316 if (!d->testing_p)
44317 emit_move_insn (d->target,
44318 gen_lowpart (V8SImode, d_copy.target));
44319 return true;
44320 }
44321 return false;
44322 }
44323
44324 if (d->testing_p)
44325 break;
44326
44327 t1 = gen_reg_rtx (V8SImode);
44328 t2 = gen_reg_rtx (V8SImode);
44329 t3 = gen_reg_rtx (V4DImode);
44330 t4 = gen_reg_rtx (V4DImode);
44331 t5 = gen_reg_rtx (V4DImode);
44332
44333 /* Shuffle the lanes around into
44334 { 0 1 2 3 8 9 a b } and { 4 5 6 7 c d e f }. */
44335 emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, d->op0),
44336 gen_lowpart (V4DImode, d->op1),
44337 GEN_INT (0x20)));
44338 emit_insn (gen_avx2_permv2ti (t4, gen_lowpart (V4DImode, d->op0),
44339 gen_lowpart (V4DImode, d->op1),
44340 GEN_INT (0x31)));
44341
44342 /* Swap the 2nd and 3rd position in each lane into
44343 { 0 2 1 3 8 a 9 b } and { 4 6 5 7 c e d f }. */
44344 emit_insn (gen_avx2_pshufdv3 (t1, gen_lowpart (V8SImode, t3),
44345 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
44346 emit_insn (gen_avx2_pshufdv3 (t2, gen_lowpart (V8SImode, t4),
44347 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
44348
44349 /* Now an vpunpck[lh]qdq will produce
44350 { 0 2 4 6 8 a c e } resp. { 1 3 5 7 9 b d f }. */
44351 if (odd)
44352 t3 = gen_avx2_interleave_highv4di (t5, gen_lowpart (V4DImode, t1),
44353 gen_lowpart (V4DImode, t2));
44354 else
44355 t3 = gen_avx2_interleave_lowv4di (t5, gen_lowpart (V4DImode, t1),
44356 gen_lowpart (V4DImode, t2));
44357 emit_insn (t3);
44358 emit_move_insn (d->target, gen_lowpart (V8SImode, t5));
44359 break;
44360
44361 default:
44362 gcc_unreachable ();
44363 }
44364
44365 return true;
44366 }
44367
44368 /* A subroutine of ix86_expand_vec_perm_const_1. Pattern match
44369 extract-even and extract-odd permutations. */
44370
44371 static bool
44372 expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
44373 {
44374 unsigned i, odd, nelt = d->nelt;
44375
44376 odd = d->perm[0];
44377 if (odd != 0 && odd != 1)
44378 return false;
44379
44380 for (i = 1; i < nelt; ++i)
44381 if (d->perm[i] != 2 * i + odd)
44382 return false;
44383
44384 return expand_vec_perm_even_odd_1 (d, odd);
44385 }
44386
44387 /* A subroutine of ix86_expand_vec_perm_const_1. Implement broadcast
44388 permutations. We assume that expand_vec_perm_1 has already failed. */
44389
44390 static bool
44391 expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
44392 {
44393 unsigned elt = d->perm[0], nelt2 = d->nelt / 2;
44394 enum machine_mode vmode = d->vmode;
44395 unsigned char perm2[4];
44396 rtx op0 = d->op0, dest;
44397 bool ok;
44398
44399 switch (vmode)
44400 {
44401 case V4DFmode:
44402 case V8SFmode:
44403 /* These are special-cased in sse.md so that we can optionally
44404 use the vbroadcast instruction. They expand to two insns
44405 if the input happens to be in a register. */
44406 gcc_unreachable ();
44407
44408 case V2DFmode:
44409 case V2DImode:
44410 case V4SFmode:
44411 case V4SImode:
44412 /* These are always implementable using standard shuffle patterns. */
44413 gcc_unreachable ();
44414
44415 case V8HImode:
44416 case V16QImode:
44417 /* These can be implemented via interleave. We save one insn by
44418 stopping once we have promoted to V4SImode and then using pshufd. */
44419 if (d->testing_p)
44420 return true;
44421 do
44422 {
44423 rtx dest;
44424 rtx (*gen) (rtx, rtx, rtx)
44425 = vmode == V16QImode ? gen_vec_interleave_lowv16qi
44426 : gen_vec_interleave_lowv8hi;
44427
44428 if (elt >= nelt2)
44429 {
44430 gen = vmode == V16QImode ? gen_vec_interleave_highv16qi
44431 : gen_vec_interleave_highv8hi;
44432 elt -= nelt2;
44433 }
44434 nelt2 /= 2;
44435
44436 dest = gen_reg_rtx (vmode);
44437 emit_insn (gen (dest, op0, op0));
44438 vmode = get_mode_wider_vector (vmode);
44439 op0 = gen_lowpart (vmode, dest);
44440 }
44441 while (vmode != V4SImode);
44442
44443 memset (perm2, elt, 4);
44444 dest = gen_reg_rtx (V4SImode);
44445 ok = expand_vselect (dest, op0, perm2, 4, d->testing_p);
44446 gcc_assert (ok);
44447 if (!d->testing_p)
44448 emit_move_insn (d->target, gen_lowpart (d->vmode, dest));
44449 return true;
44450
44451 case V32QImode:
44452 case V16HImode:
44453 case V8SImode:
44454 case V4DImode:
44455 /* For AVX2 broadcasts of the first element vpbroadcast* or
44456 vpermq should be used by expand_vec_perm_1. */
44457 gcc_assert (!TARGET_AVX2 || d->perm[0]);
44458 return false;
44459
44460 default:
44461 gcc_unreachable ();
44462 }
44463 }
44464
44465 /* A subroutine of ix86_expand_vec_perm_const_1. Pattern match
44466 broadcast permutations. */
44467
44468 static bool
44469 expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
44470 {
44471 unsigned i, elt, nelt = d->nelt;
44472
44473 if (!d->one_operand_p)
44474 return false;
44475
44476 elt = d->perm[0];
44477 for (i = 1; i < nelt; ++i)
44478 if (d->perm[i] != elt)
44479 return false;
44480
44481 return expand_vec_perm_broadcast_1 (d);
44482 }
44483
44484 /* Implement arbitrary permutation of two V32QImode or V16HImode operands
44485 with 4 vpshufb insns, 2 vpermq and 3 vpor. We should have already failed
44486 all the shorter instruction sequences. */
44487
44488 static bool
44489 expand_vec_perm_vpshufb4_vpermq2 (struct expand_vec_perm_d *d)
44490 {
44491 rtx rperm[4][32], vperm, l[2], h[2], op, m128;
44492 unsigned int i, nelt, eltsz;
44493 bool used[4];
44494
44495 if (!TARGET_AVX2
44496 || d->one_operand_p
44497 || (d->vmode != V32QImode && d->vmode != V16HImode))
44498 return false;
44499
44500 if (d->testing_p)
44501 return true;
44502
44503 nelt = d->nelt;
44504 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
44505
44506 /* Generate 4 permutation masks. If the required element is within
44507 the same lane, it is shuffled in. If the required element is in the
44508 other lane, force a zero by setting bit 7 in the permutation mask.
44509 In the cross-lane masks an element is non-negative if it is requested
44510 from the other lane, but it is also moved to the other lane,
44511 so that the result of vpshufb can have the two V2TImode halves
44512 swapped. */
44513 m128 = GEN_INT (-128);
44514 for (i = 0; i < 32; ++i)
44515 {
44516 rperm[0][i] = m128;
44517 rperm[1][i] = m128;
44518 rperm[2][i] = m128;
44519 rperm[3][i] = m128;
44520 }
44521 used[0] = false;
44522 used[1] = false;
44523 used[2] = false;
44524 used[3] = false;
44525 for (i = 0; i < nelt; ++i)
44526 {
44527 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
44528 unsigned xlane = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
44529 unsigned int which = ((d->perm[i] & nelt) ? 2 : 0) + (xlane ? 1 : 0);
44530
44531 for (j = 0; j < eltsz; ++j)
44532 rperm[which][(i * eltsz + j) ^ xlane] = GEN_INT (e * eltsz + j);
44533 used[which] = true;
44534 }
44535
44536 for (i = 0; i < 2; ++i)
44537 {
44538 if (!used[2 * i + 1])
44539 {
44540 h[i] = NULL_RTX;
44541 continue;
44542 }
44543 vperm = gen_rtx_CONST_VECTOR (V32QImode,
44544 gen_rtvec_v (32, rperm[2 * i + 1]));
44545 vperm = force_reg (V32QImode, vperm);
44546 h[i] = gen_reg_rtx (V32QImode);
44547 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
44548 emit_insn (gen_avx2_pshufbv32qi3 (h[i], op, vperm));
44549 }
44550
44551 /* Swap the 128-bit lanes of h[X]. */
44552 for (i = 0; i < 2; ++i)
44553 {
44554 if (h[i] == NULL_RTX)
44555 continue;
44556 op = gen_reg_rtx (V4DImode);
44557 emit_insn (gen_avx2_permv4di_1 (op, gen_lowpart (V4DImode, h[i]),
44558 const2_rtx, GEN_INT (3), const0_rtx,
44559 const1_rtx));
44560 h[i] = gen_lowpart (V32QImode, op);
44561 }
44562
44563 for (i = 0; i < 2; ++i)
44564 {
44565 if (!used[2 * i])
44566 {
44567 l[i] = NULL_RTX;
44568 continue;
44569 }
44570 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[2 * i]));
44571 vperm = force_reg (V32QImode, vperm);
44572 l[i] = gen_reg_rtx (V32QImode);
44573 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
44574 emit_insn (gen_avx2_pshufbv32qi3 (l[i], op, vperm));
44575 }
44576
44577 for (i = 0; i < 2; ++i)
44578 {
44579 if (h[i] && l[i])
44580 {
44581 op = gen_reg_rtx (V32QImode);
44582 emit_insn (gen_iorv32qi3 (op, l[i], h[i]));
44583 l[i] = op;
44584 }
44585 else if (h[i])
44586 l[i] = h[i];
44587 }
44588
44589 gcc_assert (l[0] && l[1]);
44590 op = d->target;
44591 if (d->vmode != V32QImode)
44592 op = gen_reg_rtx (V32QImode);
44593 emit_insn (gen_iorv32qi3 (op, l[0], l[1]));
44594 if (op != d->target)
44595 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
44596 return true;
44597 }
44598
44599 /* The guts of ix86_expand_vec_perm_const, also used by the ok hook.
44600 With all of the interface bits taken care of, perform the expansion
44601 in D and return true on success. */
44602
44603 static bool
44604 ix86_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
44605 {
44606 /* Try a single instruction expansion. */
44607 if (expand_vec_perm_1 (d))
44608 return true;
44609
44610 /* Try sequences of two instructions. */
44611
44612 if (expand_vec_perm_pshuflw_pshufhw (d))
44613 return true;
44614
44615 if (expand_vec_perm_palignr (d))
44616 return true;
44617
44618 if (expand_vec_perm_interleave2 (d))
44619 return true;
44620
44621 if (expand_vec_perm_broadcast (d))
44622 return true;
44623
44624 if (expand_vec_perm_vpermq_perm_1 (d))
44625 return true;
44626
44627 if (expand_vec_perm_vperm2f128 (d))
44628 return true;
44629
44630 if (expand_vec_perm_pblendv (d))
44631 return true;
44632
44633 /* Try sequences of three instructions. */
44634
44635 if (expand_vec_perm_2vperm2f128_vshuf (d))
44636 return true;
44637
44638 if (expand_vec_perm_pshufb2 (d))
44639 return true;
44640
44641 if (expand_vec_perm_interleave3 (d))
44642 return true;
44643
44644 if (expand_vec_perm_vperm2f128_vblend (d))
44645 return true;
44646
44647 /* Try sequences of four instructions. */
44648
44649 if (expand_vec_perm_vpshufb2_vpermq (d))
44650 return true;
44651
44652 if (expand_vec_perm_vpshufb2_vpermq_even_odd (d))
44653 return true;
44654
44655 /* ??? Look for narrow permutations whose element orderings would
44656 allow the promotion to a wider mode. */
44657
44658 /* ??? Look for sequences of interleave or a wider permute that place
44659 the data into the correct lanes for a half-vector shuffle like
44660 pshuf[lh]w or vpermilps. */
44661
44662 /* ??? Look for sequences of interleave that produce the desired results.
44663 The combinatorics of punpck[lh] get pretty ugly... */
44664
44665 if (expand_vec_perm_even_odd (d))
44666 return true;
44667
44668 /* Even longer sequences. */
44669 if (expand_vec_perm_vpshufb4_vpermq2 (d))
44670 return true;
44671
44672 return false;
44673 }
44674
44675 /* If a permutation only uses one operand, make it clear. Returns true
44676 if the permutation references both operands. */
44677
44678 static bool
44679 canonicalize_perm (struct expand_vec_perm_d *d)
44680 {
44681 int i, which, nelt = d->nelt;
44682
44683 for (i = which = 0; i < nelt; ++i)
44684 which |= (d->perm[i] < nelt ? 1 : 2);
44685
44686 d->one_operand_p = true;
44687 switch (which)
44688 {
44689 default:
44690 gcc_unreachable();
44691
44692 case 3:
44693 if (!rtx_equal_p (d->op0, d->op1))
44694 {
44695 d->one_operand_p = false;
44696 break;
44697 }
44698 /* The elements of PERM do not suggest that only the first operand
44699 is used, but both operands are identical. Allow easier matching
44700 of the permutation by folding the permutation into the single
44701 input vector. */
44702 /* FALLTHRU */
44703
44704 case 2:
44705 for (i = 0; i < nelt; ++i)
44706 d->perm[i] &= nelt - 1;
44707 d->op0 = d->op1;
44708 break;
44709
44710 case 1:
44711 d->op1 = d->op0;
44712 break;
44713 }
44714
44715 return (which == 3);
44716 }
44717
44718 bool
44719 ix86_expand_vec_perm_const (rtx operands[4])
44720 {
44721 struct expand_vec_perm_d d;
44722 unsigned char perm[MAX_VECT_LEN];
44723 int i, nelt;
44724 bool two_args;
44725 rtx sel;
44726
44727 d.target = operands[0];
44728 d.op0 = operands[1];
44729 d.op1 = operands[2];
44730 sel = operands[3];
44731
44732 d.vmode = GET_MODE (d.target);
44733 gcc_assert (VECTOR_MODE_P (d.vmode));
44734 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
44735 d.testing_p = false;
44736
44737 gcc_assert (GET_CODE (sel) == CONST_VECTOR);
44738 gcc_assert (XVECLEN (sel, 0) == nelt);
44739 gcc_checking_assert (sizeof (d.perm) == sizeof (perm));
44740
44741 for (i = 0; i < nelt; ++i)
44742 {
44743 rtx e = XVECEXP (sel, 0, i);
44744 int ei = INTVAL (e) & (2 * nelt - 1);
44745 d.perm[i] = ei;
44746 perm[i] = ei;
44747 }
44748
44749 two_args = canonicalize_perm (&d);
44750
44751 if (ix86_expand_vec_perm_const_1 (&d))
44752 return true;
44753
44754 /* If the selector says both arguments are needed, but the operands are the
44755 same, the above tried to expand with one_operand_p and flattened selector.
44756 If that didn't work, retry without one_operand_p; we succeeded with that
44757 during testing. */
44758 if (two_args && d.one_operand_p)
44759 {
44760 d.one_operand_p = false;
44761 memcpy (d.perm, perm, sizeof (perm));
44762 return ix86_expand_vec_perm_const_1 (&d);
44763 }
44764
44765 return false;
44766 }
44767
44768 /* Implement targetm.vectorize.vec_perm_const_ok. */
44769
44770 static bool
44771 ix86_vectorize_vec_perm_const_ok (enum machine_mode vmode,
44772 const unsigned char *sel)
44773 {
44774 struct expand_vec_perm_d d;
44775 unsigned int i, nelt, which;
44776 bool ret;
44777
44778 d.vmode = vmode;
44779 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
44780 d.testing_p = true;
44781
44782 /* Given sufficient ISA support we can just return true here
44783 for selected vector modes. */
44784 if (d.vmode == V16SImode || d.vmode == V16SFmode
44785 || d.vmode == V8DFmode || d.vmode == V8DImode)
44786 /* All implementable with a single vpermi2 insn. */
44787 return true;
44788 if (GET_MODE_SIZE (d.vmode) == 16)
44789 {
44790 /* All implementable with a single vpperm insn. */
44791 if (TARGET_XOP)
44792 return true;
44793 /* All implementable with 2 pshufb + 1 ior. */
44794 if (TARGET_SSSE3)
44795 return true;
44796 /* All implementable with shufpd or unpck[lh]pd. */
44797 if (d.nelt == 2)
44798 return true;
44799 }
44800
44801 /* Extract the values from the vector CST into the permutation
44802 array in D. */
44803 memcpy (d.perm, sel, nelt);
44804 for (i = which = 0; i < nelt; ++i)
44805 {
44806 unsigned char e = d.perm[i];
44807 gcc_assert (e < 2 * nelt);
44808 which |= (e < nelt ? 1 : 2);
44809 }
44810
44811 /* For all elements from second vector, fold the elements to first. */
44812 if (which == 2)
44813 for (i = 0; i < nelt; ++i)
44814 d.perm[i] -= nelt;
44815
44816 /* Check whether the mask can be applied to the vector type. */
44817 d.one_operand_p = (which != 3);
44818
44819 /* Implementable with shufps or pshufd. */
44820 if (d.one_operand_p && (d.vmode == V4SFmode || d.vmode == V4SImode))
44821 return true;
44822
44823 /* Otherwise we have to go through the motions and see if we can
44824 figure out how to generate the requested permutation. */
44825 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
44826 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
44827 if (!d.one_operand_p)
44828 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
44829
44830 start_sequence ();
44831 ret = ix86_expand_vec_perm_const_1 (&d);
44832 end_sequence ();
44833
44834 return ret;
44835 }
44836
44837 void
44838 ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd)
44839 {
44840 struct expand_vec_perm_d d;
44841 unsigned i, nelt;
44842
44843 d.target = targ;
44844 d.op0 = op0;
44845 d.op1 = op1;
44846 d.vmode = GET_MODE (targ);
44847 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
44848 d.one_operand_p = false;
44849 d.testing_p = false;
44850
44851 for (i = 0; i < nelt; ++i)
44852 d.perm[i] = i * 2 + odd;
44853
44854 /* We'll either be able to implement the permutation directly... */
44855 if (expand_vec_perm_1 (&d))
44856 return;
44857
44858 /* ... or we use the special-case patterns. */
44859 expand_vec_perm_even_odd_1 (&d, odd);
44860 }
44861
44862 static void
44863 ix86_expand_vec_interleave (rtx targ, rtx op0, rtx op1, bool high_p)
44864 {
44865 struct expand_vec_perm_d d;
44866 unsigned i, nelt, base;
44867 bool ok;
44868
44869 d.target = targ;
44870 d.op0 = op0;
44871 d.op1 = op1;
44872 d.vmode = GET_MODE (targ);
44873 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
44874 d.one_operand_p = false;
44875 d.testing_p = false;
44876
44877 base = high_p ? nelt / 2 : 0;
44878 for (i = 0; i < nelt / 2; ++i)
44879 {
44880 d.perm[i * 2] = i + base;
44881 d.perm[i * 2 + 1] = i + base + nelt;
44882 }
44883
44884 /* Note that for AVX this isn't one instruction. */
44885 ok = ix86_expand_vec_perm_const_1 (&d);
44886 gcc_assert (ok);
44887 }
44888
44889
44890 /* Expand a vector operation CODE for a V*QImode in terms of the
44891 same operation on V*HImode. */
44892
44893 void
44894 ix86_expand_vecop_qihi (enum rtx_code code, rtx dest, rtx op1, rtx op2)
44895 {
44896 enum machine_mode qimode = GET_MODE (dest);
44897 enum machine_mode himode;
44898 rtx (*gen_il) (rtx, rtx, rtx);
44899 rtx (*gen_ih) (rtx, rtx, rtx);
44900 rtx op1_l, op1_h, op2_l, op2_h, res_l, res_h;
44901 struct expand_vec_perm_d d;
44902 bool ok, full_interleave;
44903 bool uns_p = false;
44904 int i;
44905
44906 switch (qimode)
44907 {
44908 case V16QImode:
44909 himode = V8HImode;
44910 gen_il = gen_vec_interleave_lowv16qi;
44911 gen_ih = gen_vec_interleave_highv16qi;
44912 break;
44913 case V32QImode:
44914 himode = V16HImode;
44915 gen_il = gen_avx2_interleave_lowv32qi;
44916 gen_ih = gen_avx2_interleave_highv32qi;
44917 break;
44918 default:
44919 gcc_unreachable ();
44920 }
44921
44922 op2_l = op2_h = op2;
44923 switch (code)
44924 {
44925 case MULT:
44926 /* Unpack data such that we've got a source byte in each low byte of
44927 each word. We don't care what goes into the high byte of each word.
44928 Rather than trying to get zero in there, it is most convenient to let
44929 it be a copy of the low byte. */
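/* Only the low byte of each HImode product is kept by the final
permutation, and that byte depends only on the low source bytes, so
the copies left in the high bytes cannot affect the result.  */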
44930 op2_l = gen_reg_rtx (qimode);
44931 op2_h = gen_reg_rtx (qimode);
44932 emit_insn (gen_il (op2_l, op2, op2));
44933 emit_insn (gen_ih (op2_h, op2, op2));
44934 /* FALLTHRU */
44935
44936 op1_l = gen_reg_rtx (qimode);
44937 op1_h = gen_reg_rtx (qimode);
44938 emit_insn (gen_il (op1_l, op1, op1));
44939 emit_insn (gen_ih (op1_h, op1, op1));
44940 full_interleave = qimode == V16QImode;
44941 break;
44942
44943 case ASHIFT:
44944 case LSHIFTRT:
44945 uns_p = true;
44946 /* FALLTHRU */
44947 case ASHIFTRT:
44948 op1_l = gen_reg_rtx (himode);
44949 op1_h = gen_reg_rtx (himode);
44950 ix86_expand_sse_unpack (op1_l, op1, uns_p, false);
44951 ix86_expand_sse_unpack (op1_h, op1, uns_p, true);
44952 full_interleave = true;
44953 break;
44954 default:
44955 gcc_unreachable ();
44956 }
44957
44958 /* Perform the operation. */
44959 res_l = expand_simple_binop (himode, code, op1_l, op2_l, NULL_RTX,
44960 1, OPTAB_DIRECT);
44961 res_h = expand_simple_binop (himode, code, op1_h, op2_h, NULL_RTX,
44962 1, OPTAB_DIRECT);
44963 gcc_assert (res_l && res_h);
44964
44965 /* Merge the data back into the right place. */
44966 d.target = dest;
44967 d.op0 = gen_lowpart (qimode, res_l);
44968 d.op1 = gen_lowpart (qimode, res_h);
44969 d.vmode = qimode;
44970 d.nelt = GET_MODE_NUNITS (qimode);
44971 d.one_operand_p = false;
44972 d.testing_p = false;
44973
44974 if (full_interleave)
44975 {
44976 /* For SSE2, we used a full interleave, so the desired
44977 results are in the even elements. */
44978 for (i = 0; i < 32; ++i)
44979 d.perm[i] = i * 2;
44980 }
44981 else
44982 {
44983 /* For AVX, the interleave used above was not cross-lane. So the
44984 extraction is of the even elements, but with the second and third quarters swapped.
44985 Happily, that is even one insn shorter than plain even extraction. */
44986 for (i = 0; i < 32; ++i)
44987 d.perm[i] = i * 2 + ((i & 24) == 8 ? 16 : (i & 24) == 16 ? -16 : 0);
44988 }
44989
44990 ok = ix86_expand_vec_perm_const_1 (&d);
44991 gcc_assert (ok);
44992
44993 set_unique_reg_note (get_last_insn (), REG_EQUAL,
44994 gen_rtx_fmt_ee (code, qimode, op1, op2));
44995 }
44996
44997 /* Helper function of ix86_expand_mul_widen_evenodd. Return true
44998 if op is CONST_VECTOR with all odd elements equal to their
44999 preceding element. */
45000
45001 static bool
45002 const_vector_equal_evenodd_p (rtx op)
45003 {
45004 enum machine_mode mode = GET_MODE (op);
45005 int i, nunits = GET_MODE_NUNITS (mode);
45006 if (GET_CODE (op) != CONST_VECTOR
45007 || nunits != CONST_VECTOR_NUNITS (op))
45008 return false;
45009 for (i = 0; i < nunits; i += 2)
45010 if (CONST_VECTOR_ELT (op, i) != CONST_VECTOR_ELT (op, i + 1))
45011 return false;
45012 return true;
45013 }
45014
45015 void
45016 ix86_expand_mul_widen_evenodd (rtx dest, rtx op1, rtx op2,
45017 bool uns_p, bool odd_p)
45018 {
45019 enum machine_mode mode = GET_MODE (op1);
45020 enum machine_mode wmode = GET_MODE (dest);
45021 rtx x;
45022 rtx orig_op1 = op1, orig_op2 = op2;
45023
45024 if (!nonimmediate_operand (op1, mode))
45025 op1 = force_reg (mode, op1);
45026 if (!nonimmediate_operand (op2, mode))
45027 op2 = force_reg (mode, op2);
45028
45029 /* We only play even/odd games with vectors of SImode. */
45030 gcc_assert (mode == V4SImode || mode == V8SImode || mode == V16SImode);
45031
45032 /* If we're looking for the odd results, shift those members down to
45033 the even slots. For some cpus this is faster than a PSHUFD. */
45034 if (odd_p)
45035 {
45036 /* For XOP use vpmacsdqh, but only for smult, as it is only
45037 signed. */
45038 if (TARGET_XOP && mode == V4SImode && !uns_p)
45039 {
45040 x = force_reg (wmode, CONST0_RTX (wmode));
45041 emit_insn (gen_xop_pmacsdqh (dest, op1, op2, x));
45042 return;
45043 }
45044
45045 x = GEN_INT (GET_MODE_UNIT_BITSIZE (mode));
45046 if (!const_vector_equal_evenodd_p (orig_op1))
45047 op1 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op1),
45048 x, NULL, 1, OPTAB_DIRECT);
45049 if (!const_vector_equal_evenodd_p (orig_op2))
45050 op2 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op2),
45051 x, NULL, 1, OPTAB_DIRECT);
45052 op1 = gen_lowpart (mode, op1);
45053 op2 = gen_lowpart (mode, op2);
45054 }
45055
45056 if (mode == V16SImode)
45057 {
45058 if (uns_p)
45059 x = gen_vec_widen_umult_even_v16si (dest, op1, op2);
45060 else
45061 x = gen_vec_widen_smult_even_v16si (dest, op1, op2);
45062 }
45063 else if (mode == V8SImode)
45064 {
45065 if (uns_p)
45066 x = gen_vec_widen_umult_even_v8si (dest, op1, op2);
45067 else
45068 x = gen_vec_widen_smult_even_v8si (dest, op1, op2);
45069 }
45070 else if (uns_p)
45071 x = gen_vec_widen_umult_even_v4si (dest, op1, op2);
45072 else if (TARGET_SSE4_1)
45073 x = gen_sse4_1_mulv2siv2di3 (dest, op1, op2);
45074 else
45075 {
45076 rtx s1, s2, t0, t1, t2;
45077
45078 /* The easiest way to implement this without PMULDQ is to go through
45079 the motions as if we are performing a full 64-bit multiply. With
45080 the exception that we need to do less shuffling of the elements. */
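      /* In other words, writing each signed input as u - s * 2^32, where u is
	 its value read as unsigned and s is 1 for negative inputs, the signed
	 product modulo 2^64 is u1 * u2 - ((s1 * u2 + s2 * u1) << 32); the
	 all-ones comparison results below supply that correction term.  */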
45081
45082 /* Compute the sign-extension, aka highparts, of the two operands. */
45083 s1 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
45084 op1, pc_rtx, pc_rtx);
45085 s2 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
45086 op2, pc_rtx, pc_rtx);
45087
45088 /* Multiply LO(A) * HI(B), and vice-versa. */
45089 t1 = gen_reg_rtx (wmode);
45090 t2 = gen_reg_rtx (wmode);
45091 emit_insn (gen_vec_widen_umult_even_v4si (t1, s1, op2));
45092 emit_insn (gen_vec_widen_umult_even_v4si (t2, s2, op1));
45093
45094 /* Multiply LO(A) * LO(B). */
45095 t0 = gen_reg_rtx (wmode);
45096 emit_insn (gen_vec_widen_umult_even_v4si (t0, op1, op2));
45097
45098 /* Combine and shift the highparts into place. */
45099 t1 = expand_binop (wmode, add_optab, t1, t2, t1, 1, OPTAB_DIRECT);
45100 t1 = expand_binop (wmode, ashl_optab, t1, GEN_INT (32), t1,
45101 1, OPTAB_DIRECT);
45102
45103 /* Combine high and low parts. */
45104 force_expand_binop (wmode, add_optab, t0, t1, dest, 1, OPTAB_DIRECT);
45105 return;
45106 }
45107 emit_insn (x);
45108 }
45109
45110 void
45111 ix86_expand_mul_widen_hilo (rtx dest, rtx op1, rtx op2,
45112 bool uns_p, bool high_p)
45113 {
45114 enum machine_mode wmode = GET_MODE (dest);
45115 enum machine_mode mode = GET_MODE (op1);
45116 rtx t1, t2, t3, t4, mask;
45117
45118 switch (mode)
45119 {
45120 case V4SImode:
45121 t1 = gen_reg_rtx (mode);
45122 t2 = gen_reg_rtx (mode);
45123 if (TARGET_XOP && !uns_p)
45124 {
45125 /* With XOP, we have pmacsdqh, aka mul_widen_odd. In this case,
45126 shuffle the elements once so that all elements are in the right
45127 place for immediate use: { A C B D }. */
45128 emit_insn (gen_sse2_pshufd_1 (t1, op1, const0_rtx, const2_rtx,
45129 const1_rtx, GEN_INT (3)));
45130 emit_insn (gen_sse2_pshufd_1 (t2, op2, const0_rtx, const2_rtx,
45131 const1_rtx, GEN_INT (3)));
45132 }
45133 else
45134 {
45135 /* Put the elements into place for the multiply. */
45136 ix86_expand_vec_interleave (t1, op1, op1, high_p);
45137 ix86_expand_vec_interleave (t2, op2, op2, high_p);
45138 high_p = false;
45139 }
45140 ix86_expand_mul_widen_evenodd (dest, t1, t2, uns_p, high_p);
45141 break;
45142
45143 case V8SImode:
45144 /* Shuffle the elements between the lanes. After this we
45145 have { A B E F | C D G H } for each operand. */
45146 t1 = gen_reg_rtx (V4DImode);
45147 t2 = gen_reg_rtx (V4DImode);
45148 emit_insn (gen_avx2_permv4di_1 (t1, gen_lowpart (V4DImode, op1),
45149 const0_rtx, const2_rtx,
45150 const1_rtx, GEN_INT (3)));
45151 emit_insn (gen_avx2_permv4di_1 (t2, gen_lowpart (V4DImode, op2),
45152 const0_rtx, const2_rtx,
45153 const1_rtx, GEN_INT (3)));
45154
45155 /* Shuffle the elements within the lanes. After this we
45156 have { A A B B | C C D D } or { E E F F | G G H H }. */
45157 t3 = gen_reg_rtx (V8SImode);
45158 t4 = gen_reg_rtx (V8SImode);
45159 mask = GEN_INT (high_p
45160 ? 2 + (2 << 2) + (3 << 4) + (3 << 6)
45161 : 0 + (0 << 2) + (1 << 4) + (1 << 6));
45162 emit_insn (gen_avx2_pshufdv3 (t3, gen_lowpart (V8SImode, t1), mask));
45163 emit_insn (gen_avx2_pshufdv3 (t4, gen_lowpart (V8SImode, t2), mask));
45164
45165 ix86_expand_mul_widen_evenodd (dest, t3, t4, uns_p, false);
45166 break;
45167
45168 case V8HImode:
45169 case V16HImode:
45170 t1 = expand_binop (mode, smul_optab, op1, op2, NULL_RTX,
45171 uns_p, OPTAB_DIRECT);
45172 t2 = expand_binop (mode,
45173 uns_p ? umul_highpart_optab : smul_highpart_optab,
45174 op1, op2, NULL_RTX, uns_p, OPTAB_DIRECT);
45175 gcc_assert (t1 && t2);
45176
45177 t3 = gen_reg_rtx (mode);
45178 ix86_expand_vec_interleave (t3, t1, t2, high_p);
45179 emit_move_insn (dest, gen_lowpart (wmode, t3));
45180 break;
45181
45182 case V16QImode:
45183 case V32QImode:
45184 t1 = gen_reg_rtx (wmode);
45185 t2 = gen_reg_rtx (wmode);
45186 ix86_expand_sse_unpack (t1, op1, uns_p, high_p);
45187 ix86_expand_sse_unpack (t2, op2, uns_p, high_p);
45188
45189 emit_insn (gen_rtx_SET (VOIDmode, dest, gen_rtx_MULT (wmode, t1, t2)));
45190 break;
45191
45192 default:
45193 gcc_unreachable ();
45194 }
45195 }
45196
45197 void
45198 ix86_expand_sse2_mulv4si3 (rtx op0, rtx op1, rtx op2)
45199 {
45200 rtx res_1, res_2, res_3, res_4;
45201
45202 res_1 = gen_reg_rtx (V4SImode);
45203 res_2 = gen_reg_rtx (V4SImode);
45204 res_3 = gen_reg_rtx (V2DImode);
45205 res_4 = gen_reg_rtx (V2DImode);
45206 ix86_expand_mul_widen_evenodd (res_3, op1, op2, true, false);
45207 ix86_expand_mul_widen_evenodd (res_4, op1, op2, true, true);
45208
45209 /* Move the results in element 2 down to element 1; we don't care
45210 what goes in elements 2 and 3. Then we can merge the parts
45211 back together with an interleave.
45212
45213 Note that two other sequences were tried:
45214 (1) Use interleaves at the start instead of psrldq, which allows
45215 us to use a single shufps to merge things back at the end.
45216 (2) Use shufps here to combine the two vectors, then pshufd to
45217 put the elements in the correct order.
45218 In both cases the cost of the reformatting stall was too high
45219 and the overall sequence slower. */
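  /* Concretely, viewing res_3 as V4SImode it holds { lo(a0*b0), hi, lo(a2*b2), hi }
     and res_4 holds { lo(a1*b1), hi, lo(a3*b3), hi }, where aN and bN are the
     N-th elements of op1 and op2; the pshufd's below move the two low products
     into elements 0 and 1, and the final interleave-low yields
     { lo(a0*b0), lo(a1*b1), lo(a2*b2), lo(a3*b3) }.  */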
45220
45221 emit_insn (gen_sse2_pshufd_1 (res_1, gen_lowpart (V4SImode, res_3),
45222 const0_rtx, const2_rtx,
45223 const0_rtx, const0_rtx));
45224 emit_insn (gen_sse2_pshufd_1 (res_2, gen_lowpart (V4SImode, res_4),
45225 const0_rtx, const2_rtx,
45226 const0_rtx, const0_rtx));
45227 res_1 = emit_insn (gen_vec_interleave_lowv4si (op0, res_1, res_2));
45228
45229 set_unique_reg_note (res_1, REG_EQUAL, gen_rtx_MULT (V4SImode, op1, op2));
45230 }
45231
45232 void
45233 ix86_expand_sse2_mulvxdi3 (rtx op0, rtx op1, rtx op2)
45234 {
45235 enum machine_mode mode = GET_MODE (op0);
45236 rtx t1, t2, t3, t4, t5, t6;
45237
45238 if (TARGET_XOP && mode == V2DImode)
45239 {
45240 /* op1: A,B,C,D, op2: E,F,G,H */
45241 op1 = gen_lowpart (V4SImode, op1);
45242 op2 = gen_lowpart (V4SImode, op2);
45243
45244 t1 = gen_reg_rtx (V4SImode);
45245 t2 = gen_reg_rtx (V4SImode);
45246 t3 = gen_reg_rtx (V2DImode);
45247 t4 = gen_reg_rtx (V2DImode);
45248
45249 /* t1: B,A,D,C */
45250 emit_insn (gen_sse2_pshufd_1 (t1, op1,
45251 GEN_INT (1),
45252 GEN_INT (0),
45253 GEN_INT (3),
45254 GEN_INT (2)));
45255
45256 /* t2: (B*E),(A*F),(D*G),(C*H) */
45257 emit_insn (gen_mulv4si3 (t2, t1, op2));
45258
45259 /* t3: (B*E)+(A*F), (D*G)+(C*H) */
45260 emit_insn (gen_xop_phadddq (t3, t2));
45261
45262 /* t4: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
45263 emit_insn (gen_ashlv2di3 (t4, t3, GEN_INT (32)));
45264
45265 /* Multiply lower parts and add all */
45266 t5 = gen_reg_rtx (V2DImode);
45267 emit_insn (gen_vec_widen_umult_even_v4si (t5,
45268 gen_lowpart (V4SImode, op1),
45269 gen_lowpart (V4SImode, op2)));
45270 op0 = expand_binop (mode, add_optab, t5, t4, op0, 1, OPTAB_DIRECT);
45271
45272 }
45273 else
45274 {
45275 enum machine_mode nmode;
45276 rtx (*umul) (rtx, rtx, rtx);
45277
45278 if (mode == V2DImode)
45279 {
45280 umul = gen_vec_widen_umult_even_v4si;
45281 nmode = V4SImode;
45282 }
45283 else if (mode == V4DImode)
45284 {
45285 umul = gen_vec_widen_umult_even_v8si;
45286 nmode = V8SImode;
45287 }
45288 else if (mode == V8DImode)
45289 {
45290 umul = gen_vec_widen_umult_even_v16si;
45291 nmode = V16SImode;
45292 }
45293 else
45294 gcc_unreachable ();
45295
45296
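      /* Writing op1 as hi1 * 2^32 + lo1 and op2 as hi2 * 2^32 + lo2 per
	 element, the product modulo 2^64 is
	 lo1 * lo2 + ((hi1 * lo2 + hi2 * lo1) << 32), which is what the
	 three widening multiplies below compute.  */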
45297 /* Multiply low parts. */
45298 t1 = gen_reg_rtx (mode);
45299 emit_insn (umul (t1, gen_lowpart (nmode, op1), gen_lowpart (nmode, op2)));
45300
45301 /* Shift input vectors right 32 bits so we can multiply high parts. */
45302 t6 = GEN_INT (32);
45303 t2 = expand_binop (mode, lshr_optab, op1, t6, NULL, 1, OPTAB_DIRECT);
45304 t3 = expand_binop (mode, lshr_optab, op2, t6, NULL, 1, OPTAB_DIRECT);
45305
45306 /* Multiply high parts by low parts. */
45307 t4 = gen_reg_rtx (mode);
45308 t5 = gen_reg_rtx (mode);
45309 emit_insn (umul (t4, gen_lowpart (nmode, t2), gen_lowpart (nmode, op2)));
45310 emit_insn (umul (t5, gen_lowpart (nmode, t3), gen_lowpart (nmode, op1)));
45311
45312 /* Combine and shift the highparts back. */
45313 t4 = expand_binop (mode, add_optab, t4, t5, t4, 1, OPTAB_DIRECT);
45314 t4 = expand_binop (mode, ashl_optab, t4, t6, t4, 1, OPTAB_DIRECT);
45315
45316 /* Combine high and low parts. */
45317 force_expand_binop (mode, add_optab, t1, t4, op0, 1, OPTAB_DIRECT);
45318 }
45319
45320 set_unique_reg_note (get_last_insn (), REG_EQUAL,
45321 gen_rtx_MULT (mode, op1, op2));
45322 }
45323
45324 /* Calculate integer abs() using only SSE2 instructions. */
45325
45326 void
45327 ix86_expand_sse2_abs (rtx target, rtx input)
45328 {
45329 enum machine_mode mode = GET_MODE (target);
45330 rtx tmp0, tmp1, x;
45331
45332 switch (mode)
45333 {
45334 /* For 32-bit signed integer X, the best way to calculate the absolute
45335 value of X is (((signed) X >> (W-1)) ^ X) - ((signed) X >> (W-1)). */
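  /* For example, with X = -5 and W = 32: X >> 31 == -1, -1 ^ -5 == 4, and
     4 - (-1) == 5; for non-negative X the shift gives 0 and the whole
     expression reduces to X.  */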
45336 case V4SImode:
45337 tmp0 = expand_simple_binop (mode, ASHIFTRT, input,
45338 GEN_INT (GET_MODE_BITSIZE
45339 (GET_MODE_INNER (mode)) - 1),
45340 NULL, 0, OPTAB_DIRECT);
45341 tmp1 = expand_simple_binop (mode, XOR, tmp0, input,
45342 NULL, 0, OPTAB_DIRECT);
45343 x = expand_simple_binop (mode, MINUS, tmp1, tmp0,
45344 target, 0, OPTAB_DIRECT);
45345 break;
45346
45347 /* For 16-bit signed integer X, the best way to calculate the absolute
45348 value of X is max (X, -X), as SSE2 provides the PMAXSW insn. */
45349 case V8HImode:
45350 tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
45351
45352 x = expand_simple_binop (mode, SMAX, tmp0, input,
45353 target, 0, OPTAB_DIRECT);
45354 break;
45355
45356 /* For 8-bit signed integer X, the best way to calculate the absolute
45357 value of X is min ((unsigned char) X, (unsigned char) (-X)),
45358 as SSE2 provides the PMINUB insn. */
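  /* For example, X = -5 reads as 251 unsigned while -X reads as 5, and the
     unsigned minimum 5 is the absolute value.  */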
45359 case V16QImode:
45360 tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
45361
45362 x = expand_simple_binop (V16QImode, UMIN, tmp0, input,
45363 target, 0, OPTAB_DIRECT);
45364 break;
45365
45366 default:
45367 gcc_unreachable ();
45368 }
45369
45370 if (x != target)
45371 emit_move_insn (target, x);
45372 }
45373
45374 /* Expand an insert into a vector register through pinsr insn.
45375 Return true if successful. */
45376
45377 bool
45378 ix86_expand_pinsr (rtx *operands)
45379 {
45380 rtx dst = operands[0];
45381 rtx src = operands[3];
45382
45383 unsigned int size = INTVAL (operands[1]);
45384 unsigned int pos = INTVAL (operands[2]);
45385
45386 if (GET_CODE (dst) == SUBREG)
45387 {
45388 pos += SUBREG_BYTE (dst) * BITS_PER_UNIT;
45389 dst = SUBREG_REG (dst);
45390 }
45391
45392 if (GET_CODE (src) == SUBREG)
45393 src = SUBREG_REG (src);
45394
45395 switch (GET_MODE (dst))
45396 {
45397 case V16QImode:
45398 case V8HImode:
45399 case V4SImode:
45400 case V2DImode:
45401 {
45402 enum machine_mode srcmode, dstmode;
45403 rtx (*pinsr)(rtx, rtx, rtx, rtx);
45404
45405 srcmode = mode_for_size (size, MODE_INT, 0);
45406
45407 switch (srcmode)
45408 {
45409 case QImode:
45410 if (!TARGET_SSE4_1)
45411 return false;
45412 dstmode = V16QImode;
45413 pinsr = gen_sse4_1_pinsrb;
45414 break;
45415
45416 case HImode:
45417 if (!TARGET_SSE2)
45418 return false;
45419 dstmode = V8HImode;
45420 pinsr = gen_sse2_pinsrw;
45421 break;
45422
45423 case SImode:
45424 if (!TARGET_SSE4_1)
45425 return false;
45426 dstmode = V4SImode;
45427 pinsr = gen_sse4_1_pinsrd;
45428 break;
45429
45430 case DImode:
45431 gcc_assert (TARGET_64BIT);
45432 if (!TARGET_SSE4_1)
45433 return false;
45434 dstmode = V2DImode;
45435 pinsr = gen_sse4_1_pinsrq;
45436 break;
45437
45438 default:
45439 return false;
45440 }
45441
45442 rtx d = dst;
45443 if (GET_MODE (dst) != dstmode)
45444 d = gen_reg_rtx (dstmode);
45445 src = gen_lowpart (srcmode, src);
45446
45447 pos /= size;
45448
45449 emit_insn (pinsr (d, gen_lowpart (dstmode, dst), src,
45450 GEN_INT (1 << pos)));
45451 if (d != dst)
45452 emit_move_insn (dst, gen_lowpart (GET_MODE (dst), d));
45453 return true;
45454 }
45455
45456 default:
45457 return false;
45458 }
45459 }
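
/* At the source level the insns emitted here correspond to the
   pinsrb/pinsrw/pinsrd/pinsrq intrinsics.  A minimal user-level
   sketch, assuming SSE4.1 and the usual <smmintrin.h> declarations:

     #include <smmintrin.h>

     static __m128i
     set_lane_2 (__m128i v, int val)
     {
       return _mm_insert_epi32 (v, val, 2);
     }

   This replaces 32-bit element number 2 of V with VAL, much as the
   expansion above emits gen_sse4_1_pinsrd with a (1 << pos) selector
   once POS has been reduced to an element index.  */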
45460 \f
45461 /* This function returns the calling-ABI-specific va_list type node.
45462 It returns the FNDECL-specific va_list type. */
45463
45464 static tree
45465 ix86_fn_abi_va_list (tree fndecl)
45466 {
45467 if (!TARGET_64BIT)
45468 return va_list_type_node;
45469 gcc_assert (fndecl != NULL_TREE);
45470
45471 if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
45472 return ms_va_list_type_node;
45473 else
45474 return sysv_va_list_type_node;
45475 }
45476
45477 /* Returns the canonical va_list type specified by TYPE. If there
45478 is no valid TYPE provided, it returns NULL_TREE. */
45479
45480 static tree
45481 ix86_canonical_va_list_type (tree type)
45482 {
45483 tree wtype, htype;
45484
45485 /* Resolve references and pointers to va_list type. */
45486 if (TREE_CODE (type) == MEM_REF)
45487 type = TREE_TYPE (type);
45488 else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE(type)))
45489 type = TREE_TYPE (type);
45490 else if (POINTER_TYPE_P (type) && TREE_CODE (TREE_TYPE (type)) == ARRAY_TYPE)
45491 type = TREE_TYPE (type);
45492
45493 if (TARGET_64BIT && va_list_type_node != NULL_TREE)
45494 {
45495 wtype = va_list_type_node;
45496 gcc_assert (wtype != NULL_TREE);
45497 htype = type;
45498 if (TREE_CODE (wtype) == ARRAY_TYPE)
45499 {
45500 /* If va_list is an array type, the argument may have decayed
45501 to a pointer type, e.g. by being passed to another function.
45502 In that case, unwrap both types so that we can compare the
45503 underlying records. */
45504 if (TREE_CODE (htype) == ARRAY_TYPE
45505 || POINTER_TYPE_P (htype))
45506 {
45507 wtype = TREE_TYPE (wtype);
45508 htype = TREE_TYPE (htype);
45509 }
45510 }
45511 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
45512 return va_list_type_node;
45513 wtype = sysv_va_list_type_node;
45514 gcc_assert (wtype != NULL_TREE);
45515 htype = type;
45516 if (TREE_CODE (wtype) == ARRAY_TYPE)
45517 {
45518 /* If va_list is an array type, the argument may have decayed
45519 to a pointer type, e.g. by being passed to another function.
45520 In that case, unwrap both types so that we can compare the
45521 underlying records. */
45522 if (TREE_CODE (htype) == ARRAY_TYPE
45523 || POINTER_TYPE_P (htype))
45524 {
45525 wtype = TREE_TYPE (wtype);
45526 htype = TREE_TYPE (htype);
45527 }
45528 }
45529 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
45530 return sysv_va_list_type_node;
45531 wtype = ms_va_list_type_node;
45532 gcc_assert (wtype != NULL_TREE);
45533 htype = type;
45534 if (TREE_CODE (wtype) == ARRAY_TYPE)
45535 {
45536 /* If va_list is an array type, the argument may have decayed
45537 to a pointer type, e.g. by being passed to another function.
45538 In that case, unwrap both types so that we can compare the
45539 underlying records. */
45540 if (TREE_CODE (htype) == ARRAY_TYPE
45541 || POINTER_TYPE_P (htype))
45542 {
45543 wtype = TREE_TYPE (wtype);
45544 htype = TREE_TYPE (htype);
45545 }
45546 }
45547 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
45548 return ms_va_list_type_node;
45549 return NULL_TREE;
45550 }
45551 return std_canonical_va_list_type (type);
45552 }
45553
45554 /* Iterate through the target-specific builtin types for va_list.
45555 IDX denotes the iterator, *PTREE is set to the result type of
45556 the va_list builtin, and *PNAME to its internal name.
45557 Returns zero if there is no element for this index, otherwise
45558 IDX should be increased upon the next call.
45559 Note, do not iterate a base builtin's name like __builtin_va_list.
45560 Used from c_common_nodes_and_builtins. */
45561
45562 static int
45563 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
45564 {
45565 if (TARGET_64BIT)
45566 {
45567 switch (idx)
45568 {
45569 default:
45570 break;
45571
45572 case 0:
45573 *ptree = ms_va_list_type_node;
45574 *pname = "__builtin_ms_va_list";
45575 return 1;
45576
45577 case 1:
45578 *ptree = sysv_va_list_type_node;
45579 *pname = "__builtin_sysv_va_list";
45580 return 1;
45581 }
45582 }
45583
45584 return 0;
45585 }
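
/* For example, a 64-bit translation unit may mix both flavours; an
   ms_abi varargs function uses the MS-style va_list while ordinary
   functions keep the SysV one (a sketch, assuming the documented
   __builtin_ms_va_* interface):

     __attribute__ ((ms_abi)) int
     ms_sum (int n, ...)
     {
       __builtin_ms_va_list ap;
       int i, s = 0;
       __builtin_ms_va_start (ap, n);
       for (i = 0; i < n; i++)
         s += __builtin_va_arg (ap, int);
       __builtin_ms_va_end (ap);
       return s;
     }

   ix86_fn_abi_va_list and ix86_canonical_va_list_type above are what
   let the middle end match uses of such a va_list against the right
   record type.  */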
45586
45587 #undef TARGET_SCHED_DISPATCH
45588 #define TARGET_SCHED_DISPATCH has_dispatch
45589 #undef TARGET_SCHED_DISPATCH_DO
45590 #define TARGET_SCHED_DISPATCH_DO do_dispatch
45591 #undef TARGET_SCHED_REASSOCIATION_WIDTH
45592 #define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width
45593 #undef TARGET_SCHED_REORDER
45594 #define TARGET_SCHED_REORDER ix86_sched_reorder
45595 #undef TARGET_SCHED_ADJUST_PRIORITY
45596 #define TARGET_SCHED_ADJUST_PRIORITY ix86_adjust_priority
45597 #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
45598 #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK \
45599 ix86_dependencies_evaluation_hook
45600
45601 /* The size of the dispatch window is the total number of bytes of
45602 object code allowed in a window. */
45603 #define DISPATCH_WINDOW_SIZE 16
45604
45605 /* Number of dispatch windows considered for scheduling. */
45606 #define MAX_DISPATCH_WINDOWS 3
45607
45608 /* Maximum number of instructions in a window. */
45609 #define MAX_INSN 4
45610
45611 /* Maximum number of immediate operands in a window. */
45612 #define MAX_IMM 4
45613
45614 /* Maximum number of immediate bits allowed in a window. */
45615 #define MAX_IMM_SIZE 128
45616
45617 /* Maximum number of 32 bit immediates allowed in a window. */
45618 #define MAX_IMM_32 4
45619
45620 /* Maximum number of 64 bit immediates allowed in a window. */
45621 #define MAX_IMM_64 2
45622
45623 /* Maximum total of loads or prefetches allowed in a window. */
45624 #define MAX_LOAD 2
45625
45626 /* Maximum total of stores allowed in a window. */
45627 #define MAX_STORE 1
45628
45629 #undef BIG
45630 #define BIG 100
45631
45632
45633 /* Dispatch groups. Instructions that affect the mix in a dispatch window. */
45634 enum dispatch_group {
45635 disp_no_group = 0,
45636 disp_load,
45637 disp_store,
45638 disp_load_store,
45639 disp_prefetch,
45640 disp_imm,
45641 disp_imm_32,
45642 disp_imm_64,
45643 disp_branch,
45644 disp_cmp,
45645 disp_jcc,
45646 disp_last
45647 };
45648
45649 /* Number of allowable groups in a dispatch window. It is an array
45650 indexed by dispatch_group enum. 100 is used as a big number,
45651 because the number of these kinds of operations does not have any
45652 effect on the dispatch window, but we need them for other reasons in
45653 the table. */
45654 static unsigned int num_allowable_groups[disp_last] = {
45655 0, 2, 1, 1, 2, 4, 4, 2, 1, BIG, BIG
45656 };
45657
45658 char group_name[disp_last + 1][16] = {
45659 "disp_no_group", "disp_load", "disp_store", "disp_load_store",
45660 "disp_prefetch", "disp_imm", "disp_imm_32", "disp_imm_64",
45661 "disp_branch", "disp_cmp", "disp_jcc", "disp_last"
45662 };
45663
45664 /* Instruction path. */
45665 enum insn_path {
45666 no_path = 0,
45667 path_single, /* Single micro op. */
45668 path_double, /* Double micro op. */
45669 path_multi, /* Instructions with more than 2 micro ops. */
45670 last_path
45671 };
45672
45673 /* sched_insn_info defines a window to the instructions scheduled in
45674 the basic block. It contains a pointer to the insn_info table and
45675 the instruction scheduled.
45676
45677 Windows are allocated for each basic block and are linked
45678 together. */
45679 typedef struct sched_insn_info_s {
45680 rtx insn;
45681 enum dispatch_group group;
45682 enum insn_path path;
45683 int byte_len;
45684 int imm_bytes;
45685 } sched_insn_info;
45686
45687 /* Linked list of dispatch windows. This is a two way list of
45688 dispatch windows of a basic block. It contains information about
45689 the number of uops in the window and the total number of
45690 instructions and of bytes in the object code for this dispatch
45691 window. */
45692 typedef struct dispatch_windows_s {
45693 int num_insn; /* Number of insn in the window. */
45694 int num_uops; /* Number of uops in the window. */
45695 int window_size; /* Number of bytes in the window. */
45696 int window_num; /* Window number, either 0 or 1. */
45697 int num_imm; /* Number of immediates in an insn. */
45698 int num_imm_32; /* Number of 32 bit immediates in an insn. */
45699 int num_imm_64; /* Number of 64 bit immediates in an insn. */
45700 int imm_size; /* Total immediates in the window. */
45701 int num_loads; /* Total memory loads in the window. */
45702 int num_stores; /* Total memory stores in the window. */
45703 int violation; /* Violation exists in window. */
45704 sched_insn_info *window; /* Pointer to the window. */
45705 struct dispatch_windows_s *next;
45706 struct dispatch_windows_s *prev;
45707 } dispatch_windows;
45708
45709 /* Immediate values used in an insn. */
45710 typedef struct imm_info_s
45711 {
45712 int imm;
45713 int imm32;
45714 int imm64;
45715 } imm_info;
45716
45717 static dispatch_windows *dispatch_window_list;
45718 static dispatch_windows *dispatch_window_list1;
45719
45720 /* Get dispatch group of insn. */
45721
45722 static enum dispatch_group
45723 get_mem_group (rtx insn)
45724 {
45725 enum attr_memory memory;
45726
45727 if (INSN_CODE (insn) < 0)
45728 return disp_no_group;
45729 memory = get_attr_memory (insn);
45730 if (memory == MEMORY_STORE)
45731 return disp_store;
45732
45733 if (memory == MEMORY_LOAD)
45734 return disp_load;
45735
45736 if (memory == MEMORY_BOTH)
45737 return disp_load_store;
45738
45739 return disp_no_group;
45740 }
45741
45742 /* Return true if insn is a compare instruction. */
45743
45744 static bool
45745 is_cmp (rtx insn)
45746 {
45747 enum attr_type type;
45748
45749 type = get_attr_type (insn);
45750 return (type == TYPE_TEST
45751 || type == TYPE_ICMP
45752 || type == TYPE_FCMP
45753 || GET_CODE (PATTERN (insn)) == COMPARE);
45754 }
45755
45756 /* Return true if a dispatch violation was encountered. */
45757
45758 static bool
45759 dispatch_violation (void)
45760 {
45761 if (dispatch_window_list->next)
45762 return dispatch_window_list->next->violation;
45763 return dispatch_window_list->violation;
45764 }
45765
45766 /* Return true if insn is a branch instruction. */
45767
45768 static bool
45769 is_branch (rtx insn)
45770 {
45771 return (CALL_P (insn) || JUMP_P (insn));
45772 }
45773
45774 /* Return true if insn is a prefetch instruction. */
45775
45776 static bool
45777 is_prefetch (rtx insn)
45778 {
45779 return NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == PREFETCH;
45780 }
45781
45782 /* This function initializes a dispatch window and the list container holding a
45783 pointer to the window. */
45784
45785 static void
45786 init_window (int window_num)
45787 {
45788 int i;
45789 dispatch_windows *new_list;
45790
45791 if (window_num == 0)
45792 new_list = dispatch_window_list;
45793 else
45794 new_list = dispatch_window_list1;
45795
45796 new_list->num_insn = 0;
45797 new_list->num_uops = 0;
45798 new_list->window_size = 0;
45799 new_list->next = NULL;
45800 new_list->prev = NULL;
45801 new_list->window_num = window_num;
45802 new_list->num_imm = 0;
45803 new_list->num_imm_32 = 0;
45804 new_list->num_imm_64 = 0;
45805 new_list->imm_size = 0;
45806 new_list->num_loads = 0;
45807 new_list->num_stores = 0;
45808 new_list->violation = false;
45809
45810 for (i = 0; i < MAX_INSN; i++)
45811 {
45812 new_list->window[i].insn = NULL;
45813 new_list->window[i].group = disp_no_group;
45814 new_list->window[i].path = no_path;
45815 new_list->window[i].byte_len = 0;
45816 new_list->window[i].imm_bytes = 0;
45817 }
45818 return;
45819 }
45820
45821 /* This function allocates and initializes a dispatch window and the
45822 list container holding a pointer to the window. */
45823
45824 static dispatch_windows *
45825 allocate_window (void)
45826 {
45827 dispatch_windows *new_list = XNEW (struct dispatch_windows_s);
45828 new_list->window = XNEWVEC (struct sched_insn_info_s, MAX_INSN + 1);
45829
45830 return new_list;
45831 }
45832
45833 /* This routine initializes the dispatch scheduling information. It
45834 initiates building dispatch scheduler tables and constructs the
45835 first dispatch window. */
45836
45837 static void
45838 init_dispatch_sched (void)
45839 {
45840 /* Allocate a dispatch list and a window. */
45841 dispatch_window_list = allocate_window ();
45842 dispatch_window_list1 = allocate_window ();
45843 init_window (0);
45844 init_window (1);
45845 }
45846
45847 /* This function returns true if a branch is detected. End of a basic block
45848 does not have to be a branch, but here we assume only branches end a
45849 window. */
45850
45851 static bool
45852 is_end_basic_block (enum dispatch_group group)
45853 {
45854 return group == disp_branch;
45855 }
45856
45857 /* This function is called when the end of a window processing is reached. */
45858
45859 static void
45860 process_end_window (void)
45861 {
45862 gcc_assert (dispatch_window_list->num_insn <= MAX_INSN);
45863 if (dispatch_window_list->next)
45864 {
45865 gcc_assert (dispatch_window_list1->num_insn <= MAX_INSN);
45866 gcc_assert (dispatch_window_list->window_size
45867 + dispatch_window_list1->window_size <= 48);
45868 init_window (1);
45869 }
45870 init_window (0);
45871 }
45872
45873 /* Allocates a new dispatch window and adds it to WINDOW_LIST.
45874 WINDOW_NUM is either 0 or 1. A maximum of two windows are generated
45875 for 48 bytes of instructions. Note that these windows are not dispatch
45876 windows whose size is DISPATCH_WINDOW_SIZE. */
45877
45878 static dispatch_windows *
45879 allocate_next_window (int window_num)
45880 {
45881 if (window_num == 0)
45882 {
45883 if (dispatch_window_list->next)
45884 init_window (1);
45885 init_window (0);
45886 return dispatch_window_list;
45887 }
45888
45889 dispatch_window_list->next = dispatch_window_list1;
45890 dispatch_window_list1->prev = dispatch_window_list;
45891
45892 return dispatch_window_list1;
45893 }
45894
45895 /* Increment the number of immediate operands of an instruction. */
45896
45897 static int
45898 find_constant_1 (rtx *in_rtx, imm_info *imm_values)
45899 {
45900 if (*in_rtx == 0)
45901 return 0;
45902
45903 switch (GET_CODE (*in_rtx))
45904 {
45905 case CONST:
45906 case SYMBOL_REF:
45907 case CONST_INT:
45908 (imm_values->imm)++;
45909 if (x86_64_immediate_operand (*in_rtx, SImode))
45910 (imm_values->imm32)++;
45911 else
45912 (imm_values->imm64)++;
45913 break;
45914
45915 case CONST_DOUBLE:
45916 (imm_values->imm)++;
45917 (imm_values->imm64)++;
45918 break;
45919
45920 case CODE_LABEL:
45921 if (LABEL_KIND (*in_rtx) == LABEL_NORMAL)
45922 {
45923 (imm_values->imm)++;
45924 (imm_values->imm32)++;
45925 }
45926 break;
45927
45928 default:
45929 break;
45930 }
45931
45932 return 0;
45933 }
45934
45935 /* Compute number of immediate operands of an instruction. */
45936
45937 static void
45938 find_constant (rtx in_rtx, imm_info *imm_values)
45939 {
45940 for_each_rtx (INSN_P (in_rtx) ? &PATTERN (in_rtx) : &in_rtx,
45941 (rtx_function) find_constant_1, (void *) imm_values);
45942 }
45943
45944 /* Return total size of immediate operands of an instruction along with number
45945 of corresponding immediate operands. It initializes its parameters to zero
45946 before calling FIND_CONSTANT.
45947 INSN is the input instruction. IMM is the total of immediates.
45948 IMM32 is the number of 32 bit immediates. IMM64 is the number of 64
45949 bit immediates. */
45950
45951 static int
45952 get_num_immediates (rtx insn, int *imm, int *imm32, int *imm64)
45953 {
45954 imm_info imm_values = {0, 0, 0};
45955
45956 find_constant (insn, &imm_values);
45957 *imm = imm_values.imm;
45958 *imm32 = imm_values.imm32;
45959 *imm64 = imm_values.imm64;
45960 return imm_values.imm32 * 4 + imm_values.imm64 * 8;
45961 }
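
/* For instance, an insn carrying one 32-bit and one 64-bit immediate
   reports *IMM = 2, *IMM32 = 1, *IMM64 = 1 and a total size of
   1 * 4 + 1 * 8 = 12 bytes, which is what gets charged against
   MAX_IMM_SIZE in the window accounting below.  */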
45962
45963 /* This function indicates whether an instruction has an immediate
45964 operand. */
45965
45966 static bool
45967 has_immediate (rtx insn)
45968 {
45969 int num_imm_operand;
45970 int num_imm32_operand;
45971 int num_imm64_operand;
45972
45973 if (insn)
45974 return get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
45975 &num_imm64_operand);
45976 return false;
45977 }
45978
45979 /* Return single or double path for instructions. */
45980
45981 static enum insn_path
45982 get_insn_path (rtx insn)
45983 {
45984 enum attr_amdfam10_decode path = get_attr_amdfam10_decode (insn);
45985
45986 if ((int)path == 0)
45987 return path_single;
45988
45989 if ((int)path == 1)
45990 return path_double;
45991
45992 return path_multi;
45993 }
45994
45995 /* Return insn dispatch group. */
45996
45997 static enum dispatch_group
45998 get_insn_group (rtx insn)
45999 {
46000 enum dispatch_group group = get_mem_group (insn);
46001 if (group)
46002 return group;
46003
46004 if (is_branch (insn))
46005 return disp_branch;
46006
46007 if (is_cmp (insn))
46008 return disp_cmp;
46009
46010 if (has_immediate (insn))
46011 return disp_imm;
46012
46013 if (is_prefetch (insn))
46014 return disp_prefetch;
46015
46016 return disp_no_group;
46017 }
46018
46019 /* Count number of GROUP restricted instructions in a dispatch
46020 window WINDOW_LIST. */
46021
46022 static int
46023 count_num_restricted (rtx insn, dispatch_windows *window_list)
46024 {
46025 enum dispatch_group group = get_insn_group (insn);
46026 int imm_size;
46027 int num_imm_operand;
46028 int num_imm32_operand;
46029 int num_imm64_operand;
46030
46031 if (group == disp_no_group)
46032 return 0;
46033
46034 if (group == disp_imm)
46035 {
46036 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
46037 &num_imm64_operand);
46038 if (window_list->imm_size + imm_size > MAX_IMM_SIZE
46039 || num_imm_operand + window_list->num_imm > MAX_IMM
46040 || (num_imm32_operand > 0
46041 && (window_list->num_imm_32 + num_imm32_operand > MAX_IMM_32
46042 || window_list->num_imm_64 * 2 + num_imm32_operand > MAX_IMM_32))
46043 || (num_imm64_operand > 0
46044 && (window_list->num_imm_64 + num_imm64_operand > MAX_IMM_64
46045 || window_list->num_imm_32 + num_imm64_operand * 2 > MAX_IMM_32))
46046 || (window_list->imm_size + imm_size == MAX_IMM_SIZE
46047 && num_imm64_operand > 0
46048 && ((window_list->num_imm_64 > 0
46049 && window_list->num_insn >= 2)
46050 || window_list->num_insn >= 3)))
46051 return BIG;
46052
46053 return 1;
46054 }
46055
46056 if ((group == disp_load_store
46057 && (window_list->num_loads >= MAX_LOAD
46058 || window_list->num_stores >= MAX_STORE))
46059 || ((group == disp_load
46060 || group == disp_prefetch)
46061 && window_list->num_loads >= MAX_LOAD)
46062 || (group == disp_store
46063 && window_list->num_stores >= MAX_STORE))
46064 return BIG;
46065
46066 return 1;
46067 }
46068
46069 /* This function returns true if insn satisfies dispatch rules on the
46070 last window scheduled. */
46071
46072 static bool
46073 fits_dispatch_window (rtx insn)
46074 {
46075 dispatch_windows *window_list = dispatch_window_list;
46076 dispatch_windows *window_list_next = dispatch_window_list->next;
46077 unsigned int num_restrict;
46078 enum dispatch_group group = get_insn_group (insn);
46079 enum insn_path path = get_insn_path (insn);
46080 int sum;
46081
46082 /* Make disp_cmp and disp_jcc get scheduled at the latest. These
46083 instructions should be given the lowest priority in the
46084 scheduling process in the Haifa scheduler to make sure they will be
46085 scheduled in the same dispatch window as the reference to them. */
46086 if (group == disp_jcc || group == disp_cmp)
46087 return false;
46088
46089 /* Check nonrestricted. */
46090 if (group == disp_no_group || group == disp_branch)
46091 return true;
46092
46093 /* Get last dispatch window. */
46094 if (window_list_next)
46095 window_list = window_list_next;
46096
46097 if (window_list->window_num == 1)
46098 {
46099 sum = window_list->prev->window_size + window_list->window_size;
46100
46101 if (sum == 32
46102 || (min_insn_size (insn) + sum) >= 48)
46103 /* Window 1 is full. Go for next window. */
46104 return true;
46105 }
46106
46107 num_restrict = count_num_restricted (insn, window_list);
46108
46109 if (num_restrict > num_allowable_groups[group])
46110 return false;
46111
46112 /* See if it fits in the first window. */
46113 if (window_list->window_num == 0)
46114 {
46115 /* The first window should have only single and double path
46116 uops. */
46117 if (path == path_double
46118 && (window_list->num_uops + 2) > MAX_INSN)
46119 return false;
46120 else if (path != path_single)
46121 return false;
46122 }
46123 return true;
46124 }
46125
46126 /* Add an instruction INSN with NUM_UOPS micro-operations to the
46127 dispatch window WINDOW_LIST. */
46128
46129 static void
46130 add_insn_window (rtx insn, dispatch_windows *window_list, int num_uops)
46131 {
46132 int byte_len = min_insn_size (insn);
46133 int num_insn = window_list->num_insn;
46134 int imm_size;
46135 sched_insn_info *window = window_list->window;
46136 enum dispatch_group group = get_insn_group (insn);
46137 enum insn_path path = get_insn_path (insn);
46138 int num_imm_operand;
46139 int num_imm32_operand;
46140 int num_imm64_operand;
46141
46142 if (!window_list->violation && group != disp_cmp
46143 && !fits_dispatch_window (insn))
46144 window_list->violation = true;
46145
46146 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
46147 &num_imm64_operand);
46148
46149 /* Initialize window with new instruction. */
46150 window[num_insn].insn = insn;
46151 window[num_insn].byte_len = byte_len;
46152 window[num_insn].group = group;
46153 window[num_insn].path = path;
46154 window[num_insn].imm_bytes = imm_size;
46155
46156 window_list->window_size += byte_len;
46157 window_list->num_insn = num_insn + 1;
46158 window_list->num_uops = window_list->num_uops + num_uops;
46159 window_list->imm_size += imm_size;
46160 window_list->num_imm += num_imm_operand;
46161 window_list->num_imm_32 += num_imm32_operand;
46162 window_list->num_imm_64 += num_imm64_operand;
46163
46164 if (group == disp_store)
46165 window_list->num_stores += 1;
46166 else if (group == disp_load
46167 || group == disp_prefetch)
46168 window_list->num_loads += 1;
46169 else if (group == disp_load_store)
46170 {
46171 window_list->num_stores += 1;
46172 window_list->num_loads += 1;
46173 }
46174 }
46175
46176 /* Adds a scheduled instruction, INSN, to the current dispatch window.
46177 If the total bytes of instructions or the number of instructions in
46178 the window exceed the allowable limits, it allocates a new window. */
46179
46180 static void
46181 add_to_dispatch_window (rtx insn)
46182 {
46183 int byte_len;
46184 dispatch_windows *window_list;
46185 dispatch_windows *next_list;
46186 dispatch_windows *window0_list;
46187 enum insn_path path;
46188 enum dispatch_group insn_group;
46189 bool insn_fits;
46190 int num_insn;
46191 int num_uops;
46192 int window_num;
46193 int insn_num_uops;
46194 int sum;
46195
46196 if (INSN_CODE (insn) < 0)
46197 return;
46198
46199 byte_len = min_insn_size (insn);
46200 window_list = dispatch_window_list;
46201 next_list = window_list->next;
46202 path = get_insn_path (insn);
46203 insn_group = get_insn_group (insn);
46204
46205 /* Get the last dispatch window. */
46206 if (next_list)
46207 window_list = dispatch_window_list->next;
46208
46209 if (path == path_single)
46210 insn_num_uops = 1;
46211 else if (path == path_double)
46212 insn_num_uops = 2;
46213 else
46214 insn_num_uops = (int) path;
46215
46216 /* If the current window is full, get a new window.
46217 Window number zero is full if MAX_INSN uops are scheduled in it.
46218 Window number one is full if window zero's bytes plus window
46219 one's bytes equal 32, or if adding the bytes of the new instruction
46220 makes the total greater than 48, or if it already has MAX_INSN
46221 instructions in it. */
46222 num_insn = window_list->num_insn;
46223 num_uops = window_list->num_uops;
46224 window_num = window_list->window_num;
46225 insn_fits = fits_dispatch_window (insn);
46226
46227 if (num_insn >= MAX_INSN
46228 || num_uops + insn_num_uops > MAX_INSN
46229 || !(insn_fits))
46230 {
46231 window_num = ~window_num & 1;
46232 window_list = allocate_next_window (window_num);
46233 }
46234
46235 if (window_num == 0)
46236 {
46237 add_insn_window (insn, window_list, insn_num_uops);
46238 if (window_list->num_insn >= MAX_INSN
46239 && insn_group == disp_branch)
46240 {
46241 process_end_window ();
46242 return;
46243 }
46244 }
46245 else if (window_num == 1)
46246 {
46247 window0_list = window_list->prev;
46248 sum = window0_list->window_size + window_list->window_size;
46249 if (sum == 32
46250 || (byte_len + sum) >= 48)
46251 {
46252 process_end_window ();
46253 window_list = dispatch_window_list;
46254 }
46255
46256 add_insn_window (insn, window_list, insn_num_uops);
46257 }
46258 else
46259 gcc_unreachable ();
46260
46261 if (is_end_basic_block (insn_group))
46262 {
46263 /* End of basic block is reached; do end-basic-block processing. */
46264 process_end_window ();
46265 return;
46266 }
46267 }
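
/* A condensed scalar sketch of the fullness tests applied above
   (names illustrative only):

     static int
     window_is_full (int wnum, int num_insn, int num_uops, int insn_uops,
                     int sum_bytes, int insn_bytes)
     {
       if (num_insn >= MAX_INSN || num_uops + insn_uops > MAX_INSN)
         return 1;
       if (wnum == 1)
         return sum_bytes == 32 || sum_bytes + insn_bytes >= 48;
       return 0;
     }

   The uop-count part of the test switches to the other window, while
   the byte-count part, checked when adding to window one, flushes both
   windows via process_end_window.  */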
46268
46269 /* Print the dispatch window, WINDOW_NUM, to FILE. */
46270
46271 DEBUG_FUNCTION static void
46272 debug_dispatch_window_file (FILE *file, int window_num)
46273 {
46274 dispatch_windows *list;
46275 int i;
46276
46277 if (window_num == 0)
46278 list = dispatch_window_list;
46279 else
46280 list = dispatch_window_list1;
46281
46282 fprintf (file, "Window #%d:\n", list->window_num);
46283 fprintf (file, " num_insn = %d, num_uops = %d, window_size = %d\n",
46284 list->num_insn, list->num_uops, list->window_size);
46285 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
46286 list->num_imm, list->num_imm_32, list->num_imm_64, list->imm_size);
46287
46288 fprintf (file, " num_loads = %d, num_stores = %d\n", list->num_loads,
46289 list->num_stores);
46290 fprintf (file, " insn info:\n");
46291
46292 for (i = 0; i < MAX_INSN; i++)
46293 {
46294 if (!list->window[i].insn)
46295 break;
46296 fprintf (file, " group[%d] = %s, insn[%d] = %p, path[%d] = %d byte_len[%d] = %d, imm_bytes[%d] = %d\n",
46297 i, group_name[list->window[i].group],
46298 i, (void *)list->window[i].insn,
46299 i, list->window[i].path,
46300 i, list->window[i].byte_len,
46301 i, list->window[i].imm_bytes);
46302 }
46303 }
46304
46305 /* Print to stdout a dispatch window. */
46306
46307 DEBUG_FUNCTION void
46308 debug_dispatch_window (int window_num)
46309 {
46310 debug_dispatch_window_file (stdout, window_num);
46311 }
46312
46313 /* Print INSN dispatch information to FILE. */
46314
46315 DEBUG_FUNCTION static void
46316 debug_insn_dispatch_info_file (FILE *file, rtx insn)
46317 {
46318 int byte_len;
46319 enum insn_path path;
46320 enum dispatch_group group;
46321 int imm_size;
46322 int num_imm_operand;
46323 int num_imm32_operand;
46324 int num_imm64_operand;
46325
46326 if (INSN_CODE (insn) < 0)
46327 return;
46328
46329 byte_len = min_insn_size (insn);
46330 path = get_insn_path (insn);
46331 group = get_insn_group (insn);
46332 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
46333 &num_imm64_operand);
46334
46335 fprintf (file, " insn info:\n");
46336 fprintf (file, " group = %s, path = %d, byte_len = %d\n",
46337 group_name[group], path, byte_len);
46338 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
46339 num_imm_operand, num_imm32_operand, num_imm64_operand, imm_size);
46340 }
46341
46342 /* Print to STDERR the status of the ready list with respect to
46343 dispatch windows. */
46344
46345 DEBUG_FUNCTION void
46346 debug_ready_dispatch (void)
46347 {
46348 int i;
46349 int no_ready = number_in_ready ();
46350
46351 fprintf (stdout, "Number of ready: %d\n", no_ready);
46352
46353 for (i = 0; i < no_ready; i++)
46354 debug_insn_dispatch_info_file (stdout, get_ready_element (i));
46355 }
46356
46357 /* This routine is the driver of the dispatch scheduler. */
46358
46359 static void
46360 do_dispatch (rtx insn, int mode)
46361 {
46362 if (mode == DISPATCH_INIT)
46363 init_dispatch_sched ();
46364 else if (mode == ADD_TO_DISPATCH_WINDOW)
46365 add_to_dispatch_window (insn);
46366 }
46367
46368 /* Return TRUE if Dispatch Scheduling is supported. */
46369
46370 static bool
46371 has_dispatch (rtx insn, int action)
46372 {
46373 if ((TARGET_BDVER1 || TARGET_BDVER2 || TARGET_BDVER3 || TARGET_BDVER4)
46374 && flag_dispatch_scheduler)
46375 switch (action)
46376 {
46377 default:
46378 return false;
46379
46380 case IS_DISPATCH_ON:
46381 return true;
46382 break;
46383
46384 case IS_CMP:
46385 return is_cmp (insn);
46386
46387 case DISPATCH_VIOLATION:
46388 return dispatch_violation ();
46389
46390 case FITS_DISPATCH_WINDOW:
46391 return fits_dispatch_window (insn);
46392 }
46393
46394 return false;
46395 }
46396
46397 /* Implementation of reassociation_width target hook used by
46398 reassoc phase to identify parallelism level in reassociated
46399 tree. The statement's tree_code is passed in OPC. The arguments'
46400 type is passed in MODE.
46401
46402 Currently parallel reassociation is enabled for Atom
46403 processors only and we set reassociation width to be 2
46404 because Atom may issue up to 2 instructions per cycle.
46405
46406 Return value should be fixed if parallel reassociation is
46407 enabled for other processors. */
46408
46409 static int
46410 ix86_reassociation_width (unsigned int, enum machine_mode mode)
46411 {
46412 int res = 1;
46413
46414 /* Vector part. */
46415 if (VECTOR_MODE_P (mode))
46416 {
46417 if (TARGET_VECTOR_PARALLEL_EXECUTION)
46418 return 2;
46419 else
46420 return 1;
46421 }
46422
46423 /* Scalar part. */
46424 if (INTEGRAL_MODE_P (mode) && TARGET_REASSOC_INT_TO_PARALLEL)
46425 res = 2;
46426 else if (FLOAT_MODE_P (mode) && TARGET_REASSOC_FP_TO_PARALLEL)
46427 res = 2;
46428
46429 return res;
46430 }
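
/* For example, with a width of 2 the reassociation pass may turn the
   chain a + b + c + d into (a + b) + (c + d), so that the two partial
   sums can issue in parallel on a 2-wide machine.  */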
46431
46432 /* ??? No autovectorization into MMX or 3DNOW until we can reliably
46433 place emms and femms instructions. */
46434
46435 static enum machine_mode
46436 ix86_preferred_simd_mode (enum machine_mode mode)
46437 {
46438 if (!TARGET_SSE)
46439 return word_mode;
46440
46441 switch (mode)
46442 {
46443 case QImode:
46444 return (TARGET_AVX && !TARGET_PREFER_AVX128) ? V32QImode : V16QImode;
46445 case HImode:
46446 return (TARGET_AVX && !TARGET_PREFER_AVX128) ? V16HImode : V8HImode;
46447 case SImode:
46448 return TARGET_AVX512F ? V16SImode :
46449 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V8SImode : V4SImode;
46450 case DImode:
46451 return TARGET_AVX512F ? V8DImode :
46452 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V4DImode : V2DImode;
46453
46454 case SFmode:
46455 if (TARGET_AVX512F)
46456 return V16SFmode;
46457 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
46458 return V8SFmode;
46459 else
46460 return V4SFmode;
46461
46462 case DFmode:
46463 if (!TARGET_VECTORIZE_DOUBLE)
46464 return word_mode;
46465 else if (TARGET_AVX512F)
46466 return V8DFmode;
46467 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
46468 return V4DFmode;
46469 else if (TARGET_SSE2)
46470 return V2DFmode;
46471 /* FALLTHRU */
46472
46473 default:
46474 return word_mode;
46475 }
46476 }
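
/* E.g. a loop over double prefers V4DFmode with -mavx2, V2DFmode with
   plain -msse2 and V8DFmode with -mavx512f, while -mprefer-avx128
   keeps the 128-bit choice even when AVX is available.  */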
46477
46478 /* If AVX is enabled then try vectorizing with both 256bit and 128bit
46479 vectors. If AVX512F is enabled then try vectorizing with 512bit,
46480 256bit and 128bit vectors. */
46481
46482 static unsigned int
46483 ix86_autovectorize_vector_sizes (void)
46484 {
46485 return TARGET_AVX512F ? 64 | 32 | 16 :
46486 (TARGET_AVX && !TARGET_PREFER_AVX128) ? 32 | 16 : 0;
46487 }
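
/* The returned value is a bit mask of vector byte sizes; with AVX512F
   it is 64 | 32 | 16 = 112 and the vectorizer tries each size in turn.
   A purely illustrative sketch of walking such a mask (the callee name
   is hypothetical):

     unsigned sizes = ix86_autovectorize_vector_sizes ();
     while (sizes)
       {
         unsigned size = 1u << (31 - __builtin_clz (sizes));
         try_vectorization_with (size);
         sizes &= ~size;
       }

   Larger sizes are visited first because the top bit is peeled off on
   each iteration.  */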
46488
46489 \f
46490
46491 /* Return class of registers which could be used for pseudo of MODE
46492 and of class RCLASS for spilling instead of memory. Return NO_REGS
46493 if it is not possible or not profitable. */
46494 static reg_class_t
46495 ix86_spill_class (reg_class_t rclass, enum machine_mode mode)
46496 {
46497 if (TARGET_SSE && TARGET_GENERAL_REGS_SSE_SPILL && ! TARGET_MMX
46498 && (mode == SImode || (TARGET_64BIT && mode == DImode))
46499 && rclass != NO_REGS && INTEGER_CLASS_P (rclass))
46500 return ALL_SSE_REGS;
46501 return NO_REGS;
46502 }
46503
46504 /* Implement targetm.vectorize.init_cost. */
46505
46506 static void *
46507 ix86_init_cost (struct loop *)
46508 {
46509 unsigned *cost = XNEWVEC (unsigned, 3);
46510 cost[vect_prologue] = cost[vect_body] = cost[vect_epilogue] = 0;
46511 return cost;
46512 }
46513
46514 /* Implement targetm.vectorize.add_stmt_cost. */
46515
46516 static unsigned
46517 ix86_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
46518 struct _stmt_vec_info *stmt_info, int misalign,
46519 enum vect_cost_model_location where)
46520 {
46521 unsigned *cost = (unsigned *) data;
46522 unsigned retval = 0;
46523
46524 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
46525 int stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);
46526
46527 /* Statements in an inner loop relative to the loop being
46528 vectorized are weighted more heavily. The value here is
46529 arbitrary and could potentially be improved with analysis. */
46530 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
46531 count *= 50; /* FIXME. */
46532
46533 retval = (unsigned) (count * stmt_cost);
46534
46535 /* We need to multiply all vector stmt costs by 1.7 (estimated cost)
46536 for Silvermont, as it has an out-of-order integer pipeline and can execute
46537 2 scalar instructions per tick, but has an in-order SIMD pipeline. */
46538 if (TARGET_SILVERMONT || TARGET_INTEL)
46539 if (stmt_info && stmt_info->stmt)
46540 {
46541 tree lhs_op = gimple_get_lhs (stmt_info->stmt);
46542 if (lhs_op && TREE_CODE (TREE_TYPE (lhs_op)) == INTEGER_TYPE)
46543 retval = (retval * 17) / 10;
46544 }
46545
46546 cost[where] += retval;
46547
46548 return retval;
46549 }
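
/* For example, a vector statement with a base cost of 4 that sits in
   an inner loop is charged 4 * 50 = 200, and on Silvermont an integer
   result further scales that to (200 * 17) / 10 = 340 before it is
   added to the COST[WHERE] accumulator.  */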
46550
46551 /* Implement targetm.vectorize.finish_cost. */
46552
46553 static void
46554 ix86_finish_cost (void *data, unsigned *prologue_cost,
46555 unsigned *body_cost, unsigned *epilogue_cost)
46556 {
46557 unsigned *cost = (unsigned *) data;
46558 *prologue_cost = cost[vect_prologue];
46559 *body_cost = cost[vect_body];
46560 *epilogue_cost = cost[vect_epilogue];
46561 }
46562
46563 /* Implement targetm.vectorize.destroy_cost_data. */
46564
46565 static void
46566 ix86_destroy_cost_data (void *data)
46567 {
46568 free (data);
46569 }
46570
46571 /* Validate target specific memory model bits in VAL. */
46572
46573 static unsigned HOST_WIDE_INT
46574 ix86_memmodel_check (unsigned HOST_WIDE_INT val)
46575 {
46576 unsigned HOST_WIDE_INT model = val & MEMMODEL_MASK;
46577 bool strong;
46578
46579 if (val & ~(unsigned HOST_WIDE_INT)(IX86_HLE_ACQUIRE|IX86_HLE_RELEASE
46580 |MEMMODEL_MASK)
46581 || ((val & IX86_HLE_ACQUIRE) && (val & IX86_HLE_RELEASE)))
46582 {
46583 warning (OPT_Winvalid_memory_model,
46584 "Unknown architecture specific memory model");
46585 return MEMMODEL_SEQ_CST;
46586 }
46587 strong = (model == MEMMODEL_ACQ_REL || model == MEMMODEL_SEQ_CST);
46588 if (val & IX86_HLE_ACQUIRE && !(model == MEMMODEL_ACQUIRE || strong))
46589 {
46590 warning (OPT_Winvalid_memory_model,
46591 "HLE_ACQUIRE not used with ACQUIRE or stronger memory model");
46592 return MEMMODEL_SEQ_CST | IX86_HLE_ACQUIRE;
46593 }
46594 if (val & IX86_HLE_RELEASE && !(model == MEMMODEL_RELEASE || strong))
46595 {
46596 warning (OPT_Winvalid_memory_model,
46597 "HLE_RELEASE not used with RELEASE or stronger memory model");
46598 return MEMMODEL_SEQ_CST | IX86_HLE_RELEASE;
46599 }
46600 return val;
46601 }
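
/* The extra bits validated here back the HLE forms of the __atomic
   builtins.  A minimal spin-lock sketch, assuming -mhle so that the
   __ATOMIC_HLE_* macros are defined:

     static int lock;

     static void
     hle_lock (void)
     {
       while (__atomic_exchange_n (&lock, 1,
                                   __ATOMIC_ACQUIRE | __ATOMIC_HLE_ACQUIRE))
         ;
     }

     static void
     hle_unlock (void)
     {
       __atomic_store_n (&lock, 0,
                         __ATOMIC_RELEASE | __ATOMIC_HLE_RELEASE);
     }

   Combining an HLE bit with an incompatible memory model is what the
   checks above warn about.  */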
46602
46603 /* Set CLONEI->vecsize_mangle, CLONEI->vecsize_int,
46604 CLONEI->vecsize_float and if CLONEI->simdlen is 0, also
46605 CLONEI->simdlen. Return 0 if SIMD clones shouldn't be emitted,
46606 or number of vecsize_mangle variants that should be emitted. */
46607
46608 static int
46609 ix86_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
46610 struct cgraph_simd_clone *clonei,
46611 tree base_type, int num)
46612 {
46613 int ret = 1;
46614
46615 if (clonei->simdlen
46616 && (clonei->simdlen < 2
46617 || clonei->simdlen > 16
46618 || (clonei->simdlen & (clonei->simdlen - 1)) != 0))
46619 {
46620 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
46621 "unsupported simdlen %d", clonei->simdlen);
46622 return 0;
46623 }
46624
46625 tree ret_type = TREE_TYPE (TREE_TYPE (node->decl));
46626 if (TREE_CODE (ret_type) != VOID_TYPE)
46627 switch (TYPE_MODE (ret_type))
46628 {
46629 case QImode:
46630 case HImode:
46631 case SImode:
46632 case DImode:
46633 case SFmode:
46634 case DFmode:
46635 /* case SCmode: */
46636 /* case DCmode: */
46637 break;
46638 default:
46639 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
46640 "unsupported return type %qT for simd\n", ret_type);
46641 return 0;
46642 }
46643
46644 tree t;
46645 int i;
46646
46647 for (t = DECL_ARGUMENTS (node->decl), i = 0; t; t = DECL_CHAIN (t), i++)
46648 /* FIXME: Shouldn't we allow such arguments if they are uniform? */
46649 switch (TYPE_MODE (TREE_TYPE (t)))
46650 {
46651 case QImode:
46652 case HImode:
46653 case SImode:
46654 case DImode:
46655 case SFmode:
46656 case DFmode:
46657 /* case SCmode: */
46658 /* case DCmode: */
46659 break;
46660 default:
46661 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
46662 "unsupported argument type %qT for simd\n", TREE_TYPE (t));
46663 return 0;
46664 }
46665
46666 if (clonei->cilk_elemental)
46667 {
46668 /* Parse the processor clause here. If not present, default to 'b'. */
46669 clonei->vecsize_mangle = 'b';
46670 }
46671 else if (!TREE_PUBLIC (node->decl))
46672 {
46673 /* If the function isn't exported, we can pick up just one ISA
46674 for the clones. */
46675 if (TARGET_AVX2)
46676 clonei->vecsize_mangle = 'd';
46677 else if (TARGET_AVX)
46678 clonei->vecsize_mangle = 'c';
46679 else
46680 clonei->vecsize_mangle = 'b';
46681 ret = 1;
46682 }
46683 else
46684 {
46685 clonei->vecsize_mangle = "bcd"[num];
46686 ret = 3;
46687 }
46688 switch (clonei->vecsize_mangle)
46689 {
46690 case 'b':
46691 clonei->vecsize_int = 128;
46692 clonei->vecsize_float = 128;
46693 break;
46694 case 'c':
46695 clonei->vecsize_int = 128;
46696 clonei->vecsize_float = 256;
46697 break;
46698 case 'd':
46699 clonei->vecsize_int = 256;
46700 clonei->vecsize_float = 256;
46701 break;
46702 }
46703 if (clonei->simdlen == 0)
46704 {
46705 if (SCALAR_INT_MODE_P (TYPE_MODE (base_type)))
46706 clonei->simdlen = clonei->vecsize_int;
46707 else
46708 clonei->simdlen = clonei->vecsize_float;
46709 clonei->simdlen /= GET_MODE_BITSIZE (TYPE_MODE (base_type));
46710 if (clonei->simdlen > 16)
46711 clonei->simdlen = 16;
46712 }
46713 return ret;
46714 }
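
/* As an illustration, for an exported function such as

     #pragma omp declare simd
     double f (double x, double y) { return x * y; }

   this hook returns 3 and is queried for NUM = 0, 1 and 2, selecting
   the 'b' (SSE), 'c' (AVX) and 'd' (AVX2) variants; with a double
   base type the computed simdlen is vecsize_float / 64, i.e. 2, 4 and
   4 lanes respectively.  */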
46715
46716 /* Add target attribute to SIMD clone NODE if needed. */
46717
46718 static void
46719 ix86_simd_clone_adjust (struct cgraph_node *node)
46720 {
46721 const char *str = NULL;
46722 gcc_assert (node->decl == cfun->decl);
46723 switch (node->simdclone->vecsize_mangle)
46724 {
46725 case 'b':
46726 if (!TARGET_SSE2)
46727 str = "sse2";
46728 break;
46729 case 'c':
46730 if (!TARGET_AVX)
46731 str = "avx";
46732 break;
46733 case 'd':
46734 if (!TARGET_AVX2)
46735 str = "avx2";
46736 break;
46737 default:
46738 gcc_unreachable ();
46739 }
46740 if (str == NULL)
46741 return;
46742 push_cfun (NULL);
46743 tree args = build_tree_list (NULL_TREE, build_string (strlen (str), str));
46744 bool ok = ix86_valid_target_attribute_p (node->decl, NULL, args, 0);
46745 gcc_assert (ok);
46746 pop_cfun ();
46747 ix86_previous_fndecl = NULL_TREE;
46748 ix86_set_current_function (node->decl);
46749 }
46750
46751 /* If SIMD clone NODE can't be used in a vectorized loop
46752 in current function, return -1, otherwise return a badness of using it
46753 (0 if it is most desirable from vecsize_mangle point of view, 1
46754 slightly less desirable, etc.). */
46755
46756 static int
46757 ix86_simd_clone_usable (struct cgraph_node *node)
46758 {
46759 switch (node->simdclone->vecsize_mangle)
46760 {
46761 case 'b':
46762 if (!TARGET_SSE2)
46763 return -1;
46764 if (!TARGET_AVX)
46765 return 0;
46766 return TARGET_AVX2 ? 2 : 1;
46767 case 'c':
46768 if (!TARGET_AVX)
46769 return -1;
46770 return TARGET_AVX2 ? 1 : 0;
46771 break;
46772 case 'd':
46773 if (!TARGET_AVX2)
46774 return -1;
46775 return 0;
46776 default:
46777 gcc_unreachable ();
46778 }
46779 }
46780
46781 /* This function counts the number of memory references.
46782 This value determines the unrolling factor for
46783 bdver3 and bdver4 architectures. */
46784
46785 static int
46786 ix86_loop_memcount (rtx *x, unsigned *mem_count)
46787 {
46788 if (*x != NULL_RTX && MEM_P (*x))
46789 {
46790 enum machine_mode mode;
46791 unsigned int n_words;
46792
46793 mode = GET_MODE (*x);
46794 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
46795
46796 if (n_words > 4)
46797 (*mem_count)+=2;
46798 else
46799 (*mem_count)+=1;
46800 }
46801 return 0;
46802 }
46803
46804 /* This function adjusts the unroll factor based on
46805 the hardware capabilities. For example, bdver3 has
46806 a loop buffer which makes unrolling of smaller
46807 loops less important. This function decides the
46808 unroll factor using the number of memory references
46809 (the value 32 is used) as a heuristic. */
46810
46811 static unsigned
46812 ix86_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
46813 {
46814 basic_block *bbs;
46815 rtx insn;
46816 unsigned i;
46817 unsigned mem_count = 0;
46818
46819 if (!TARGET_ADJUST_UNROLL)
46820 return nunroll;
46821
46822 /* Count the number of memory references within the loop body. */
46823 bbs = get_loop_body (loop);
46824 for (i = 0; i < loop->num_nodes; i++)
46825 {
46826 for (insn = BB_HEAD (bbs[i]); insn != BB_END (bbs[i]); insn = NEXT_INSN (insn))
46827 if (NONDEBUG_INSN_P (insn))
46828 for_each_rtx (&insn, (rtx_function) ix86_loop_memcount, &mem_count);
46829 }
46830 free (bbs);
46831
46832 if (mem_count && mem_count <= 32)
46833 return 32 / mem_count;
46834
46835 return nunroll;
46836 }
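
/* For instance, a loop body with 4 counted memory references gets an
   unroll factor of 32 / 4 = 8, while a loop with none (or more than
   32) keeps the caller's NUNROLL; references wider than 4 words count
   twice.  */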
46837
46838
46839 /* Implement TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P. */
46840
46841 static bool
46842 ix86_float_exceptions_rounding_supported_p (void)
46843 {
46844 /* For x87 floating point with standard excess precision handling,
46845 there is no adddf3 pattern (since x87 floating point only has
46846 XFmode operations) so the default hook implementation gets this
46847 wrong. */
46848 return TARGET_80387 || TARGET_SSE_MATH;
46849 }
46850
46851 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV. */
46852
46853 static void
46854 ix86_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
46855 {
46856 if (!TARGET_80387 && !TARGET_SSE_MATH)
46857 return;
46858 tree exceptions_var = create_tmp_var (integer_type_node, NULL);
46859 if (TARGET_80387)
46860 {
46861 tree fenv_index_type = build_index_type (size_int (6));
46862 tree fenv_type = build_array_type (unsigned_type_node, fenv_index_type);
46863 tree fenv_var = create_tmp_var (fenv_type, NULL);
46864 mark_addressable (fenv_var);
46865 tree fenv_ptr = build_pointer_type (fenv_type);
46866 tree fenv_addr = build1 (ADDR_EXPR, fenv_ptr, fenv_var);
46867 fenv_addr = fold_convert (ptr_type_node, fenv_addr);
46868 tree fnstenv = ix86_builtins[IX86_BUILTIN_FNSTENV];
46869 tree fldenv = ix86_builtins[IX86_BUILTIN_FLDENV];
46870 tree fnstsw = ix86_builtins[IX86_BUILTIN_FNSTSW];
46871 tree fnclex = ix86_builtins[IX86_BUILTIN_FNCLEX];
46872 tree hold_fnstenv = build_call_expr (fnstenv, 1, fenv_addr);
46873 tree hold_fnclex = build_call_expr (fnclex, 0);
46874 *hold = build2 (COMPOUND_EXPR, void_type_node, hold_fnstenv,
46875 hold_fnclex);
46876 *clear = build_call_expr (fnclex, 0);
46877 tree sw_var = create_tmp_var (short_unsigned_type_node, NULL);
46878 tree fnstsw_call = build_call_expr (fnstsw, 0);
46879 tree sw_mod = build2 (MODIFY_EXPR, short_unsigned_type_node,
46880 sw_var, fnstsw_call);
46881 tree exceptions_x87 = fold_convert (integer_type_node, sw_var);
46882 tree update_mod = build2 (MODIFY_EXPR, integer_type_node,
46883 exceptions_var, exceptions_x87);
46884 *update = build2 (COMPOUND_EXPR, integer_type_node,
46885 sw_mod, update_mod);
46886 tree update_fldenv = build_call_expr (fldenv, 1, fenv_addr);
46887 *update = build2 (COMPOUND_EXPR, void_type_node, *update, update_fldenv);
46888 }
46889 if (TARGET_SSE_MATH)
46890 {
46891 tree mxcsr_orig_var = create_tmp_var (unsigned_type_node, NULL);
46892 tree mxcsr_mod_var = create_tmp_var (unsigned_type_node, NULL);
46893 tree stmxcsr = ix86_builtins[IX86_BUILTIN_STMXCSR];
46894 tree ldmxcsr = ix86_builtins[IX86_BUILTIN_LDMXCSR];
46895 tree stmxcsr_hold_call = build_call_expr (stmxcsr, 0);
46896 tree hold_assign_orig = build2 (MODIFY_EXPR, unsigned_type_node,
46897 mxcsr_orig_var, stmxcsr_hold_call);
46898 tree hold_mod_val = build2 (BIT_IOR_EXPR, unsigned_type_node,
46899 mxcsr_orig_var,
46900 build_int_cst (unsigned_type_node, 0x1f80));
46901 hold_mod_val = build2 (BIT_AND_EXPR, unsigned_type_node, hold_mod_val,
46902 build_int_cst (unsigned_type_node, 0xffffffc0));
46903 tree hold_assign_mod = build2 (MODIFY_EXPR, unsigned_type_node,
46904 mxcsr_mod_var, hold_mod_val);
46905 tree ldmxcsr_hold_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
46906 tree hold_all = build2 (COMPOUND_EXPR, unsigned_type_node,
46907 hold_assign_orig, hold_assign_mod);
46908 hold_all = build2 (COMPOUND_EXPR, void_type_node, hold_all,
46909 ldmxcsr_hold_call);
46910 if (*hold)
46911 *hold = build2 (COMPOUND_EXPR, void_type_node, *hold, hold_all);
46912 else
46913 *hold = hold_all;
46914 tree ldmxcsr_clear_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
46915 if (*clear)
46916 *clear = build2 (COMPOUND_EXPR, void_type_node, *clear,
46917 ldmxcsr_clear_call);
46918 else
46919 *clear = ldmxcsr_clear_call;
46920 tree stxmcsr_update_call = build_call_expr (stmxcsr, 0);
46921 tree exceptions_sse = fold_convert (integer_type_node,
46922 stxmcsr_update_call);
46923 if (*update)
46924 {
46925 tree exceptions_mod = build2 (BIT_IOR_EXPR, integer_type_node,
46926 exceptions_var, exceptions_sse);
46927 tree exceptions_assign = build2 (MODIFY_EXPR, integer_type_node,
46928 exceptions_var, exceptions_mod);
46929 *update = build2 (COMPOUND_EXPR, integer_type_node, *update,
46930 exceptions_assign);
46931 }
46932 else
46933 *update = build2 (MODIFY_EXPR, integer_type_node,
46934 exceptions_var, exceptions_sse);
46935 tree ldmxcsr_update_call = build_call_expr (ldmxcsr, 1, mxcsr_orig_var);
46936 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
46937 ldmxcsr_update_call);
46938 }
46939 tree atomic_feraiseexcept
46940 = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
46941 tree atomic_feraiseexcept_call = build_call_expr (atomic_feraiseexcept,
46942 1, exceptions_var);
46943 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
46944 atomic_feraiseexcept_call);
46945 }
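
/* The MXCSR manipulation above corresponds to the scalar computation

     mod = (orig | 0x1f80) & 0xffffffc0;

   which sets all exception-mask bits and clears the six sticky
   exception-flag bits while the environment is held; e.g. an original
   MXCSR of 0x1f81 (all exceptions masked, invalid-operation flag set)
   becomes 0x1f80.  */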
46946
46947 /* Initialize the GCC target structure. */
46948 #undef TARGET_RETURN_IN_MEMORY
46949 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
46950
46951 #undef TARGET_LEGITIMIZE_ADDRESS
46952 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
46953
46954 #undef TARGET_ATTRIBUTE_TABLE
46955 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
46956 #undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
46957 #define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true
46958 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
46959 # undef TARGET_MERGE_DECL_ATTRIBUTES
46960 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
46961 #endif
46962
46963 #undef TARGET_COMP_TYPE_ATTRIBUTES
46964 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
46965
46966 #undef TARGET_INIT_BUILTINS
46967 #define TARGET_INIT_BUILTINS ix86_init_builtins
46968 #undef TARGET_BUILTIN_DECL
46969 #define TARGET_BUILTIN_DECL ix86_builtin_decl
46970 #undef TARGET_EXPAND_BUILTIN
46971 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
46972
46973 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
46974 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
46975 ix86_builtin_vectorized_function
46976
46977 #undef TARGET_VECTORIZE_BUILTIN_TM_LOAD
46978 #define TARGET_VECTORIZE_BUILTIN_TM_LOAD ix86_builtin_tm_load
46979
46980 #undef TARGET_VECTORIZE_BUILTIN_TM_STORE
46981 #define TARGET_VECTORIZE_BUILTIN_TM_STORE ix86_builtin_tm_store
46982
46983 #undef TARGET_VECTORIZE_BUILTIN_GATHER
46984 #define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather
46985
46986 #undef TARGET_BUILTIN_RECIPROCAL
46987 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
46988
46989 #undef TARGET_ASM_FUNCTION_EPILOGUE
46990 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
46991
46992 #undef TARGET_ENCODE_SECTION_INFO
46993 #ifndef SUBTARGET_ENCODE_SECTION_INFO
46994 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
46995 #else
46996 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
46997 #endif
46998
46999 #undef TARGET_ASM_OPEN_PAREN
47000 #define TARGET_ASM_OPEN_PAREN ""
47001 #undef TARGET_ASM_CLOSE_PAREN
47002 #define TARGET_ASM_CLOSE_PAREN ""
47003
47004 #undef TARGET_ASM_BYTE_OP
47005 #define TARGET_ASM_BYTE_OP ASM_BYTE
47006
47007 #undef TARGET_ASM_ALIGNED_HI_OP
47008 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
47009 #undef TARGET_ASM_ALIGNED_SI_OP
47010 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
47011 #ifdef ASM_QUAD
47012 #undef TARGET_ASM_ALIGNED_DI_OP
47013 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
47014 #endif
47015
47016 #undef TARGET_PROFILE_BEFORE_PROLOGUE
47017 #define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue
47018
47019 #undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
47020 #define TARGET_MANGLE_DECL_ASSEMBLER_NAME ix86_mangle_decl_assembler_name
47021
47022 #undef TARGET_ASM_UNALIGNED_HI_OP
47023 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
47024 #undef TARGET_ASM_UNALIGNED_SI_OP
47025 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
47026 #undef TARGET_ASM_UNALIGNED_DI_OP
47027 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
47028
47029 #undef TARGET_PRINT_OPERAND
47030 #define TARGET_PRINT_OPERAND ix86_print_operand
47031 #undef TARGET_PRINT_OPERAND_ADDRESS
47032 #define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
47033 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
47034 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
47035 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
47036 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
47037
47038 #undef TARGET_SCHED_INIT_GLOBAL
47039 #define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
47040 #undef TARGET_SCHED_ADJUST_COST
47041 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
47042 #undef TARGET_SCHED_ISSUE_RATE
47043 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
47044 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
47045 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
47046 ia32_multipass_dfa_lookahead
47047 #undef TARGET_SCHED_MACRO_FUSION_P
47048 #define TARGET_SCHED_MACRO_FUSION_P ix86_macro_fusion_p
47049 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
47050 #define TARGET_SCHED_MACRO_FUSION_PAIR_P ix86_macro_fusion_pair_p
47051
47052 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
47053 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
47054
47055 #undef TARGET_MEMMODEL_CHECK
47056 #define TARGET_MEMMODEL_CHECK ix86_memmodel_check
47057
47058 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
47059 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV ix86_atomic_assign_expand_fenv
47060
47061 #ifdef HAVE_AS_TLS
47062 #undef TARGET_HAVE_TLS
47063 #define TARGET_HAVE_TLS true
47064 #endif
47065 #undef TARGET_CANNOT_FORCE_CONST_MEM
47066 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
47067 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
47068 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
47069
47070 #undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address

#undef TARGET_MS_BITFIELD_LAYOUT_P
#define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p

#if TARGET_MACHO
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P darwin_binds_local_p
#endif
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
#endif

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START x86_file_start

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE ix86_option_override

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS ix86_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST ix86_address_cost

#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
#undef TARGET_CC_MODES_COMPATIBLE
#define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg

#undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
#define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list

#undef TARGET_FOLD_BUILTIN
#define TARGET_FOLD_BUILTIN ix86_fold_builtin

#undef TARGET_COMPARE_VERSION_PRIORITY
#define TARGET_COMPARE_VERSION_PRIORITY ix86_compare_version_priority

#undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
#define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
  ix86_generate_version_dispatcher_body

#undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
#define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
  ix86_get_function_versions_dispatcher

#undef TARGET_ENUM_VA_LIST_P
#define TARGET_ENUM_VA_LIST_P ix86_enum_va_list

#undef TARGET_FN_ABI_VA_LIST
#define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list

#undef TARGET_CANONICAL_VA_LIST_TYPE
#define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start

#undef TARGET_MD_ASM_CLOBBERS
#define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers

#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG ix86_function_arg
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
#undef TARGET_UPDATE_STACK_BOUNDARY
#define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
#undef TARGET_GET_DRAP_RTX
#define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_STATIC_CHAIN
#define TARGET_STATIC_CHAIN ix86_static_chain
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
#undef TARGET_RETURN_POPS_ARGS
#define TARGET_RETURN_POPS_ARGS ix86_return_pops_args

#undef TARGET_LEGITIMATE_COMBINED_INSN
#define TARGET_LEGITIMATE_COMBINED_INSN ix86_legitimate_combined_insn

#undef TARGET_ASAN_SHADOW_OFFSET
#define TARGET_ASAN_SHADOW_OFFSET ix86_asan_shadow_offset

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p

#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix

#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
#endif

#ifdef SUBTARGET_INSERT_ATTRIBUTES
#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
#endif

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE ix86_mangle_type

#if !TARGET_MACHO
#undef TARGET_STACK_PROTECT_FAIL
#define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
#endif

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE ix86_function_value

#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode

#undef TARGET_MEMBER_TYPE_FORCES_BLK
#define TARGET_MEMBER_TYPE_FORCES_BLK ix86_member_type_forces_blk

#undef TARGET_INSTANTIATE_DECLS
#define TARGET_INSTANTIATE_DECLS ix86_instantiate_decls

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD ix86_secondary_reload

#undef TARGET_CLASS_MAX_NREGS
#define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
#undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
#define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
#undef TARGET_CLASS_LIKELY_SPILLED_P
#define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p

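/* Vectorizer hooks: cost modelling, constant-permutation support and
   preferred SIMD mode selection.  */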
#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  ix86_builtin_vectorization_cost
#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
#define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
  ix86_vectorize_vec_perm_const_ok
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
  ix86_preferred_simd_mode
#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
  ix86_autovectorize_vector_sizes
#undef TARGET_VECTORIZE_INIT_COST
#define TARGET_VECTORIZE_INIT_COST ix86_init_cost
#undef TARGET_VECTORIZE_ADD_STMT_COST
#define TARGET_VECTORIZE_ADD_STMT_COST ix86_add_stmt_cost
#undef TARGET_VECTORIZE_FINISH_COST
#define TARGET_VECTORIZE_FINISH_COST ix86_finish_cost
#undef TARGET_VECTORIZE_DESTROY_COST_DATA
#define TARGET_VECTORIZE_DESTROY_COST_DATA ix86_destroy_cost_data

#undef TARGET_SET_CURRENT_FUNCTION
#define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function

#undef TARGET_OPTION_VALID_ATTRIBUTE_P
#define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p

#undef TARGET_OPTION_SAVE
#define TARGET_OPTION_SAVE ix86_function_specific_save

#undef TARGET_OPTION_RESTORE
#define TARGET_OPTION_RESTORE ix86_function_specific_restore

#undef TARGET_OPTION_PRINT
#define TARGET_OPTION_PRINT ix86_function_specific_print

#undef TARGET_OPTION_FUNCTION_VERSIONS
#define TARGET_OPTION_FUNCTION_VERSIONS ix86_function_versions

#undef TARGET_CAN_INLINE_P
#define TARGET_CAN_INLINE_P ix86_can_inline_p

#undef TARGET_EXPAND_TO_RTL_HOOK
#define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p

#undef TARGET_LRA_P
#define TARGET_LRA_P hook_bool_void_true

#undef TARGET_REGISTER_PRIORITY
#define TARGET_REGISTER_PRIORITY ix86_register_priority

#undef TARGET_REGISTER_USAGE_LEVELING_P
#define TARGET_REGISTER_USAGE_LEVELING_P hook_bool_void_true

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE ix86_can_eliminate

#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry

#undef TARGET_ASM_CODE_END
#define TARGET_ASM_CODE_END ix86_code_end

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage

#if TARGET_MACHO
#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS darwin_rename_builtins
#endif

#undef TARGET_LOOP_UNROLL_ADJUST
#define TARGET_LOOP_UNROLL_ADJUST ix86_loop_unroll_adjust

#undef TARGET_SPILL_CLASS
#define TARGET_SPILL_CLASS ix86_spill_class

#undef TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN
#define TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN \
  ix86_simd_clone_compute_vecsize_and_simdlen

#undef TARGET_SIMD_CLONE_ADJUST
#define TARGET_SIMD_CLONE_ADJUST \
  ix86_simd_clone_adjust

#undef TARGET_SIMD_CLONE_USABLE
#define TARGET_SIMD_CLONE_USABLE \
  ix86_simd_clone_usable

#undef TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P
#define TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P \
  ix86_float_exceptions_rounding_supported_p

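/* Hooks used by the optimize-mode-switching pass.  */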
#undef TARGET_MODE_EMIT
#define TARGET_MODE_EMIT ix86_emit_mode_set

#undef TARGET_MODE_NEEDED
#define TARGET_MODE_NEEDED ix86_mode_needed

#undef TARGET_MODE_AFTER
#define TARGET_MODE_AFTER ix86_mode_after

#undef TARGET_MODE_ENTRY
#define TARGET_MODE_ENTRY ix86_mode_entry

#undef TARGET_MODE_EXIT
#define TARGET_MODE_EXIT ix86_mode_exit

#undef TARGET_MODE_PRIORITY
#define TARGET_MODE_PRIORITY ix86_mode_priority

#undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
#define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true

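/* Instantiate the target hook vector; hooks not overridden above keep
   their defaults from target-def.h.  */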
struct gcc_target targetm = TARGET_INITIALIZER;
\f
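/* Garbage-collection root tables for this file, generated by gengtype.  */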
#include "gt-i386.h"